Update Linux to v5.4.2
Change-Id: Idf6911045d9d382da2cfe01b1edff026404ac8fd
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 78eabc4..fcc4235 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0-only
#
# Timer subsystem related configuration options
#
@@ -12,6 +13,10 @@
config ARCH_CLOCKSOURCE_DATA
bool
+# Architecture has extra clocksource init called from registration
+config ARCH_CLOCKSOURCE_INIT
+ bool
+
# Clocksources require validation of the clocksource against the last
# cycle update - x86/TSC misfeature
config CLOCKSOURCE_VALIDATE_LAST_CYCLE
@@ -113,6 +118,35 @@
endchoice
+config CONTEXT_TRACKING
+ bool
+
+config CONTEXT_TRACKING_FORCE
+ bool "Force context tracking"
+ depends on CONTEXT_TRACKING
+ default y if !NO_HZ_FULL
+ help
+ The major pre-requirement for full dynticks to work is to
+ support the context tracking subsystem. But there are also
+ other dependencies to provide in order to make the full
+ dynticks working.
+
+ This option stands for testing when an arch implements the
+ context tracking backend but doesn't yet fullfill all the
+ requirements to make the full dynticks feature working.
+ Without the full dynticks, there is no way to test the support
+ for context tracking and the subsystems that rely on it: RCU
+ userspace extended quiescent state and tickless cputime
+ accounting. This option copes with the absence of the full
+ dynticks subsystem by forcing the context tracking on all
+ CPUs in the system.
+
+ Say Y only if you're working on the development of an
+ architecture backend for the context tracking.
+
+ Say N otherwise, this option brings an overhead that you
+ don't want in production.
+
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/Makefile b/kernel/time/Makefile
index f1e46f3..1867044 100644
--- a/kernel/time/Makefile
+++ b/kernel/time/Makefile
@@ -16,5 +16,6 @@
endif
obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o
+obj-$(CONFIG_HAVE_GENERIC_VDSO) += vsyscall.o
obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o
obj-$(CONFIG_TEST_UDELAY) += test_udelay.o
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index fa5de5e..451f9d0 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Alarmtimer interface
*
@@ -10,10 +11,6 @@
* Copyright (C) 2010 IBM Corperation
*
* Author: John Stultz <john.stultz@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/time.h>
#include <linux/hrtimer.h>
@@ -100,7 +97,7 @@
if (!device_may_wakeup(rtc->dev.parent))
return -1;
- __ws = wakeup_source_register("alarmtimer");
+ __ws = wakeup_source_register(dev, "alarmtimer");
spin_lock_irqsave(&rtcdev_lock, flags);
if (!rtcdev) {
@@ -236,7 +233,6 @@
/**
* alarmtimer_suspend - Suspend time callback
* @dev: unused
- * @state: unused
*
* When we are going into suspend, we look through the bases
* to see which is the soonest timer to expire. We then
@@ -436,7 +432,7 @@
int ret = alarm_try_to_cancel(alarm);
if (ret >= 0)
return ret;
- cpu_relax();
+ hrtimer_cancel_wait_running(&alarm->timer);
}
}
EXPORT_SYMBOL_GPL(alarm_cancel);
@@ -597,7 +593,7 @@
{
struct alarm *alarm = &timr->it.alarm.alarmtimer;
- return ktime_sub(now, alarm->node.expires);
+ return ktime_sub(alarm->node.expires, now);
}
/**
@@ -610,6 +606,19 @@
}
/**
+ * alarm_timer_wait_running - Posix timer callback to wait for a timer
+ * @timr: Pointer to the posixtimer data struct
+ *
+ * Called from the core code when timer cancel detected that the callback
+ * is running. @timr is unlocked and rcu read lock is held to prevent it
+ * from being freed.
+ */
+static void alarm_timer_wait_running(struct k_itimer *timr)
+{
+ hrtimer_cancel_wait_running(&timr->it.alarm.alarmtimer.timer);
+}
+
+/**
* alarm_timer_arm - Posix timer callback to arm a timer
* @timr: Pointer to the posixtimer data struct
* @expires: The new expiry time
@@ -676,7 +685,7 @@
enum alarmtimer_type type;
if (!alarmtimer_get_rtcdev())
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (!capable(CAP_WAKE_ALARM))
return -EPERM;
@@ -794,7 +803,7 @@
int ret = 0;
if (!alarmtimer_get_rtcdev())
- return -ENOTSUPP;
+ return -EOPNOTSUPP;
if (flags & ~TIMER_ABSTIME)
return -EINVAL;
@@ -838,6 +847,7 @@
.timer_forward = alarm_timer_forward,
.timer_remaining = alarm_timer_remaining,
.timer_try_to_cancel = alarm_timer_try_to_cancel,
+ .timer_wait_running = alarm_timer_wait_running,
.nsleep = alarm_timer_nsleep,
};
#endif /* CONFIG_POSIX_TIMERS */
diff --git a/kernel/time/clockevents.c b/kernel/time/clockevents.c
index 8c0e409..f549022 100644
--- a/kernel/time/clockevents.c
+++ b/kernel/time/clockevents.c
@@ -1,14 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/clockevents.c
- *
* This file contains functions which manage clock event devices.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
- *
- * This code is licenced under the GPL version 2. For details see
- * kernel-base/COPYING.
*/
#include <linux/clockchips.h>
@@ -39,10 +35,8 @@
u64 clc = (u64) latch << evt->shift;
u64 rnd;
- if (unlikely(!evt->mult)) {
+ if (WARN_ON(!evt->mult))
evt->mult = 1;
- WARN_ON(1);
- }
rnd = (u64) evt->mult - 1;
/*
@@ -164,10 +158,8 @@
* on it, so fix it up and emit a warning:
*/
if (clockevent_state_oneshot(dev)) {
- if (unlikely(!dev->mult)) {
+ if (WARN_ON(!dev->mult))
dev->mult = 1;
- WARN_ON(1);
- }
}
}
}
@@ -315,10 +307,8 @@
int64_t delta;
int rc;
- if (unlikely(expires < 0)) {
- WARN_ON_ONCE(1);
+ if (WARN_ON_ONCE(expires < 0))
return -ETIME;
- }
dev->next_event = expires;
@@ -621,6 +611,22 @@
}
#ifdef CONFIG_HOTPLUG_CPU
+
+# ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+/**
+ * tick_offline_cpu - Take CPU out of the broadcast mechanism
+ * @cpu: The outgoing CPU
+ *
+ * Called on the outgoing CPU after it took itself offline.
+ */
+void tick_offline_cpu(unsigned int cpu)
+{
+ raw_spin_lock(&clockevents_lock);
+ tick_broadcast_offline(cpu);
+ raw_spin_unlock(&clockevents_lock);
+}
+# endif
+
/**
* tick_cleanup_dead_cpu - Cleanup the tick and clockevents of a dead cpu
*/
@@ -631,8 +637,6 @@
raw_spin_lock_irqsave(&clockevents_lock, flags);
- tick_shutdown_broadcast_oneshot(cpu);
- tick_shutdown_broadcast(cpu);
tick_shutdown(cpu);
/*
* Unregister the clock event devices which were
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 0e6e97a..fff5f64 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -1,26 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * linux/kernel/time/clocksource.c
- *
* This file contains the functions which manage clocksource drivers.
*
* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * TODO WishList:
- * o Allow clocksource drivers to be unregistered
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -123,12 +105,12 @@
static int watchdog_running;
static atomic_t watchdog_reset_pending;
-static void inline clocksource_watchdog_lock(unsigned long *flags)
+static inline void clocksource_watchdog_lock(unsigned long *flags)
{
spin_lock_irqsave(&watchdog_lock, *flags);
}
-static void inline clocksource_watchdog_unlock(unsigned long *flags)
+static inline void clocksource_watchdog_unlock(unsigned long *flags)
{
spin_unlock_irqrestore(&watchdog_lock, *flags);
}
@@ -937,6 +919,8 @@
{
unsigned long flags;
+ clocksource_arch_init(cs);
+
/* Initialize mult/shift and max_idle_ns */
__clocksource_update_freq_scale(cs, scale, freq);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index e1a549c..6560553 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -1,34 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/hrtimer.c
- *
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
*
* High-resolution kernel timers
*
- * In contrast to the low-resolution timeout API implemented in
- * kernel/timer.c, hrtimers provide finer resolution and accuracy
- * depending on system configuration and capabilities.
- *
- * These timers are currently used for:
- * - itimers
- * - POSIX timers
- * - nanosleep
- * - precise in-kernel timing
+ * In contrast to the low-resolution timeout API, aka timer wheel,
+ * hrtimers provide finer resolution and accuracy depending on system
+ * configuration and capabilities.
*
* Started by: Thomas Gleixner and Ingo Molnar
*
* Credits:
- * based on kernel/timer.c
+ * Based on the original timer wheel code
*
* Help, testing, suggestions, bugfixes, improvements were
* provided by:
*
* George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
* et. al.
- *
- * For licencing details see kernel-base/COPYING
*/
#include <linux/cpu.h>
@@ -39,7 +30,6 @@
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
-#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
@@ -150,6 +140,11 @@
#define migration_base migration_cpu_base.clock_base[0]
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+ return base == &migration_base;
+}
+
/*
* We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
* means that all timers which are tied to this base via timer->base are
@@ -169,7 +164,7 @@
struct hrtimer_clock_base *base;
for (;;) {
- base = timer->base;
+ base = READ_ONCE(timer->base);
if (likely(base != &migration_base)) {
raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
if (likely(base == timer->base))
@@ -249,7 +244,7 @@
return base;
/* See the comment in lock_hrtimer_base() */
- timer->base = &migration_base;
+ WRITE_ONCE(timer->base, &migration_base);
raw_spin_unlock(&base->cpu_base->lock);
raw_spin_lock(&new_base->cpu_base->lock);
@@ -258,10 +253,10 @@
raw_spin_unlock(&new_base->cpu_base->lock);
raw_spin_lock(&base->cpu_base->lock);
new_cpu_base = this_cpu_base;
- timer->base = base;
+ WRITE_ONCE(timer->base, base);
goto again;
}
- timer->base = new_base;
+ WRITE_ONCE(timer->base, new_base);
} else {
if (new_cpu_base != this_cpu_base &&
hrtimer_check_target(timer, new_base)) {
@@ -274,6 +269,11 @@
#else /* CONFIG_SMP */
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+ return false;
+}
+
static inline struct hrtimer_clock_base *
lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
@@ -373,7 +373,7 @@
switch (state) {
case ODEBUG_STATE_ACTIVE:
WARN_ON(1);
-
+ /* fall through */
default:
return false;
}
@@ -437,6 +437,17 @@
}
EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode)
+{
+ debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+ __hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
debug_object_free(timer, &hrtimer_debug_descr);
@@ -1106,9 +1117,13 @@
/*
* Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
- * match.
+ * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
+ * expiry mode because unmarked timers are moved to softirq expiry.
*/
- WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+ if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+ WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+ else
+ WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
base = lock_hrtimer_base(timer, &flags);
@@ -1124,9 +1139,10 @@
* @timer: hrtimer to stop
*
* Returns:
- * 0 when the timer was not active
- * 1 when the timer was active
- * -1 when the timer is currently executing the callback function and
+ *
+ * * 0 when the timer was not active
+ * * 1 when the timer was active
+ * * -1 when the timer is currently executing the callback function and
* cannot be stopped
*/
int hrtimer_try_to_cancel(struct hrtimer *timer)
@@ -1156,6 +1172,93 @@
}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+ spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+ spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+ spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reaquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+ unsigned long flags)
+{
+ if (atomic_read(&cpu_base->timer_waiters)) {
+ raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ spin_unlock(&cpu_base->softirq_expiry_lock);
+ spin_lock(&cpu_base->softirq_expiry_lock);
+ raw_spin_lock_irq(&cpu_base->lock);
+ }
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ * - If the caller is on a remote CPU then it has to spin wait for the timer
+ * handler to complete. This can result in unbound priority inversion.
+ *
+ * - If the caller originates from the task which preempted the timer
+ * handler on the same CPU, then spin waiting for the timer handler to
+ * complete is never going to end.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+ /* Lockless read. Prevent the compiler from reloading it below */
+ struct hrtimer_clock_base *base = READ_ONCE(timer->base);
+
+ /*
+ * Just relax if the timer expires in hard interrupt context or if
+ * it is currently on the migration base.
+ */
+ if (!timer->is_soft || is_migration_base(base)) {
+ cpu_relax();
+ return;
+ }
+
+ /*
+ * Mark the base as contended and grab the expiry lock, which is
+ * held by the softirq across the timer callback. Drop the lock
+ * immediately so the softirq can expire the next timer. In theory
+ * the timer could already be running again, but that's more than
+ * unlikely and just causes another wait loop.
+ */
+ atomic_inc(&base->cpu_base->timer_waiters);
+ spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+ atomic_dec(&base->cpu_base->timer_waiters);
+ spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+ unsigned long flags) { }
+#endif
+
/**
* hrtimer_cancel - cancel a timer and wait for the handler to finish.
* @timer: the timer to be cancelled
@@ -1166,13 +1269,15 @@
*/
int hrtimer_cancel(struct hrtimer *timer)
{
- for (;;) {
- int ret = hrtimer_try_to_cancel(timer);
+ int ret;
- if (ret >= 0)
- return ret;
- cpu_relax();
- }
+ do {
+ ret = hrtimer_try_to_cancel(timer);
+
+ if (ret < 0)
+ hrtimer_cancel_wait_running(timer);
+ } while (ret < 0);
+ return ret;
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1269,8 +1374,17 @@
enum hrtimer_mode mode)
{
bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
- int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
struct hrtimer_cpu_base *cpu_base;
+ int base;
+
+ /*
+ * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+ * marked for hard interrupt expiry mode are moved into soft
+ * interrupt context for latency reasons and because the callbacks
+ * can invoke functions which might sleep on RT, e.g. spin_lock().
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+ softtimer = true;
memset(timer, 0, sizeof(struct hrtimer));
@@ -1284,8 +1398,10 @@
if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
clock_id = CLOCK_MONOTONIC;
+ base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
base += hrtimer_clockid_to_base(clock_id);
timer->is_soft = softtimer;
+ timer->is_hard = !softtimer;
timer->base = &cpu_base->clock_base[base];
timerqueue_init(&timer->node);
}
@@ -1458,6 +1574,8 @@
break;
__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+ if (active_mask == HRTIMER_ACTIVE_SOFT)
+ hrtimer_sync_wait_running(cpu_base, flags);
}
}
}
@@ -1468,6 +1586,7 @@
unsigned long flags;
ktime_t now;
+ hrtimer_cpu_base_lock_expiry(cpu_base);
raw_spin_lock_irqsave(&cpu_base->lock, flags);
now = hrtimer_update_base(cpu_base);
@@ -1477,6 +1596,7 @@
hrtimer_update_softirq_timer(cpu_base, true);
raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+ hrtimer_cpu_base_unlock_expiry(cpu_base);
}
#ifdef CONFIG_HIGH_RES_TIMERS
@@ -1648,10 +1768,75 @@
return HRTIMER_NORESTART;
}
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+/**
+ * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
+ * @sl: sleeper to be started
+ * @mode: timer mode abs/rel
+ *
+ * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
+ * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
+ */
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+ enum hrtimer_mode mode)
{
+ /*
+ * Make the enqueue delivery mode check work on RT. If the sleeper
+ * was initialized for hard interrupt delivery, force the mode bit.
+ * This is a special case for hrtimer_sleepers because
+ * hrtimer_init_sleeper() determines the delivery mode on RT so the
+ * fiddling with this decision is avoided at the call sites.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+ mode |= HRTIMER_MODE_HARD;
+
+ hrtimer_start_expires(&sl->timer, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
+
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+ clockid_t clock_id, enum hrtimer_mode mode)
+{
+ /*
+ * On PREEMPT_RT enabled kernels hrtimers which are not explicitely
+ * marked for hard interrupt expiry mode are moved into soft
+ * interrupt context either for latency reasons or because the
+ * hrtimer callback takes regular spinlocks or invokes other
+ * functions which are not suitable for hard interrupt context on
+ * PREEMPT_RT.
+ *
+ * The hrtimer_sleeper callback is RT compatible in hard interrupt
+ * context, but there is a latency concern: Untrusted userspace can
+ * spawn many threads which arm timers for the same expiry time on
+ * the same CPU. That causes a latency spike due to the wakeup of
+ * a gazillion threads.
+ *
+ * OTOH, priviledged real-time user space applications rely on the
+ * low latency of hard interrupt wakeups. If the current task is in
+ * a real-time scheduling class, mark the mode for hard interrupt
+ * expiry.
+ */
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+ mode |= HRTIMER_MODE_HARD;
+ }
+
+ __hrtimer_init(&sl->timer, clock_id, mode);
sl->timer.function = hrtimer_wakeup;
- sl->task = task;
+ sl->task = current;
+}
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl: sleeper to be initialized
+ * @clock_id: the clock to be used
+ * @mode: timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+ enum hrtimer_mode mode)
+{
+ debug_init(&sl->timer, clock_id, mode);
+ __hrtimer_init_sleeper(sl, clock_id, mode);
+
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
@@ -1660,7 +1845,7 @@
switch(restart->nanosleep.type) {
#ifdef CONFIG_COMPAT_32BIT_TIME
case TT_COMPAT:
- if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
+ if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
return -EFAULT;
break;
#endif
@@ -1678,11 +1863,9 @@
{
struct restart_block *restart;
- hrtimer_init_sleeper(t, current);
-
do {
set_current_state(TASK_INTERRUPTIBLE);
- hrtimer_start_expires(&t->timer, mode);
+ hrtimer_sleeper_start_expires(t, mode);
if (likely(t->task))
freezable_schedule();
@@ -1716,10 +1899,9 @@
struct hrtimer_sleeper t;
int ret;
- hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
- HRTIMER_MODE_ABS);
+ hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+ HRTIMER_MODE_ABS);
hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
destroy_hrtimer_on_stack(&t.timer);
return ret;
@@ -1737,7 +1919,7 @@
if (dl_task(current) || rt_task(current))
slack = 0;
- hrtimer_init_on_stack(&t.timer, clockid, mode);
+ hrtimer_init_sleeper_on_stack(&t, clockid, mode);
hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
ret = do_nanosleep(&t, mode);
if (ret != -ERESTART_RESTARTBLOCK)
@@ -1780,12 +1962,12 @@
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
- struct compat_timespec __user *, rmtp)
+SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
+ struct old_timespec32 __user *, rmtp)
{
struct timespec64 tu;
- if (compat_get_timespec64(&tu, rqtp))
+ if (get_old_timespec32(&tu, rqtp))
return -EFAULT;
if (!timespec64_valid(&tu))
@@ -1818,6 +2000,7 @@
cpu_base->softirq_next_timer = NULL;
cpu_base->expires_next = KTIME_MAX;
cpu_base->softirq_expires_next = KTIME_MAX;
+ hrtimer_cpu_base_init_expiry_lock(cpu_base);
return 0;
}
@@ -1936,12 +2119,9 @@
return -EINTR;
}
- hrtimer_init_on_stack(&t.timer, clock_id, mode);
+ hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
- hrtimer_init_sleeper(&t, current);
-
- hrtimer_start_expires(&t.timer, mode);
+ hrtimer_sleeper_start_expires(&t, mode);
if (likely(t.task))
schedule();
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 9a65713..77f1e56 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -1,7 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/itimer.c
- *
* Copyright (C) 1992 Darren Senn
*/
@@ -57,15 +55,10 @@
val = it->expires;
interval = it->incr;
if (val) {
- struct task_cputime cputime;
- u64 t;
+ u64 t, samples[CPUCLOCK_MAX];
- thread_group_cputimer(tsk, &cputime);
- if (clock_id == CPUCLOCK_PROF)
- t = cputime.utime + cputime.stime;
- else
- /* CPUCLOCK_VIRT */
- t = cputime.utime;
+ thread_group_sample_cputime(tsk, samples);
+ t = samples[clock_id];
if (val < t)
/* about to fire */
@@ -215,6 +208,7 @@
/* We are sharing ->siglock with it_real_fn() */
if (hrtimer_try_to_cancel(timer) < 0) {
spin_unlock_irq(&tsk->sighand->siglock);
+ hrtimer_cancel_wait_running(timer);
goto again;
}
expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 4977191..d23b434 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -1,25 +1,9 @@
-/***********************************************************************
-* linux/kernel/time/jiffies.c
-*
-* This file contains the jiffies based clocksource.
-*
-* Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
-*
-* This program is free software; you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation; either version 2 of the License, or
-* (at your option) any later version.
-*
-* This program is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with this program; if not, write to the Free Software
-* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-*
-************************************************************************/
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * This file contains the jiffies based clocksource.
+ *
+ * Copyright (C) 2004, 2005 IBM, John Stultz (johnstul@us.ibm.com)
+ */
#include <linux/clocksource.h>
#include <linux/jiffies.h>
#include <linux/module.h>
@@ -79,7 +63,7 @@
#if (BITS_PER_LONG < 64)
u64 get_jiffies_64(void)
{
- unsigned long seq;
+ unsigned int seq;
u64 ret;
do {
@@ -105,7 +89,7 @@
return &clocksource_jiffies;
}
-struct clocksource refined_jiffies;
+static struct clocksource refined_jiffies;
int register_refined_jiffies(long cycles_per_second)
{
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index c5e0cba..069ca78 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -17,7 +17,7 @@
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/rtc.h>
-#include <linux/math64.h>
+#include <linux/audit.h>
#include "ntp_internal.h"
#include "timekeeping_internal.h"
@@ -43,6 +43,7 @@
#define MAX_TICKADJ 500LL /* usecs */
#define MAX_TICKADJ_SCALED \
(((MAX_TICKADJ * NSEC_PER_USEC) << NTP_SCALE_SHIFT) / NTP_INTERVAL_FREQ)
+#define MAX_TAI_OFFSET 100000
/*
* phase-lock loop variables
@@ -189,13 +190,13 @@
&& (status & (STA_PPSWANDER|STA_PPSERROR)));
}
-static inline void pps_fill_timex(struct timex *txc)
+static inline void pps_fill_timex(struct __kernel_timex *txc)
{
txc->ppsfreq = shift_right((pps_freq >> PPM_SCALE_INV_SHIFT) *
PPM_SCALE_INV, NTP_SCALE_SHIFT);
txc->jitter = pps_jitter;
if (!(time_status & STA_NANO))
- txc->jitter /= NSEC_PER_USEC;
+ txc->jitter = pps_jitter / NSEC_PER_USEC;
txc->shift = pps_shift;
txc->stabil = pps_stabil;
txc->jitcnt = pps_jitcnt;
@@ -221,7 +222,7 @@
return status & (STA_UNSYNC|STA_CLOCKERR);
}
-static inline void pps_fill_timex(struct timex *txc)
+static inline void pps_fill_timex(struct __kernel_timex *txc)
{
/* PPS is not implemented, so these are zero */
txc->ppsfreq = 0;
@@ -555,17 +556,9 @@
}
#ifdef CONFIG_GENERIC_CMOS_UPDATE
-int __weak update_persistent_clock(struct timespec now)
-{
- return -ENODEV;
-}
-
int __weak update_persistent_clock64(struct timespec64 now64)
{
- struct timespec now;
-
- now = timespec64_to_timespec(now64);
- return update_persistent_clock(now);
+ return -ENODEV;
}
#endif
@@ -642,7 +635,7 @@
/*
* Propagate a new txc->status value into the NTP state:
*/
-static inline void process_adj_status(const struct timex *txc)
+static inline void process_adj_status(const struct __kernel_timex *txc)
{
if ((time_status & STA_PLL) && !(txc->status & STA_PLL)) {
time_state = TIME_OK;
@@ -665,7 +658,8 @@
}
-static inline void process_adjtimex_modes(const struct timex *txc, s32 *time_tai)
+static inline void process_adjtimex_modes(const struct __kernel_timex *txc,
+ s32 *time_tai)
{
if (txc->modes & ADJ_STATUS)
process_adj_status(txc);
@@ -698,7 +692,8 @@
time_constant = max(time_constant, 0l);
}
- if (txc->modes & ADJ_TAI && txc->constant > 0)
+ if (txc->modes & ADJ_TAI &&
+ txc->constant >= 0 && txc->constant <= MAX_TAI_OFFSET)
*time_tai = txc->constant;
if (txc->modes & ADJ_OFFSET)
@@ -716,7 +711,8 @@
* adjtimex mainly allows reading (and writing, if superuser) of
* kernel time-keeping variables. used by xntpd.
*/
-int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai)
+int __do_adjtimex(struct __kernel_timex *txc, const struct timespec64 *ts,
+ s32 *time_tai, struct audit_ntp_data *ad)
{
int result;
@@ -727,18 +723,33 @@
/* adjtime() is independent from ntp_adjtime() */
time_adjust = txc->offset;
ntp_update_frequency();
+
+ audit_ntp_set_old(ad, AUDIT_NTP_ADJUST, save_adjust);
+ audit_ntp_set_new(ad, AUDIT_NTP_ADJUST, time_adjust);
}
txc->offset = save_adjust;
} else {
-
/* If there are input parameters, then process them: */
- if (txc->modes)
+ if (txc->modes) {
+ audit_ntp_set_old(ad, AUDIT_NTP_OFFSET, time_offset);
+ audit_ntp_set_old(ad, AUDIT_NTP_FREQ, time_freq);
+ audit_ntp_set_old(ad, AUDIT_NTP_STATUS, time_status);
+ audit_ntp_set_old(ad, AUDIT_NTP_TAI, *time_tai);
+ audit_ntp_set_old(ad, AUDIT_NTP_TICK, tick_usec);
+
process_adjtimex_modes(txc, time_tai);
+ audit_ntp_set_new(ad, AUDIT_NTP_OFFSET, time_offset);
+ audit_ntp_set_new(ad, AUDIT_NTP_FREQ, time_freq);
+ audit_ntp_set_new(ad, AUDIT_NTP_STATUS, time_status);
+ audit_ntp_set_new(ad, AUDIT_NTP_TAI, *time_tai);
+ audit_ntp_set_new(ad, AUDIT_NTP_TICK, tick_usec);
+ }
+
txc->offset = shift_right(time_offset * NTP_INTERVAL_FREQ,
NTP_SCALE_SHIFT);
if (!(time_status & STA_NANO))
- txc->offset /= NSEC_PER_USEC;
+ txc->offset = (u32)txc->offset / NSEC_PER_USEC;
}
result = time_state; /* mostly `TIME_OK' */
@@ -760,10 +771,10 @@
/* fill PPS status fields */
pps_fill_timex(txc);
- txc->time.tv_sec = (time_t)ts->tv_sec;
+ txc->time.tv_sec = ts->tv_sec;
txc->time.tv_usec = ts->tv_nsec;
if (!(time_status & STA_NANO))
- txc->time.tv_usec /= NSEC_PER_USEC;
+ txc->time.tv_usec = ts->tv_nsec / NSEC_PER_USEC;
/* Handle leapsec adjustments */
if (unlikely(ts->tv_sec >= ntp_next_leap_sec)) {
diff --git a/kernel/time/ntp_internal.h b/kernel/time/ntp_internal.h
index c24b0e1..908ecaa 100644
--- a/kernel/time/ntp_internal.h
+++ b/kernel/time/ntp_internal.h
@@ -8,6 +8,8 @@
extern u64 ntp_tick_length(void);
extern ktime_t ntp_get_next_leap(void);
extern int second_overflow(time64_t secs);
-extern int __do_adjtimex(struct timex *txc, const struct timespec64 *ts, s32 *time_tai);
+extern int __do_adjtimex(struct __kernel_timex *txc,
+ const struct timespec64 *ts,
+ s32 *time_tai, struct audit_ntp_data *ad);
extern void __hardpps(const struct timespec64 *phase_ts, const struct timespec64 *raw_ts);
#endif /* _LINUX_NTP_INTERNAL_H */
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index fe56c4e..ec960bb 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -1,21 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * posix-clock.c - support for dynamic clock devices
+ * Support for dynamic clock devices
*
* Copyright (C) 2010 OMICRON electronics GmbH
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
#include <linux/device.h>
#include <linux/export.h>
@@ -241,7 +228,7 @@
fput(cd->fp);
}
-static int pc_clock_adjtime(clockid_t id, struct timex *tx)
+static int pc_clock_adjtime(clockid_t id, struct __kernel_timex *tx)
{
struct posix_clock_desc cd;
int err;
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index ce32cf7..42d512f 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -20,11 +20,20 @@
static void posix_cpu_timer_rearm(struct k_itimer *timer);
+void posix_cputimers_group_init(struct posix_cputimers *pct, u64 cpu_limit)
+{
+ posix_cputimers_init(pct);
+ if (cpu_limit != RLIM_INFINITY) {
+ pct->bases[CPUCLOCK_PROF].nextevt = cpu_limit * NSEC_PER_SEC;
+ pct->timers_active = true;
+ }
+}
+
/*
* Called after updating RLIMIT_CPU to run cpu timer and update
- * tsk->signal->cputime_expires expiration cache if necessary. Needs
- * siglock protection since other code may update expiration cache as
- * well.
+ * tsk->signal->posix_cputimers.bases[clock].nextevt expiration cache if
+ * necessary. Needs siglock protection since other code may update the
+ * expiration cache as well.
*/
void update_rlimit_cpu(struct task_struct *task, unsigned long rlim_new)
{
@@ -35,46 +44,97 @@
spin_unlock_irq(&task->sighand->siglock);
}
-static int check_clock(const clockid_t which_clock)
+/*
+ * Functions for validating access to tasks.
+ */
+static struct task_struct *lookup_task(const pid_t pid, bool thread,
+ bool gettime)
{
- int error = 0;
struct task_struct *p;
- const pid_t pid = CPUCLOCK_PID(which_clock);
- if (CPUCLOCK_WHICH(which_clock) >= CPUCLOCK_MAX)
- return -EINVAL;
+ /*
+ * If the encoded PID is 0, then the timer is targeted at current
+ * or the process to which current belongs.
+ */
+ if (!pid)
+ return thread ? current : current->group_leader;
- if (pid == 0)
- return 0;
+ p = find_task_by_vpid(pid);
+ if (!p)
+ return p;
+
+ if (thread)
+ return same_thread_group(p, current) ? p : NULL;
+
+ if (gettime) {
+ /*
+ * For clock_gettime(PROCESS) the task does not need to be
+ * the actual group leader. tsk->sighand gives
+ * access to the group's clock.
+ *
+ * Timers need the group leader because they take a
+ * reference on it and store the task pointer until the
+ * timer is destroyed.
+ */
+ return (p == current || thread_group_leader(p)) ? p : NULL;
+ }
+
+ /*
+ * For processes require that p is group leader.
+ */
+ return has_group_leader_pid(p) ? p : NULL;
+}
+
+static struct task_struct *__get_task_for_clock(const clockid_t clock,
+ bool getref, bool gettime)
+{
+ const bool thread = !!CPUCLOCK_PERTHREAD(clock);
+ const pid_t pid = CPUCLOCK_PID(clock);
+ struct task_struct *p;
+
+ if (CPUCLOCK_WHICH(clock) >= CPUCLOCK_MAX)
+ return NULL;
rcu_read_lock();
- p = find_task_by_vpid(pid);
- if (!p || !(CPUCLOCK_PERTHREAD(which_clock) ?
- same_thread_group(p, current) : has_group_leader_pid(p))) {
- error = -EINVAL;
- }
+ p = lookup_task(pid, thread, gettime);
+ if (p && getref)
+ get_task_struct(p);
rcu_read_unlock();
+ return p;
+}
- return error;
+static inline struct task_struct *get_task_for_clock(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, true, false);
+}
+
+static inline struct task_struct *get_task_for_clock_get(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, true, true);
+}
+
+static inline int validate_clock_permissions(const clockid_t clock)
+{
+ return __get_task_for_clock(clock, false, false) ? 0 : -EINVAL;
}
/*
* Update expiry time from increment, and increase overrun count,
* given the current clock sample.
*/
-static void bump_cpu_timer(struct k_itimer *timer, u64 now)
+static u64 bump_cpu_timer(struct k_itimer *timer, u64 now)
{
+ u64 delta, incr, expires = timer->it.cpu.node.expires;
int i;
- u64 delta, incr;
- if (timer->it.cpu.incr == 0)
- return;
+ if (!timer->it_interval)
+ return expires;
- if (now < timer->it.cpu.expires)
- return;
+ if (now < expires)
+ return expires;
- incr = timer->it.cpu.incr;
- delta = now + incr - timer->it.cpu.expires;
+ incr = timer->it_interval;
+ delta = now + incr - expires;
/* Don't use (incr*2 < delta), incr*2 might overflow. */
for (i = 0; incr < delta - incr; i++)
@@ -84,48 +144,26 @@
if (delta < incr)
continue;
- timer->it.cpu.expires += incr;
+ timer->it.cpu.node.expires += incr;
timer->it_overrun += 1LL << i;
delta -= incr;
}
+ return timer->it.cpu.node.expires;
}
-/**
- * task_cputime_zero - Check a task_cputime struct for all zero fields.
- *
- * @cputime: The struct to compare.
- *
- * Checks @cputime to see if all fields are zero. Returns true if all fields
- * are zero, false if any field is nonzero.
- */
-static inline int task_cputime_zero(const struct task_cputime *cputime)
+/* Check whether all cache entries contain U64_MAX, i.e. eternal expiry time */
+static inline bool expiry_cache_is_inactive(const struct posix_cputimers *pct)
{
- if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime)
- return 1;
- return 0;
-}
-
-static inline u64 prof_ticks(struct task_struct *p)
-{
- u64 utime, stime;
-
- task_cputime(p, &utime, &stime);
-
- return utime + stime;
-}
-static inline u64 virt_ticks(struct task_struct *p)
-{
- u64 utime, stime;
-
- task_cputime(p, &utime, &stime);
-
- return utime;
+ return !(~pct->bases[CPUCLOCK_PROF].nextevt |
+ ~pct->bases[CPUCLOCK_VIRT].nextevt |
+ ~pct->bases[CPUCLOCK_SCHED].nextevt);
}
static int
posix_cpu_clock_getres(const clockid_t which_clock, struct timespec64 *tp)
{
- int error = check_clock(which_clock);
+ int error = validate_clock_permissions(which_clock);
+
if (!error) {
tp->tv_sec = 0;
tp->tv_nsec = ((NSEC_PER_SEC + HZ - 1) / HZ);
@@ -142,42 +180,66 @@
}
static int
-posix_cpu_clock_set(const clockid_t which_clock, const struct timespec64 *tp)
+posix_cpu_clock_set(const clockid_t clock, const struct timespec64 *tp)
{
+ int error = validate_clock_permissions(clock);
+
/*
* You can never reset a CPU clock, but we check for other errors
* in the call before failing with EPERM.
*/
- int error = check_clock(which_clock);
- if (error == 0) {
- error = -EPERM;
- }
- return error;
+ return error ? : -EPERM;
}
-
/*
- * Sample a per-thread clock for the given task.
+ * Sample a per-thread clock for the given task. clkid is validated.
*/
-static int cpu_clock_sample(const clockid_t which_clock,
- struct task_struct *p, u64 *sample)
+static u64 cpu_clock_sample(const clockid_t clkid, struct task_struct *p)
{
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
+ u64 utime, stime;
+
+ if (clkid == CPUCLOCK_SCHED)
+ return task_sched_runtime(p);
+
+ task_cputime(p, &utime, &stime);
+
+ switch (clkid) {
case CPUCLOCK_PROF:
- *sample = prof_ticks(p);
- break;
+ return utime + stime;
case CPUCLOCK_VIRT:
- *sample = virt_ticks(p);
- break;
- case CPUCLOCK_SCHED:
- *sample = task_sched_runtime(p);
- break;
+ return utime;
+ default:
+ WARN_ON_ONCE(1);
}
return 0;
}
+static inline void store_samples(u64 *samples, u64 stime, u64 utime, u64 rtime)
+{
+ samples[CPUCLOCK_PROF] = stime + utime;
+ samples[CPUCLOCK_VIRT] = utime;
+ samples[CPUCLOCK_SCHED] = rtime;
+}
+
+static void task_sample_cputime(struct task_struct *p, u64 *samples)
+{
+ u64 stime, utime;
+
+ task_cputime(p, &utime, &stime);
+ store_samples(samples, stime, utime, p->se.sum_exec_runtime);
+}
+
+static void proc_sample_cputime_atomic(struct task_cputime_atomic *at,
+ u64 *samples)
+{
+ u64 stime, utime, rtime;
+
+ utime = atomic64_read(&at->utime);
+ stime = atomic64_read(&at->stime);
+ rtime = atomic64_read(&at->sum_exec_runtime);
+ store_samples(samples, stime, utime, rtime);
+}
+
/*
* Set cputime to sum_cputime if sum_cputime > cputime. Use cmpxchg
* to avoid race conditions with concurrent updates to cputime.
@@ -193,29 +255,56 @@
}
}
-static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum)
+static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic,
+ struct task_cputime *sum)
{
__update_gt_cputime(&cputime_atomic->utime, sum->utime);
__update_gt_cputime(&cputime_atomic->stime, sum->stime);
__update_gt_cputime(&cputime_atomic->sum_exec_runtime, sum->sum_exec_runtime);
}
-/* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */
-static inline void sample_cputime_atomic(struct task_cputime *times,
- struct task_cputime_atomic *atomic_times)
-{
- times->utime = atomic64_read(&atomic_times->utime);
- times->stime = atomic64_read(&atomic_times->stime);
- times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime);
-}
-
-void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
+/**
+ * thread_group_sample_cputime - Sample cputime for a given task
+ * @tsk: Task for which cputime needs to be started
+ * @samples: Storage for time samples
+ *
+ * Called from sys_getitimer() to calculate the expiry time of an active
+ * timer. That means group cputime accounting is already active. Called
+ * with task sighand lock held.
+ *
+ * Updates @times with an uptodate sample of the thread group cputimes.
+ */
+void thread_group_sample_cputime(struct task_struct *tsk, u64 *samples)
{
struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
- struct task_cputime sum;
+ struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
+
+ WARN_ON_ONCE(!pct->timers_active);
+
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
+}
+
+/**
+ * thread_group_start_cputime - Start cputime and return a sample
+ * @tsk: Task for which cputime needs to be started
+ * @samples: Storage for time samples
+ *
+ * The thread group cputime accouting is avoided when there are no posix
+ * CPU timers armed. Before starting a timer it's required to check whether
+ * the time accounting is active. If not, a full update of the atomic
+ * accounting store needs to be done and the accounting enabled.
+ *
+ * Updates @times with an uptodate sample of the thread group cputimes.
+ */
+static void thread_group_start_cputime(struct task_struct *tsk, u64 *samples)
+{
+ struct thread_group_cputimer *cputimer = &tsk->signal->cputimer;
+ struct posix_cputimers *pct = &tsk->signal->posix_cputimers;
/* Check if cputimer isn't running. This is accessed without locking. */
- if (!READ_ONCE(cputimer->running)) {
+ if (!READ_ONCE(pct->timers_active)) {
+ struct task_cputime sum;
+
/*
* The POSIX timer interface allows for absolute time expiry
* values through the TIMER_ABSTIME flag, therefore we have
@@ -225,96 +314,71 @@
update_gt_cputime(&cputimer->cputime_atomic, &sum);
/*
- * We're setting cputimer->running without a lock. Ensure
- * this only gets written to in one operation. We set
- * running after update_gt_cputime() as a small optimization,
- * but barriers are not required because update_gt_cputime()
+ * We're setting timers_active without a lock. Ensure this
+ * only gets written to in one operation. We set it after
+ * update_gt_cputime() as a small optimization, but
+ * barriers are not required because update_gt_cputime()
* can handle concurrent updates.
*/
- WRITE_ONCE(cputimer->running, true);
+ WRITE_ONCE(pct->timers_active, true);
}
- sample_cputime_atomic(times, &cputimer->cputime_atomic);
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
+}
+
+static void __thread_group_cputime(struct task_struct *tsk, u64 *samples)
+{
+ struct task_cputime ct;
+
+ thread_group_cputime(tsk, &ct);
+ store_samples(samples, ct.stime, ct.utime, ct.sum_exec_runtime);
}
/*
- * Sample a process (thread group) clock for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
+ * Sample a process (thread group) clock for the given task clkid. If the
+ * group's cputime accounting is already enabled, read the atomic
+ * store. Otherwise a full update is required. Task's sighand lock must be
+ * held to protect the task traversal on a full update. clkid is already
+ * validated.
*/
-static int cpu_clock_sample_group(const clockid_t which_clock,
- struct task_struct *p,
- u64 *sample)
+static u64 cpu_clock_sample_group(const clockid_t clkid, struct task_struct *p,
+ bool start)
{
- struct task_cputime cputime;
+ struct thread_group_cputimer *cputimer = &p->signal->cputimer;
+ struct posix_cputimers *pct = &p->signal->posix_cputimers;
+ u64 samples[CPUCLOCK_MAX];
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
+ if (!READ_ONCE(pct->timers_active)) {
+ if (start)
+ thread_group_start_cputime(p, samples);
+ else
+ __thread_group_cputime(p, samples);
+ } else {
+ proc_sample_cputime_atomic(&cputimer->cputime_atomic, samples);
+ }
+
+ return samples[clkid];
+}
+
+static int posix_cpu_clock_get(const clockid_t clock, struct timespec64 *tp)
+{
+ const clockid_t clkid = CPUCLOCK_WHICH(clock);
+ struct task_struct *tsk;
+ u64 t;
+
+ tsk = get_task_for_clock_get(clock);
+ if (!tsk)
return -EINVAL;
- case CPUCLOCK_PROF:
- thread_group_cputime(p, &cputime);
- *sample = cputime.utime + cputime.stime;
- break;
- case CPUCLOCK_VIRT:
- thread_group_cputime(p, &cputime);
- *sample = cputime.utime;
- break;
- case CPUCLOCK_SCHED:
- thread_group_cputime(p, &cputime);
- *sample = cputime.sum_exec_runtime;
- break;
- }
+
+ if (CPUCLOCK_PERTHREAD(clock))
+ t = cpu_clock_sample(clkid, tsk);
+ else
+ t = cpu_clock_sample_group(clkid, tsk, false);
+ put_task_struct(tsk);
+
+ *tp = ns_to_timespec64(t);
return 0;
}
-static int posix_cpu_clock_get_task(struct task_struct *tsk,
- const clockid_t which_clock,
- struct timespec64 *tp)
-{
- int err = -EINVAL;
- u64 rtn;
-
- if (CPUCLOCK_PERTHREAD(which_clock)) {
- if (same_thread_group(tsk, current))
- err = cpu_clock_sample(which_clock, tsk, &rtn);
- } else {
- if (tsk == current || thread_group_leader(tsk))
- err = cpu_clock_sample_group(which_clock, tsk, &rtn);
- }
-
- if (!err)
- *tp = ns_to_timespec64(rtn);
-
- return err;
-}
-
-
-static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec64 *tp)
-{
- const pid_t pid = CPUCLOCK_PID(which_clock);
- int err = -EINVAL;
-
- if (pid == 0) {
- /*
- * Special case constant value for our own clocks.
- * We don't have to do any lookup to find ourselves.
- */
- err = posix_cpu_clock_get_task(current, which_clock, tp);
- } else {
- /*
- * Find the given PID, and validate that the caller
- * should be able to see it.
- */
- struct task_struct *p;
- rcu_read_lock();
- p = find_task_by_vpid(pid);
- if (p)
- err = posix_cpu_clock_get_task(p, which_clock, tp);
- rcu_read_unlock();
- }
-
- return err;
-}
-
/*
* Validate the clockid_t for a new CPU-clock timer, and initialize the timer.
* This is called from sys_timer_create() and do_cpu_nanosleep() with the
@@ -322,44 +386,15 @@
*/
static int posix_cpu_timer_create(struct k_itimer *new_timer)
{
- int ret = 0;
- const pid_t pid = CPUCLOCK_PID(new_timer->it_clock);
- struct task_struct *p;
+ struct task_struct *p = get_task_for_clock(new_timer->it_clock);
- if (CPUCLOCK_WHICH(new_timer->it_clock) >= CPUCLOCK_MAX)
+ if (!p)
return -EINVAL;
new_timer->kclock = &clock_posix_cpu;
-
- INIT_LIST_HEAD(&new_timer->it.cpu.entry);
-
- rcu_read_lock();
- if (CPUCLOCK_PERTHREAD(new_timer->it_clock)) {
- if (pid == 0) {
- p = current;
- } else {
- p = find_task_by_vpid(pid);
- if (p && !same_thread_group(p, current))
- p = NULL;
- }
- } else {
- if (pid == 0) {
- p = current->group_leader;
- } else {
- p = find_task_by_vpid(pid);
- if (p && !has_group_leader_pid(p))
- p = NULL;
- }
- }
+ timerqueue_init(&new_timer->it.cpu.node);
new_timer->it.cpu.task = p;
- if (p) {
- get_task_struct(p);
- } else {
- ret = -EINVAL;
- }
- rcu_read_unlock();
-
- return ret;
+ return 0;
}
/*
@@ -370,12 +405,14 @@
*/
static int posix_cpu_timer_del(struct k_itimer *timer)
{
- int ret = 0;
- unsigned long flags;
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct task_struct *p = ctmr->task;
struct sighand_struct *sighand;
- struct task_struct *p = timer->it.cpu.task;
+ unsigned long flags;
+ int ret = 0;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return -EINVAL;
/*
* Protect against sighand release/switch in exit/exec and process/
@@ -384,15 +421,15 @@
sighand = lock_task_sighand(p, &flags);
if (unlikely(sighand == NULL)) {
/*
- * We raced with the reaping of the task.
- * The deletion should have cleared us off the list.
+ * This raced with the reaping of the task. The exit cleanup
+ * should have removed this timer from the timer queue.
*/
- WARN_ON_ONCE(!list_empty(&timer->it.cpu.entry));
+ WARN_ON_ONCE(ctmr->head || timerqueue_node_queued(&ctmr->node));
} else {
if (timer->it.cpu.firing)
ret = TIMER_RETRY;
else
- list_del(&timer->it.cpu.entry);
+ cpu_timer_dequeue(ctmr);
unlock_task_sighand(p, &flags);
}
@@ -403,25 +440,30 @@
return ret;
}
-static void cleanup_timers_list(struct list_head *head)
+static void cleanup_timerqueue(struct timerqueue_head *head)
{
- struct cpu_timer_list *timer, *next;
+ struct timerqueue_node *node;
+ struct cpu_timer *ctmr;
- list_for_each_entry_safe(timer, next, head, entry)
- list_del_init(&timer->entry);
+ while ((node = timerqueue_getnext(head))) {
+ timerqueue_del(head, node);
+ ctmr = container_of(node, struct cpu_timer, node);
+ ctmr->head = NULL;
+ }
}
/*
- * Clean out CPU timers still ticking when a thread exited. The task
- * pointer is cleared, and the expiry time is replaced with the residual
- * time for later timer_gettime calls to return.
+ * Clean out CPU timers which are still armed when a thread exits. The
+ * timers are only removed from the list. No other updates are done. The
+ * corresponding posix timers are still accessible, but cannot be rearmed.
+ *
* This must be called with the siglock held.
*/
-static void cleanup_timers(struct list_head *head)
+static void cleanup_timers(struct posix_cputimers *pct)
{
- cleanup_timers_list(head);
- cleanup_timers_list(++head);
- cleanup_timers_list(++head);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_PROF].tqhead);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_VIRT].tqhead);
+ cleanup_timerqueue(&pct->bases[CPUCLOCK_SCHED].tqhead);
}
/*
@@ -431,16 +473,11 @@
*/
void posix_cpu_timers_exit(struct task_struct *tsk)
{
- cleanup_timers(tsk->cpu_timers);
+ cleanup_timers(&tsk->posix_cputimers);
}
void posix_cpu_timers_exit_group(struct task_struct *tsk)
{
- cleanup_timers(tsk->signal->cpu_timers);
-}
-
-static inline int expires_gt(u64 expires, u64 new_exp)
-{
- return expires == 0 || expires > new_exp;
+ cleanup_timers(&tsk->signal->posix_cputimers);
}
/*
@@ -449,58 +486,33 @@
*/
static void arm_timer(struct k_itimer *timer)
{
- struct task_struct *p = timer->it.cpu.task;
- struct list_head *head, *listpos;
- struct task_cputime *cputime_expires;
- struct cpu_timer_list *const nt = &timer->it.cpu;
- struct cpu_timer_list *next;
+ int clkidx = CPUCLOCK_WHICH(timer->it_clock);
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ u64 newexp = cpu_timer_getexpires(ctmr);
+ struct task_struct *p = ctmr->task;
+ struct posix_cputimer_base *base;
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- head = p->cpu_timers;
- cputime_expires = &p->cputime_expires;
- } else {
- head = p->signal->cpu_timers;
- cputime_expires = &p->signal->cputime_expires;
- }
- head += CPUCLOCK_WHICH(timer->it_clock);
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ base = p->posix_cputimers.bases + clkidx;
+ else
+ base = p->signal->posix_cputimers.bases + clkidx;
- listpos = head;
- list_for_each_entry(next, head, entry) {
- if (nt->expires < next->expires)
- break;
- listpos = &next->entry;
- }
- list_add(&nt->entry, listpos);
+ if (!cpu_timer_enqueue(&base->tqhead, ctmr))
+ return;
- if (listpos == head) {
- u64 exp = nt->expires;
+ /*
+ * We are the new earliest-expiring POSIX 1.b timer, hence
+ * need to update expiration cache. Take into account that
+ * for process timers we share expiration cache with itimers
+ * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
+ */
+ if (newexp < base->nextevt)
+ base->nextevt = newexp;
- /*
- * We are the new earliest-expiring POSIX 1.b timer, hence
- * need to update expiration cache. Take into account that
- * for process timers we share expiration cache with itimers
- * and RLIMIT_CPU and for thread timers with RLIMIT_RTTIME.
- */
-
- switch (CPUCLOCK_WHICH(timer->it_clock)) {
- case CPUCLOCK_PROF:
- if (expires_gt(cputime_expires->prof_exp, exp))
- cputime_expires->prof_exp = exp;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(cputime_expires->virt_exp, exp))
- cputime_expires->virt_exp = exp;
- break;
- case CPUCLOCK_SCHED:
- if (expires_gt(cputime_expires->sched_exp, exp))
- cputime_expires->sched_exp = exp;
- break;
- }
- if (CPUCLOCK_PERTHREAD(timer->it_clock))
- tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
- else
- tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
- }
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ tick_dep_set_task(p, TICK_DEP_BIT_POSIX_TIMER);
+ else
+ tick_dep_set_signal(p->signal, TICK_DEP_BIT_POSIX_TIMER);
}
/*
@@ -508,24 +520,26 @@
*/
static void cpu_timer_fire(struct k_itimer *timer)
{
+ struct cpu_timer *ctmr = &timer->it.cpu;
+
if ((timer->it_sigev_notify & ~SIGEV_THREAD_ID) == SIGEV_NONE) {
/*
* User don't want any signal.
*/
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
} else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
*/
wake_up_process(timer->it_process);
- timer->it.cpu.expires = 0;
- } else if (timer->it.cpu.incr == 0) {
+ cpu_timer_setexpires(ctmr, 0);
+ } else if (!timer->it_interval) {
/*
* One-shot timer. Clear it as soon as it's fired.
*/
posix_timer_event(timer, 0);
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
/*
* The signal did not get queued because the signal
@@ -539,33 +553,6 @@
}
/*
- * Sample a process (thread group) timer for the given group_leader task.
- * Must be called with task sighand lock held for safe while_each_thread()
- * traversal.
- */
-static int cpu_timer_sample_group(const clockid_t which_clock,
- struct task_struct *p, u64 *sample)
-{
- struct task_cputime cputime;
-
- thread_group_cputimer(p, &cputime);
- switch (CPUCLOCK_WHICH(which_clock)) {
- default:
- return -EINVAL;
- case CPUCLOCK_PROF:
- *sample = cputime.utime + cputime.stime;
- break;
- case CPUCLOCK_VIRT:
- *sample = cputime.utime;
- break;
- case CPUCLOCK_SCHED:
- *sample = cputime.sum_exec_runtime;
- break;
- }
- return 0;
-}
-
-/*
* Guts of sys_timer_settime for CPU timers.
* This is called with the timer locked and interrupts disabled.
* If we return TIMER_RETRY, it's necessary to release the timer's lock
@@ -574,13 +561,16 @@
static int posix_cpu_timer_set(struct k_itimer *timer, int timer_flags,
struct itimerspec64 *new, struct itimerspec64 *old)
{
- unsigned long flags;
- struct sighand_struct *sighand;
- struct task_struct *p = timer->it.cpu.task;
+ clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
u64 old_expires, new_expires, old_incr, val;
- int ret;
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct task_struct *p = ctmr->task;
+ struct sighand_struct *sighand;
+ unsigned long flags;
+ int ret = 0;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return -EINVAL;
/*
* Use the to_ktime conversion because that clamps the maximum
@@ -597,22 +587,21 @@
* If p has just been reaped, we can no
* longer get any information about it at all.
*/
- if (unlikely(sighand == NULL)) {
+ if (unlikely(sighand == NULL))
return -ESRCH;
- }
/*
* Disarm any old timer after extracting its expiry time.
*/
+ old_incr = timer->it_interval;
+ old_expires = cpu_timer_getexpires(ctmr);
- ret = 0;
- old_incr = timer->it.cpu.incr;
- old_expires = timer->it.cpu.expires;
if (unlikely(timer->it.cpu.firing)) {
timer->it.cpu.firing = -1;
ret = TIMER_RETRY;
- } else
- list_del_init(&timer->it.cpu.entry);
+ } else {
+ cpu_timer_dequeue(ctmr);
+ }
/*
* We need to sample the current value to convert the new
@@ -622,11 +611,10 @@
* times (in arm_timer). With an absolute time, we must
* check if it's already passed. In short, we need a sample.
*/
- if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- cpu_clock_sample(timer->it_clock, p, &val);
- } else {
- cpu_timer_sample_group(timer->it_clock, p, &val);
- }
+ if (CPUCLOCK_PERTHREAD(timer->it_clock))
+ val = cpu_clock_sample(clkid, p);
+ else
+ val = cpu_clock_sample_group(clkid, p, true);
if (old) {
if (old_expires == 0) {
@@ -634,18 +622,16 @@
old->it_value.tv_nsec = 0;
} else {
/*
- * Update the timer in case it has
- * overrun already. If it has,
- * we'll report it as having overrun
- * and with the next reloaded timer
- * already ticking, though we are
- * swallowing that pending
- * notification here to install the
- * new setting.
+ * Update the timer in case it has overrun already.
+ * If it has, we'll report it as having overrun and
+ * with the next reloaded timer already ticking,
+ * though we are swallowing that pending
+ * notification here to install the new setting.
*/
- bump_cpu_timer(timer, val);
- if (val < timer->it.cpu.expires) {
- old_expires = timer->it.cpu.expires - val;
+ u64 exp = bump_cpu_timer(timer, val);
+
+ if (val < exp) {
+ old_expires = exp - val;
old->it_value = ns_to_timespec64(old_expires);
} else {
old->it_value.tv_nsec = 1;
@@ -674,7 +660,7 @@
* For a timer with no notification action, we don't actually
* arm the timer (we'll just fake it for timer_gettime).
*/
- timer->it.cpu.expires = new_expires;
+ cpu_timer_setexpires(ctmr, new_expires);
if (new_expires != 0 && val < new_expires) {
arm_timer(timer);
}
@@ -684,7 +670,7 @@
* Install the new reload setting, and
* set up the signal and overrun bookkeeping.
*/
- timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);
+ timer->it_interval = timespec64_to_ktime(new->it_interval);
/*
* This acts as a modification timestamp for the timer,
@@ -715,24 +701,27 @@
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec64 *itp)
{
- u64 now;
- struct task_struct *p = timer->it.cpu.task;
+ clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ u64 now, expires = cpu_timer_getexpires(ctmr);
+ struct task_struct *p = ctmr->task;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return;
/*
* Easy part: convert the reload time.
*/
- itp->it_interval = ns_to_timespec64(timer->it.cpu.incr);
+ itp->it_interval = ktime_to_timespec64(timer->it_interval);
- if (!timer->it.cpu.expires)
+ if (!expires)
return;
/*
* Sample the clock to take the difference with the expiry time.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- cpu_clock_sample(timer->it_clock, p, &now);
+ now = cpu_clock_sample(clkid, p);
} else {
struct sighand_struct *sighand;
unsigned long flags;
@@ -747,18 +736,18 @@
/*
* The process has been reaped.
* We can't even collect a sample any more.
- * Call the timer disarmed, nothing else to do.
+ * Disarm the timer, nothing else to do.
*/
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
return;
} else {
- cpu_timer_sample_group(timer->it_clock, p, &now);
+ now = cpu_clock_sample_group(clkid, p, false);
unlock_task_sighand(p, &flags);
}
}
- if (now < timer->it.cpu.expires) {
- itp->it_value = ns_to_timespec64(timer->it.cpu.expires - now);
+ if (now < expires) {
+ itp->it_value = ns_to_timespec64(expires - now);
} else {
/*
* The timer should have expired already, but the firing
@@ -769,26 +758,42 @@
}
}
-static unsigned long long
-check_timers_list(struct list_head *timers,
- struct list_head *firing,
- unsigned long long curr)
+#define MAX_COLLECTED 20
+
+static u64 collect_timerqueue(struct timerqueue_head *head,
+ struct list_head *firing, u64 now)
{
- int maxfire = 20;
+ struct timerqueue_node *next;
+ int i = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t;
+ while ((next = timerqueue_getnext(head))) {
+ struct cpu_timer *ctmr;
+ u64 expires;
- t = list_first_entry(timers, struct cpu_timer_list, entry);
+ ctmr = container_of(next, struct cpu_timer, node);
+ expires = cpu_timer_getexpires(ctmr);
+ /* Limit the number of timers to expire at once */
+ if (++i == MAX_COLLECTED || now < expires)
+ return expires;
- if (!--maxfire || curr < t->expires)
- return t->expires;
-
- t->firing = 1;
- list_move_tail(&t->entry, firing);
+ ctmr->firing = 1;
+ cpu_timer_dequeue(ctmr);
+ list_add_tail(&ctmr->elist, firing);
}
- return 0;
+ return U64_MAX;
+}
+
+static void collect_posix_cputimers(struct posix_cputimers *pct, u64 *samples,
+ struct list_head *firing)
+{
+ struct posix_cputimer_base *base = pct->bases;
+ int i;
+
+ for (i = 0; i < CPUCLOCK_MAX; i++, base++) {
+ base->nextevt = collect_timerqueue(&base->tqhead, firing,
+ samples[i]);
+ }
}
static inline void check_dl_overrun(struct task_struct *tsk)
@@ -799,6 +804,20 @@
}
}
+static bool check_rlimit(u64 time, u64 limit, int signo, bool rt, bool hard)
+{
+ if (time < limit)
+ return false;
+
+ if (print_fatal_signals) {
+ pr_info("%s Watchdog Timeout (%s): %s[%d]\n",
+ rt ? "RT" : "CPU", hard ? "hard" : "soft",
+ current->comm, task_pid_nr(current));
+ }
+ __group_send_sig_info(signo, SEND_SIG_PRIV, current);
+ return true;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them off
* the tsk->cpu_timers[N] list onto the firing list. Here we update the
@@ -807,76 +826,50 @@
static void check_thread_timers(struct task_struct *tsk,
struct list_head *firing)
{
- struct list_head *timers = tsk->cpu_timers;
- struct task_cputime *tsk_expires = &tsk->cputime_expires;
- u64 expires;
+ struct posix_cputimers *pct = &tsk->posix_cputimers;
+ u64 samples[CPUCLOCK_MAX];
unsigned long soft;
if (dl_task(tsk))
check_dl_overrun(tsk);
- /*
- * If cputime_expires is zero, then there are no active
- * per thread CPU timers.
- */
- if (task_cputime_zero(&tsk->cputime_expires))
+ if (expiry_cache_is_inactive(pct))
return;
- expires = check_timers_list(timers, firing, prof_ticks(tsk));
- tsk_expires->prof_exp = expires;
-
- expires = check_timers_list(++timers, firing, virt_ticks(tsk));
- tsk_expires->virt_exp = expires;
-
- tsk_expires->sched_exp = check_timers_list(++timers, firing,
- tsk->se.sum_exec_runtime);
+ task_sample_cputime(tsk, samples);
+ collect_posix_cputimers(pct, samples, firing);
/*
* Check for the special case thread timers.
*/
soft = task_rlimit(tsk, RLIMIT_RTTIME);
if (soft != RLIM_INFINITY) {
+ /* Task RT timeout is accounted in jiffies. RTTIME is usec */
+ unsigned long rttime = tsk->rt.timeout * (USEC_PER_SEC / HZ);
unsigned long hard = task_rlimit_max(tsk, RLIMIT_RTTIME);
+ /* At the hard limit, send SIGKILL. No further action. */
if (hard != RLIM_INFINITY &&
- tsk->rt.timeout > DIV_ROUND_UP(hard, USEC_PER_SEC/HZ)) {
- /*
- * At the hard limit, we just die.
- * No need to calculate anything else now.
- */
- if (print_fatal_signals) {
- pr_info("CPU Watchdog Timeout (hard): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ check_rlimit(rttime, hard, SIGKILL, true, true))
return;
- }
- if (tsk->rt.timeout > DIV_ROUND_UP(soft, USEC_PER_SEC/HZ)) {
- /*
- * At the soft limit, send a SIGXCPU every second.
- */
- if (soft < hard) {
- soft += USEC_PER_SEC;
- tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur =
- soft;
- }
- if (print_fatal_signals) {
- pr_info("RT Watchdog Timeout (soft): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
+
+ /* At the soft limit, send a SIGXCPU every second */
+ if (check_rlimit(rttime, soft, SIGXCPU, true, false)) {
+ soft += USEC_PER_SEC;
+ tsk->signal->rlim[RLIMIT_RTTIME].rlim_cur = soft;
}
}
- if (task_cputime_zero(tsk_expires))
+
+ if (expiry_cache_is_inactive(pct))
tick_dep_clear_task(tsk, TICK_DEP_BIT_POSIX_TIMER);
}
static inline void stop_process_timers(struct signal_struct *sig)
{
- struct thread_group_cputimer *cputimer = &sig->cputimer;
+ struct posix_cputimers *pct = &sig->posix_cputimers;
- /* Turn off cputimer->running. This is done without locking. */
- WRITE_ONCE(cputimer->running, false);
+ /* Turn off the active flag. This is done without locking. */
+ WRITE_ONCE(pct->timers_active, false);
tick_dep_clear_signal(sig, TICK_DEP_BIT_POSIX_TIMER);
}
@@ -898,7 +891,7 @@
__group_send_sig_info(signo, SEND_SIG_PRIV, tsk);
}
- if (it->expires && (!*expires || it->expires < *expires))
+ if (it->expires && it->expires < *expires)
*expires = it->expires;
}
@@ -911,90 +904,69 @@
struct list_head *firing)
{
struct signal_struct *const sig = tsk->signal;
- u64 utime, ptime, virt_expires, prof_expires;
- u64 sum_sched_runtime, sched_expires;
- struct list_head *timers = sig->cpu_timers;
- struct task_cputime cputime;
+ struct posix_cputimers *pct = &sig->posix_cputimers;
+ u64 samples[CPUCLOCK_MAX];
unsigned long soft;
- if (dl_task(tsk))
- check_dl_overrun(tsk);
-
/*
- * If cputimer is not running, then there are no active
- * process wide timers (POSIX 1.b, itimers, RLIMIT_CPU).
+ * If there are no active process wide timers (POSIX 1.b, itimers,
+ * RLIMIT_CPU) nothing to check. Also skip the process wide timer
+ * processing when there is already another task handling them.
*/
- if (!READ_ONCE(tsk->signal->cputimer.running))
+ if (!READ_ONCE(pct->timers_active) || pct->expiry_active)
return;
- /*
+ /*
* Signify that a thread is checking for process timers.
* Write access to this field is protected by the sighand lock.
*/
- sig->cputimer.checking_timer = true;
+ pct->expiry_active = true;
/*
- * Collect the current process totals.
+ * Collect the current process totals. Group accounting is active
+ * so the sample can be taken directly.
*/
- thread_group_cputimer(tsk, &cputime);
- utime = cputime.utime;
- ptime = utime + cputime.stime;
- sum_sched_runtime = cputime.sum_exec_runtime;
-
- prof_expires = check_timers_list(timers, firing, ptime);
- virt_expires = check_timers_list(++timers, firing, utime);
- sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
+ proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic, samples);
+ collect_posix_cputimers(pct, samples, firing);
/*
* Check for the special case process timers.
*/
- check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF], &prof_expires, ptime,
- SIGPROF);
- check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT], &virt_expires, utime,
- SIGVTALRM);
+ check_cpu_itimer(tsk, &sig->it[CPUCLOCK_PROF],
+ &pct->bases[CPUCLOCK_PROF].nextevt,
+ samples[CPUCLOCK_PROF], SIGPROF);
+ check_cpu_itimer(tsk, &sig->it[CPUCLOCK_VIRT],
+ &pct->bases[CPUCLOCK_VIRT].nextevt,
+ samples[CPUCLOCK_VIRT], SIGVTALRM);
+
soft = task_rlimit(tsk, RLIMIT_CPU);
if (soft != RLIM_INFINITY) {
- unsigned long psecs = div_u64(ptime, NSEC_PER_SEC);
+ /* RLIMIT_CPU is in seconds. Samples are nanoseconds */
unsigned long hard = task_rlimit_max(tsk, RLIMIT_CPU);
- u64 x;
- if (psecs >= hard) {
- /*
- * At the hard limit, we just die.
- * No need to calculate anything else now.
- */
- if (print_fatal_signals) {
- pr_info("RT Watchdog Timeout (hard): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGKILL, SEND_SIG_PRIV, tsk);
+ u64 ptime = samples[CPUCLOCK_PROF];
+ u64 softns = (u64)soft * NSEC_PER_SEC;
+ u64 hardns = (u64)hard * NSEC_PER_SEC;
+
+ /* At the hard limit, send SIGKILL. No further action. */
+ if (hard != RLIM_INFINITY &&
+ check_rlimit(ptime, hardns, SIGKILL, false, true))
return;
+
+ /* At the soft limit, send a SIGXCPU every second */
+ if (check_rlimit(ptime, softns, SIGXCPU, false, false)) {
+ sig->rlim[RLIMIT_CPU].rlim_cur = soft + 1;
+ softns += NSEC_PER_SEC;
}
- if (psecs >= soft) {
- /*
- * At the soft limit, send a SIGXCPU every second.
- */
- if (print_fatal_signals) {
- pr_info("CPU Watchdog Timeout (soft): %s[%d]\n",
- tsk->comm, task_pid_nr(tsk));
- }
- __group_send_sig_info(SIGXCPU, SEND_SIG_PRIV, tsk);
- if (soft < hard) {
- soft++;
- sig->rlim[RLIMIT_CPU].rlim_cur = soft;
- }
- }
- x = soft * NSEC_PER_SEC;
- if (!prof_expires || x < prof_expires)
- prof_expires = x;
+
+ /* Update the expiry cache */
+ if (softns < pct->bases[CPUCLOCK_PROF].nextevt)
+ pct->bases[CPUCLOCK_PROF].nextevt = softns;
}
- sig->cputime_expires.prof_exp = prof_expires;
- sig->cputime_expires.virt_exp = virt_expires;
- sig->cputime_expires.sched_exp = sched_expires;
- if (task_cputime_zero(&sig->cputime_expires))
+ if (expiry_cache_is_inactive(pct))
stop_process_timers(sig);
- sig->cputimer.checking_timer = false;
+ pct->expiry_active = false;
}
/*
@@ -1003,18 +975,21 @@
*/
static void posix_cpu_timer_rearm(struct k_itimer *timer)
{
+ clockid_t clkid = CPUCLOCK_WHICH(timer->it_clock);
+ struct cpu_timer *ctmr = &timer->it.cpu;
+ struct task_struct *p = ctmr->task;
struct sighand_struct *sighand;
unsigned long flags;
- struct task_struct *p = timer->it.cpu.task;
u64 now;
- WARN_ON_ONCE(p == NULL);
+ if (WARN_ON_ONCE(!p))
+ return;
/*
* Fetch the current sample and update the timer's expiry time.
*/
if (CPUCLOCK_PERTHREAD(timer->it_clock)) {
- cpu_clock_sample(timer->it_clock, p, &now);
+ now = cpu_clock_sample(clkid, p);
bump_cpu_timer(timer, now);
if (unlikely(p->exit_state))
return;
@@ -1034,13 +1009,13 @@
* The process has been reaped.
* We can't even collect a sample any more.
*/
- timer->it.cpu.expires = 0;
+ cpu_timer_setexpires(ctmr, 0);
return;
} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
/* If the process is dying, no need to rearm */
goto unlock;
}
- cpu_timer_sample_group(timer->it_clock, p, &now);
+ now = cpu_clock_sample_group(clkid, p, true);
bump_cpu_timer(timer, now);
/* Leave the sighand locked for the call below. */
}
@@ -1054,26 +1029,24 @@
}
/**
- * task_cputime_expired - Compare two task_cputime entities.
+ * task_cputimers_expired - Check whether posix CPU timers are expired
*
- * @sample: The task_cputime structure to be checked for expiration.
- * @expires: Expiration times, against which @sample will be checked.
+ * @samples: Array of current samples for the CPUCLOCK clocks
+ * @pct: Pointer to a posix_cputimers container
*
- * Checks @sample against @expires to see if any field of @sample has expired.
- * Returns true if any field of the former is greater than the corresponding
- * field of the latter if the latter field is set. Otherwise returns false.
+ * Returns true if any member of @samples is greater than the corresponding
+ * member of @pct->bases[CLK].nextevt. False otherwise
*/
-static inline int task_cputime_expired(const struct task_cputime *sample,
- const struct task_cputime *expires)
+static inline bool
+task_cputimers_expired(const u64 *samples, struct posix_cputimers *pct)
{
- if (expires->utime && sample->utime >= expires->utime)
- return 1;
- if (expires->stime && sample->utime + sample->stime >= expires->stime)
- return 1;
- if (expires->sum_exec_runtime != 0 &&
- sample->sum_exec_runtime >= expires->sum_exec_runtime)
- return 1;
- return 0;
+ int i;
+
+ for (i = 0; i < CPUCLOCK_MAX; i++) {
+ if (samples[i] >= pct->bases[i].nextevt)
+ return true;
+ }
+ return false;
}
/**
@@ -1086,48 +1059,50 @@
* timers and compare them with the corresponding expiration times. Return
* true if a timer has expired, else return false.
*/
-static inline int fastpath_timer_check(struct task_struct *tsk)
+static inline bool fastpath_timer_check(struct task_struct *tsk)
{
+ struct posix_cputimers *pct = &tsk->posix_cputimers;
struct signal_struct *sig;
- if (!task_cputime_zero(&tsk->cputime_expires)) {
- struct task_cputime task_sample;
+ if (!expiry_cache_is_inactive(pct)) {
+ u64 samples[CPUCLOCK_MAX];
- task_cputime(tsk, &task_sample.utime, &task_sample.stime);
- task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime;
- if (task_cputime_expired(&task_sample, &tsk->cputime_expires))
- return 1;
+ task_sample_cputime(tsk, samples);
+ if (task_cputimers_expired(samples, pct))
+ return true;
}
sig = tsk->signal;
+ pct = &sig->posix_cputimers;
/*
- * Check if thread group timers expired when the cputimer is
- * running and no other thread in the group is already checking
- * for thread group cputimers. These fields are read without the
- * sighand lock. However, this is fine because this is meant to
- * be a fastpath heuristic to determine whether we should try to
- * acquire the sighand lock to check/handle timers.
+ * Check if thread group timers expired when timers are active and
+ * no other thread in the group is already handling expiry for
+ * thread group cputimers. These fields are read without the
+ * sighand lock. However, this is fine because this is meant to be
+ * a fastpath heuristic to determine whether we should try to
+ * acquire the sighand lock to handle timer expiry.
*
- * In the worst case scenario, if 'running' or 'checking_timer' gets
- * set but the current thread doesn't see the change yet, we'll wait
- * until the next thread in the group gets a scheduler interrupt to
- * handle the timer. This isn't an issue in practice because these
- * types of delays with signals actually getting sent are expected.
+ * In the worst case scenario, if concurrently timers_active is set
+ * or expiry_active is cleared, but the current thread doesn't see
+ * the change yet, the timer checks are delayed until the next
+ * thread in the group gets a scheduler interrupt to handle the
+ * timer. This isn't an issue in practice because these types of
+ * delays with signals actually getting sent are expected.
*/
- if (READ_ONCE(sig->cputimer.running) &&
- !READ_ONCE(sig->cputimer.checking_timer)) {
- struct task_cputime group_sample;
+ if (READ_ONCE(pct->timers_active) && !READ_ONCE(pct->expiry_active)) {
+ u64 samples[CPUCLOCK_MAX];
- sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic);
+ proc_sample_cputime_atomic(&sig->cputimer.cputime_atomic,
+ samples);
- if (task_cputime_expired(&group_sample, &sig->cputime_expires))
- return 1;
+ if (task_cputimers_expired(samples, pct))
+ return true;
}
if (dl_task(tsk) && tsk->dl.dl_overrun)
- return 1;
+ return true;
- return 0;
+ return false;
}
/*
@@ -1135,11 +1110,12 @@
* already updated our counts. We need to check if any timers fire now.
* Interrupts are disabled.
*/
-void run_posix_cpu_timers(struct task_struct *tsk)
+void run_posix_cpu_timers(void)
{
- LIST_HEAD(firing);
+ struct task_struct *tsk = current;
struct k_itimer *timer, *next;
unsigned long flags;
+ LIST_HEAD(firing);
lockdep_assert_irqs_disabled();
@@ -1177,11 +1153,11 @@
* each timer's lock before clearing its firing flag, so no
* timer call will interfere.
*/
- list_for_each_entry_safe(timer, next, &firing, it.cpu.entry) {
+ list_for_each_entry_safe(timer, next, &firing, it.cpu.elist) {
int cpu_firing;
spin_lock(&timer->it_lock);
- list_del_init(&timer->it.cpu.entry);
+ list_del_init(&timer->it.cpu.elist);
cpu_firing = timer->it.cpu.firing;
timer->it.cpu.firing = 0;
/*
@@ -1199,16 +1175,18 @@
* Set one of the process-wide special case CPU timers or RLIMIT_CPU.
* The tsk->sighand->siglock must be held by the caller.
*/
-void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
+void set_process_cpu_timer(struct task_struct *tsk, unsigned int clkid,
u64 *newval, u64 *oldval)
{
- u64 now;
- int ret;
+ u64 now, *nextevt;
- WARN_ON_ONCE(clock_idx == CPUCLOCK_SCHED);
- ret = cpu_timer_sample_group(clock_idx, tsk, &now);
+ if (WARN_ON_ONCE(clkid >= CPUCLOCK_SCHED))
+ return;
- if (oldval && ret != -EINVAL) {
+ nextevt = &tsk->signal->posix_cputimers.bases[clkid].nextevt;
+ now = cpu_clock_sample_group(clkid, tsk, true);
+
+ if (oldval) {
/*
* We are setting itimer. The *oldval is absolute and we update
* it to be relative, *newval argument is relative and we update
@@ -1229,19 +1207,11 @@
}
/*
- * Update expiration cache if we are the earliest timer, or eventually
- * RLIMIT_CPU limit is earlier than prof_exp cpu timer expire.
+ * Update expiration cache if this is the earliest timer. CPUCLOCK_PROF
+ * expiry cache is also used by RLIMIT_CPU!.
*/
- switch (clock_idx) {
- case CPUCLOCK_PROF:
- if (expires_gt(tsk->signal->cputime_expires.prof_exp, *newval))
- tsk->signal->cputime_expires.prof_exp = *newval;
- break;
- case CPUCLOCK_VIRT:
- if (expires_gt(tsk->signal->cputime_expires.virt_exp, *newval))
- tsk->signal->cputime_expires.virt_exp = *newval;
- break;
- }
+ if (*newval < *nextevt)
+ *nextevt = *newval;
tick_dep_set_signal(tsk->signal, TICK_DEP_BIT_POSIX_TIMER);
}
@@ -1263,6 +1233,7 @@
timer.it_overrun = -1;
error = posix_cpu_timer_create(&timer);
timer.it_process = current;
+
if (!error) {
static struct itimerspec64 zero_it;
struct restart_block *restart;
@@ -1278,7 +1249,7 @@
}
while (!signal_pending(current)) {
- if (timer.it.cpu.expires == 0) {
+ if (!cpu_timer_getexpires(&timer.it.cpu)) {
/*
* Our timer fired and was reset, below
* deletion can not fail.
@@ -1300,7 +1271,7 @@
/*
* We were interrupted by a signal.
*/
- expires = timer.it.cpu.expires;
+ expires = cpu_timer_getexpires(&timer.it.cpu);
error = posix_cpu_timer_set(&timer, 0, &zero_it, &it);
if (!error) {
/*
diff --git a/kernel/time/posix-stubs.c b/kernel/time/posix-stubs.c
index 2c6847d..67df65f 100644
--- a/kernel/time/posix-stubs.c
+++ b/kernel/time/posix-stubs.c
@@ -1,12 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Dummy stubs used when CONFIG_POSIX_TIMERS=n
*
* Created by: Nicolas Pitre, July 2016
* Copyright: (C) 2016 Linaro Limited
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/linkage.h>
@@ -48,6 +45,7 @@
SYS_NI(clock_adjtime);
SYS_NI(getitimer);
SYS_NI(setitimer);
+SYS_NI(clock_adjtime32);
#ifdef __ARCH_WANT_SYS_ALARM
SYS_NI(alarm);
#endif
@@ -153,29 +151,29 @@
#ifdef CONFIG_COMPAT
COMPAT_SYS_NI(timer_create);
-COMPAT_SYS_NI(clock_adjtime);
-COMPAT_SYS_NI(timer_settime);
-COMPAT_SYS_NI(timer_gettime);
COMPAT_SYS_NI(getitimer);
COMPAT_SYS_NI(setitimer);
#endif
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(clock_settime, const clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYS_NI(timer_settime32);
+SYS_NI(timer_gettime32);
+
+SYSCALL_DEFINE2(clock_settime32, const clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
struct timespec64 new_tp;
if (which_clock != CLOCK_REALTIME)
return -EINVAL;
- if (compat_get_timespec64(&new_tp, tp))
+ if (get_old_timespec32(&new_tp, tp))
return -EFAULT;
return do_sys_settimeofday64(&new_tp, NULL);
}
-COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
int ret;
struct timespec64 kernel_tp;
@@ -184,13 +182,13 @@
if (ret)
return ret;
- if (compat_put_timespec64(&kernel_tp, tp))
+ if (put_old_timespec32(&kernel_tp, tp))
return -EFAULT;
return 0;
}
-COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
struct timespec64 rtn_tp = {
.tv_sec = 0,
@@ -201,7 +199,7 @@
case CLOCK_REALTIME:
case CLOCK_MONOTONIC:
case CLOCK_BOOTTIME:
- if (compat_put_timespec64(&rtn_tp, tp))
+ if (put_old_timespec32(&rtn_tp, tp))
return -EFAULT;
return 0;
default:
@@ -209,9 +207,9 @@
}
}
-COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
- struct compat_timespec __user *, rqtp,
- struct compat_timespec __user *, rmtp)
+SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+ struct old_timespec32 __user *, rqtp,
+ struct old_timespec32 __user *, rmtp)
{
struct timespec64 t;
@@ -224,7 +222,7 @@
return -EINVAL;
}
- if (compat_get_timespec64(&t, rqtp))
+ if (get_old_timespec32(&t, rqtp))
return -EFAULT;
if (!timespec64_valid(&t))
return -EINVAL;
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 5a01c4f..0ec5b7a 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1,34 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * linux/kernel/posix-timers.c
- *
- *
* 2002-10-15 Posix Clocks & timers
* by George Anzinger george@mvista.com
- *
* Copyright (C) 2002 2003 by MontaVista Software.
*
* 2004-06-01 Fix CLOCK_REALTIME clock/timer TIMER_ABSTIME bug.
* Copyright (C) 2004 Boris Hu
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
- * MontaVista Software | 1237 East Arques Avenue | Sunnyvale | CA 94085 | USA
- */
-
-/* These are all the functions necessary to implement
- * POSIX clocks & timers
+ * These are all the functions necessary to implement POSIX clocks & timers
*/
#include <linux/mm.h>
#include <linux/interrupt.h>
@@ -200,7 +179,7 @@
}
static int posix_clock_realtime_adj(const clockid_t which_clock,
- struct timex *t)
+ struct __kernel_timex *t)
{
return do_adjtimex(t);
}
@@ -305,7 +284,7 @@
* To protect against the timer going away while the interrupt is queued,
* we require that the it_requeue_pending flag be set.
*/
-void posixtimer_rearm(struct siginfo *info)
+void posixtimer_rearm(struct kernel_siginfo *info)
{
struct k_itimer *timr;
unsigned long flags;
@@ -463,7 +442,7 @@
static void k_itimer_rcu_free(struct rcu_head *head)
{
- struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu);
+ struct k_itimer *tmr = container_of(head, struct k_itimer, rcu);
kmem_cache_free(posix_timers_cache, tmr);
}
@@ -480,7 +459,7 @@
}
put_pid(tmr->it_pid);
sigqueue_free(tmr->sigq);
- call_rcu(&tmr->it.rcu, k_itimer_rcu_free);
+ call_rcu(&tmr->rcu, k_itimer_rcu_free);
}
static int common_timer_create(struct k_itimer *new_timer)
@@ -751,14 +730,14 @@
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(timer_gettime, timer_t, timer_id,
- struct compat_itimerspec __user *, setting)
+SYSCALL_DEFINE2(timer_gettime32, timer_t, timer_id,
+ struct old_itimerspec32 __user *, setting)
{
struct itimerspec64 cur_setting;
int ret = do_timer_gettime(timer_id, &cur_setting);
if (!ret) {
- if (put_compat_itimerspec64(&cur_setting, setting))
+ if (put_old_itimerspec32(&cur_setting, setting))
ret = -EFAULT;
}
return ret;
@@ -826,6 +805,35 @@
return hrtimer_try_to_cancel(&timr->it.real.timer);
}
+static void common_timer_wait_running(struct k_itimer *timer)
+{
+ hrtimer_cancel_wait_running(&timer->it.real.timer);
+}
+
+/*
+ * On PREEMPT_RT this prevent priority inversion against softirq kthread in
+ * case it gets preempted while executing a timer callback. See comments in
+ * hrtimer_cancel_wait_running. For PREEMPT_RT=n this just results in a
+ * cpu_relax().
+ */
+static struct k_itimer *timer_wait_running(struct k_itimer *timer,
+ unsigned long *flags)
+{
+ const struct k_clock *kc = READ_ONCE(timer->kclock);
+ timer_t timer_id = READ_ONCE(timer->it_id);
+
+ /* Prevent kfree(timer) after dropping the lock */
+ rcu_read_lock();
+ unlock_timer(timer, *flags);
+
+ if (!WARN_ON_ONCE(!kc->timer_wait_running))
+ kc->timer_wait_running(timer);
+
+ rcu_read_unlock();
+ /* Relock the timer. It might be not longer hashed. */
+ return lock_timer(timer_id, flags);
+}
+
/* Set a POSIX.1b interval timer. */
int common_timer_set(struct k_itimer *timr, int flags,
struct itimerspec64 *new_setting,
@@ -865,13 +873,13 @@
return 0;
}
-static int do_timer_settime(timer_t timer_id, int flags,
+static int do_timer_settime(timer_t timer_id, int tmr_flags,
struct itimerspec64 *new_spec64,
struct itimerspec64 *old_spec64)
{
const struct k_clock *kc;
struct k_itimer *timr;
- unsigned long flag;
+ unsigned long flags;
int error = 0;
if (!timespec64_valid(&new_spec64->it_interval) ||
@@ -880,8 +888,9 @@
if (old_spec64)
memset(old_spec64, 0, sizeof(*old_spec64));
+
+ timr = lock_timer(timer_id, &flags);
retry:
- timr = lock_timer(timer_id, &flag);
if (!timr)
return -EINVAL;
@@ -889,13 +898,16 @@
if (WARN_ON_ONCE(!kc || !kc->timer_set))
error = -EINVAL;
else
- error = kc->timer_set(timr, flags, new_spec64, old_spec64);
+ error = kc->timer_set(timr, tmr_flags, new_spec64, old_spec64);
- unlock_timer(timr, flag);
if (error == TIMER_RETRY) {
- old_spec64 = NULL; // We already got the old time...
+ // We already got the old time...
+ old_spec64 = NULL;
+ /* Unlocks and relocks the timer if it still exists */
+ timr = timer_wait_running(timr, &flags);
goto retry;
}
+ unlock_timer(timr, flags);
return error;
}
@@ -924,9 +936,9 @@
}
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE4(timer_settime, timer_t, timer_id, int, flags,
- struct compat_itimerspec __user *, new,
- struct compat_itimerspec __user *, old)
+SYSCALL_DEFINE4(timer_settime32, timer_t, timer_id, int, flags,
+ struct old_itimerspec32 __user *, new,
+ struct old_itimerspec32 __user *, old)
{
struct itimerspec64 new_spec, old_spec;
struct itimerspec64 *rtn = old ? &old_spec : NULL;
@@ -934,12 +946,12 @@
if (!new)
return -EINVAL;
- if (get_compat_itimerspec64(&new_spec, new))
+ if (get_old_itimerspec32(&new_spec, new))
return -EFAULT;
error = do_timer_settime(timer_id, flags, &new_spec, rtn);
if (!error && old) {
- if (put_compat_itimerspec64(&old_spec, old))
+ if (put_old_itimerspec32(&old_spec, old))
error = -EFAULT;
}
return error;
@@ -972,13 +984,15 @@
struct k_itimer *timer;
unsigned long flags;
-retry_delete:
timer = lock_timer(timer_id, &flags);
+
+retry_delete:
if (!timer)
return -EINVAL;
- if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
+ if (unlikely(timer_delete_hook(timer) == TIMER_RETRY)) {
+ /* Unlocks and relocks the timer if it still exists */
+ timer = timer_wait_running(timer, &flags);
goto retry_delete;
}
@@ -1001,23 +1015,16 @@
*/
static void itimer_delete(struct k_itimer *timer)
{
- unsigned long flags;
-
retry_delete:
- spin_lock_irqsave(&timer->it_lock, flags);
+ spin_lock_irq(&timer->it_lock);
if (timer_delete_hook(timer) == TIMER_RETRY) {
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
goto retry_delete;
}
list_del(&timer->list);
- /*
- * This keeps any tasks waiting on the spin lock from thinking
- * they got something (see the lock code above).
- */
- timer->it_signal = NULL;
- unlock_timer(timer, flags);
+ spin_unlock_irq(&timer->it_lock);
release_posix_timer(timer, IT_ID_SET);
}
@@ -1068,22 +1075,28 @@
return error;
}
-SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
- struct timex __user *, utx)
+int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex * ktx)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
- struct timex ktx;
- int err;
if (!kc)
return -EINVAL;
if (!kc->clock_adj)
return -EOPNOTSUPP;
+ return kc->clock_adj(which_clock, ktx);
+}
+
+SYSCALL_DEFINE2(clock_adjtime, const clockid_t, which_clock,
+ struct __kernel_timex __user *, utx)
+{
+ struct __kernel_timex ktx;
+ int err;
+
if (copy_from_user(&ktx, utx, sizeof(ktx)))
return -EFAULT;
- err = kc->clock_adj(which_clock, &ktx);
+ err = do_clock_adjtime(which_clock, &ktx);
if (err >= 0 && copy_to_user(utx, &ktx, sizeof(ktx)))
return -EFAULT;
@@ -1111,8 +1124,8 @@
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE2(clock_settime, clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYSCALL_DEFINE2(clock_settime32, clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
@@ -1120,14 +1133,14 @@
if (!kc || !kc->clock_set)
return -EINVAL;
- if (compat_get_timespec64(&ts, tp))
+ if (get_old_timespec32(&ts, tp))
return -EFAULT;
return kc->clock_set(which_clock, &ts);
}
-COMPAT_SYSCALL_DEFINE2(clock_gettime, clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYSCALL_DEFINE2(clock_gettime32, clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
@@ -1138,46 +1151,32 @@
err = kc->clock_get(which_clock, &ts);
- if (!err && compat_put_timespec64(&ts, tp))
+ if (!err && put_old_timespec32(&ts, tp))
err = -EFAULT;
return err;
}
-#endif
-
-#ifdef CONFIG_COMPAT
-
-COMPAT_SYSCALL_DEFINE2(clock_adjtime, clockid_t, which_clock,
- struct compat_timex __user *, utp)
+SYSCALL_DEFINE2(clock_adjtime32, clockid_t, which_clock,
+ struct old_timex32 __user *, utp)
{
- const struct k_clock *kc = clockid_to_kclock(which_clock);
- struct timex ktx;
+ struct __kernel_timex ktx;
int err;
- if (!kc)
- return -EINVAL;
- if (!kc->clock_adj)
- return -EOPNOTSUPP;
-
- err = compat_get_timex(&ktx, utp);
+ err = get_old_timex32(&ktx, utp);
if (err)
return err;
- err = kc->clock_adj(which_clock, &ktx);
+ err = do_clock_adjtime(which_clock, &ktx);
if (err >= 0)
- err = compat_put_timex(utp, &ktx);
+ err = put_old_timex32(utp, &ktx);
return err;
}
-#endif
-
-#ifdef CONFIG_COMPAT_32BIT_TIME
-
-COMPAT_SYSCALL_DEFINE2(clock_getres, clockid_t, which_clock,
- struct compat_timespec __user *, tp)
+SYSCALL_DEFINE2(clock_getres_time32, clockid_t, which_clock,
+ struct old_timespec32 __user *, tp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 ts;
@@ -1187,7 +1186,7 @@
return -EINVAL;
err = kc->clock_getres(which_clock, &ts);
- if (!err && tp && compat_put_timespec64(&ts, tp))
+ if (!err && tp && put_old_timespec32(&ts, tp))
return -EFAULT;
return err;
@@ -1233,9 +1232,9 @@
#ifdef CONFIG_COMPAT_32BIT_TIME
-COMPAT_SYSCALL_DEFINE4(clock_nanosleep, clockid_t, which_clock, int, flags,
- struct compat_timespec __user *, rqtp,
- struct compat_timespec __user *, rmtp)
+SYSCALL_DEFINE4(clock_nanosleep_time32, clockid_t, which_clock, int, flags,
+ struct old_timespec32 __user *, rqtp,
+ struct old_timespec32 __user *, rmtp)
{
const struct k_clock *kc = clockid_to_kclock(which_clock);
struct timespec64 t;
@@ -1245,7 +1244,7 @@
if (!kc->nsleep)
return -EOPNOTSUPP;
- if (compat_get_timespec64(&t, rqtp))
+ if (get_old_timespec32(&t, rqtp))
return -EFAULT;
if (!timespec64_valid(&t))
@@ -1274,6 +1273,7 @@
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
+ .timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
@@ -1289,6 +1289,7 @@
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
+ .timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
@@ -1319,6 +1320,7 @@
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
+ .timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
@@ -1334,6 +1336,7 @@
.timer_forward = common_hrtimer_forward,
.timer_remaining = common_hrtimer_remaining,
.timer_try_to_cancel = common_hrtimer_try_to_cancel,
+ .timer_wait_running = common_timer_wait_running,
.timer_arm = common_hrtimer_arm,
};
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb2114..897c29e 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -8,7 +8,7 @@
const struct timespec64 *tp);
int (*clock_get)(const clockid_t which_clock,
struct timespec64 *tp);
- int (*clock_adj)(const clockid_t which_clock, struct timex *tx);
+ int (*clock_adj)(const clockid_t which_clock, struct __kernel_timex *tx);
int (*timer_create)(struct k_itimer *timer);
int (*nsleep)(const clockid_t which_clock, int flags,
const struct timespec64 *);
@@ -24,6 +24,7 @@
int (*timer_try_to_cancel)(struct k_itimer *timr);
void (*timer_arm)(struct k_itimer *timr, ktime_t expires,
bool absolute, bool sigev_none);
+ void (*timer_wait_running)(struct k_itimer *timr);
};
extern const struct k_clock clock_posix_cpu;
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index cbc72c2..dbd6905 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -1,10 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * sched_clock.c: Generic sched_clock() support, to extend low level
- * hardware time counters to full 64-bit ns values.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Generic sched_clock() support, to extend low level hardware time
+ * counters to full 64-bit ns values.
*/
#include <linux/clocksource.h>
#include <linux/init.h>
@@ -20,6 +17,8 @@
#include <linux/seqlock.h>
#include <linux/bitops.h>
+#include "timekeeping.h"
+
/**
* struct clock_read_data - data required to read from sched_clock()
*
@@ -97,7 +96,7 @@
unsigned long long notrace sched_clock(void)
{
u64 cyc, res;
- unsigned long seq;
+ unsigned int seq;
struct clock_read_data *rd;
do {
@@ -234,7 +233,7 @@
if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))
enable_sched_clock_irqtime();
- pr_debug("Registered %pF as sched_clock source\n", read);
+ pr_debug("Registered %pS as sched_clock source\n", read);
}
void __init generic_sched_clock_init(void)
@@ -270,12 +269,12 @@
*/
static u64 notrace suspended_sched_clock_read(void)
{
- unsigned long seq = raw_read_seqcount(&cd.seq);
+ unsigned int seq = raw_read_seqcount(&cd.seq);
return cd.read_data[seq & 1].epoch_cyc;
}
-static int sched_clock_suspend(void)
+int sched_clock_suspend(void)
{
struct clock_read_data *rd = &cd.read_data[0];
@@ -286,7 +285,7 @@
return 0;
}
-static void sched_clock_resume(void)
+void sched_clock_resume(void)
{
struct clock_read_data *rd = &cd.read_data[0];
diff --git a/kernel/time/test_udelay.c b/kernel/time/test_udelay.c
index b0928ab..77c6300 100644
--- a/kernel/time/test_udelay.c
+++ b/kernel/time/test_udelay.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* udelay() test kernel module
*
@@ -7,15 +8,6 @@
* Specifying usecs of 0 or negative values will run multiples tests.
*
* Copyright (C) 2014 Google, Inc.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
#include <linux/debugfs.h>
diff --git a/kernel/time/tick-broadcast-hrtimer.c b/kernel/time/tick-broadcast-hrtimer.c
index a59641f..b5a65e2 100644
--- a/kernel/time/tick-broadcast-hrtimer.c
+++ b/kernel/time/tick-broadcast-hrtimer.c
@@ -1,8 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-broadcast-hrtimer.c
- * This file emulates a local clock event device
- * via a pseudo clock device.
+ * Emulate a local clock event device via a pseudo clock device.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -44,34 +42,39 @@
*/
static int bc_set_next(ktime_t expires, struct clock_event_device *bc)
{
- int bc_moved;
/*
- * We try to cancel the timer first. If the callback is on
- * flight on some other cpu then we let it handle it. If we
- * were able to cancel the timer nothing can rearm it as we
- * own broadcast_lock.
+ * This is called either from enter/exit idle code or from the
+ * broadcast handler. In all cases tick_broadcast_lock is held.
*
- * However we can also be called from the event handler of
- * ce_broadcast_hrtimer itself when it expires. We cannot
- * restart the timer because we are in the callback, but we
- * can set the expiry time and let the callback return
- * HRTIMER_RESTART.
+ * hrtimer_cancel() cannot be called here neither from the
+ * broadcast handler nor from the enter/exit idle code. The idle
+ * code can run into the problem described in bc_shutdown() and the
+ * broadcast handler cannot wait for itself to complete for obvious
+ * reasons.
*
- * Since we are in the idle loop at this point and because
- * hrtimer_{start/cancel} functions call into tracing,
- * calls to these functions must be bound within RCU_NONIDLE.
+ * Each caller tries to arm the hrtimer on its own CPU, but if the
+ * hrtimer callbback function is currently running, then
+ * hrtimer_start() cannot move it and the timer stays on the CPU on
+ * which it is assigned at the moment.
+ *
+ * As this can be called from idle code, the hrtimer_start()
+ * invocation has to be wrapped with RCU_NONIDLE() as
+ * hrtimer_start() can call into tracing.
*/
- RCU_NONIDLE({
- bc_moved = hrtimer_try_to_cancel(&bctimer) >= 0;
- if (bc_moved)
- hrtimer_start(&bctimer, expires,
- HRTIMER_MODE_ABS_PINNED);});
- if (bc_moved) {
- /* Bind the "device" to the cpu */
- bc->bound_on = smp_processor_id();
- } else if (bc->bound_on == smp_processor_id()) {
- hrtimer_set_expires(&bctimer, expires);
- }
+ RCU_NONIDLE( {
+ hrtimer_start(&bctimer, expires, HRTIMER_MODE_ABS_PINNED_HARD);
+ /*
+ * The core tick broadcast mode expects bc->bound_on to be set
+ * correctly to prevent a CPU which has the broadcast hrtimer
+ * armed from going deep idle.
+ *
+ * As tick_broadcast_lock is held, nothing can change the cpu
+ * base which was just established in hrtimer_start() above. So
+ * the below access is safe even without holding the hrtimer
+ * base lock.
+ */
+ bc->bound_on = bctimer.base->cpu_base->cpu;
+ } );
return 0;
}
@@ -97,16 +100,12 @@
{
ce_broadcast_hrtimer.event_handler(&ce_broadcast_hrtimer);
- if (clockevent_state_oneshot(&ce_broadcast_hrtimer))
- if (ce_broadcast_hrtimer.next_event != KTIME_MAX)
- return HRTIMER_RESTART;
-
return HRTIMER_NORESTART;
}
void tick_setup_hrtimer_broadcast(void)
{
- hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&bctimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
bctimer.function = bc_handler;
clockevents_register_device(&ce_broadcast_hrtimer);
}
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index aa2094d..e51778c 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-broadcast.c
- *
* This file contains functions which emulate a local clock-event
* device via a broadcast event source.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
- *
- * This code is licenced under the GPL version 2. For details see
- * kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -40,10 +36,16 @@
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
+# ifdef CONFIG_HOTPLUG_CPU
+static void tick_broadcast_oneshot_offline(unsigned int cpu);
+# endif
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
+# ifdef CONFIG_HOTPLUG_CPU
+static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
+# endif
#endif
/*
@@ -379,6 +381,7 @@
switch (mode) {
case TICK_BROADCAST_FORCE:
tick_broadcast_forced = 1;
+ /* fall through */
case TICK_BROADCAST_ON:
cpumask_set_cpu(cpu, tick_broadcast_on);
if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
@@ -400,8 +403,6 @@
if (tick_broadcast_forced)
break;
cpumask_clear_cpu(cpu, tick_broadcast_on);
- if (!tick_device_is_functional(dev))
- break;
if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
if (tick_broadcast_device.mode ==
TICKDEV_MODE_PERIODIC)
@@ -438,27 +439,29 @@
}
#ifdef CONFIG_HOTPLUG_CPU
-/*
- * Remove a CPU from broadcasting
- */
-void tick_shutdown_broadcast(unsigned int cpu)
+static void tick_shutdown_broadcast(void)
{
- struct clock_event_device *bc;
- unsigned long flags;
-
- raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
- bc = tick_broadcast_device.evtdev;
- cpumask_clear_cpu(cpu, tick_broadcast_mask);
- cpumask_clear_cpu(cpu, tick_broadcast_on);
+ struct clock_event_device *bc = tick_broadcast_device.evtdev;
if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
if (bc && cpumask_empty(tick_broadcast_mask))
clockevents_shutdown(bc);
}
-
- raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
+
+/*
+ * Remove a CPU from broadcasting
+ */
+void tick_broadcast_offline(unsigned int cpu)
+{
+ raw_spin_lock(&tick_broadcast_lock);
+ cpumask_clear_cpu(cpu, tick_broadcast_mask);
+ cpumask_clear_cpu(cpu, tick_broadcast_on);
+ tick_broadcast_oneshot_offline(cpu);
+ tick_shutdown_broadcast();
+ raw_spin_unlock(&tick_broadcast_lock);
+}
+
#endif
void tick_suspend_broadcast(void)
@@ -806,13 +809,13 @@
* either the CPU handling the broadcast
* interrupt or we got woken by something else.
*
- * We are not longer in the broadcast mask, so
+ * We are no longer in the broadcast mask, so
* if the cpu local expiry time is already
* reached, we would reprogram the cpu local
* timer with an already expired event.
*
* This can lead to a ping-pong when we return
- * to idle and therefor rearm the broadcast
+ * to idle and therefore rearm the broadcast
* timer before the cpu local timer was able
* to fire. This happens because the forced
* reprogramming makes sure that the event
@@ -955,14 +958,10 @@
}
/*
- * Remove a dead CPU from broadcasting
+ * Remove a dying CPU from broadcasting
*/
-void tick_shutdown_broadcast_oneshot(unsigned int cpu)
+static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
- unsigned long flags;
-
- raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
-
/*
* Clear the broadcast masks for the dead cpu, but do not stop
* the broadcast device!
@@ -970,8 +969,6 @@
cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
-
- raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 14de372..59225b4 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-common.c
- *
* This file contains the base functions to manage periodic tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
- *
- * This code is licenced under the GPL version 2. For details see
- * kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -50,6 +46,14 @@
* procedure also covers cpu hotplug.
*/
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;
+#ifdef CONFIG_NO_HZ_FULL
+/*
+ * tick_do_timer_boot_cpu indicates the boot CPU temporarily owns
+ * tick_do_timer_cpu and it should be taken over by an eligible secondary
+ * when one comes online.
+ */
+static int tick_do_timer_boot_cpu __read_mostly = -1;
+#endif
/*
* Debugging: see timer_list.c
@@ -153,7 +157,7 @@
!tick_broadcast_oneshot_active()) {
clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
} else {
- unsigned long seq;
+ unsigned int seq;
ktime_t next;
do {
@@ -171,6 +175,26 @@
}
}
+#ifdef CONFIG_NO_HZ_FULL
+static void giveup_do_timer(void *info)
+{
+ int cpu = *(unsigned int *)info;
+
+ WARN_ON(tick_do_timer_cpu != smp_processor_id());
+
+ tick_do_timer_cpu = cpu;
+}
+
+static void tick_take_do_timer_from_boot(void)
+{
+ int cpu = smp_processor_id();
+ int from = tick_do_timer_boot_cpu;
+
+ if (from >= 0 && from != cpu)
+ smp_call_function_single(from, giveup_do_timer, &cpu, 1);
+}
+#endif
+
/*
* Setup the tick device
*/
@@ -190,12 +214,26 @@
* this cpu:
*/
if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
- if (!tick_nohz_full_cpu(cpu))
- tick_do_timer_cpu = cpu;
- else
- tick_do_timer_cpu = TICK_DO_TIMER_NONE;
+ tick_do_timer_cpu = cpu;
+
tick_next_period = ktime_get();
tick_period = NSEC_PER_SEC / HZ;
+#ifdef CONFIG_NO_HZ_FULL
+ /*
+ * The boot CPU may be nohz_full, in which case set
+ * tick_do_timer_boot_cpu so the first housekeeping
+ * secondary that comes up will take do_timer from
+ * us.
+ */
+ if (tick_nohz_full_cpu(cpu))
+ tick_do_timer_boot_cpu = cpu;
+
+ } else if (tick_do_timer_boot_cpu != -1 &&
+ !tick_nohz_full_cpu(cpu)) {
+ tick_take_do_timer_from_boot();
+ tick_do_timer_boot_cpu = -1;
+ WARN_ON(tick_do_timer_cpu != cpu);
+#endif
}
/*
@@ -491,6 +529,7 @@
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), true);
system_state = SYSTEM_SUSPEND;
+ sched_clock_suspend();
timekeeping_suspend();
} else {
tick_suspend_local();
@@ -514,6 +553,7 @@
if (tick_freeze_depth == num_online_cpus()) {
timekeeping_resume();
+ sched_clock_resume();
system_state = SYSTEM_RUNNING;
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index e277284..7b24961 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -64,7 +64,6 @@
extern int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu);
extern void tick_install_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
-extern void tick_shutdown_broadcast(unsigned int cpu);
extern void tick_suspend_broadcast(void);
extern void tick_resume_broadcast(void);
extern bool tick_resume_check_broadcast(void);
@@ -78,7 +77,6 @@
static inline int tick_is_broadcast_device(struct clock_event_device *dev) { return 0; }
static inline int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu) { return 0; }
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
-static inline void tick_shutdown_broadcast(unsigned int cpu) { }
static inline void tick_suspend_broadcast(void) { }
static inline void tick_resume_broadcast(void) { }
static inline bool tick_resume_check_broadcast(void) { return false; }
@@ -128,19 +126,23 @@
/* Functions related to oneshot broadcasting */
#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_TICK_ONESHOT)
extern void tick_broadcast_switch_to_oneshot(void);
-extern void tick_shutdown_broadcast_oneshot(unsigned int cpu);
extern int tick_broadcast_oneshot_active(void);
extern void tick_check_oneshot_broadcast_this_cpu(void);
bool tick_broadcast_oneshot_available(void);
extern struct cpumask *tick_get_broadcast_oneshot_mask(void);
#else /* !(BROADCAST && ONESHOT): */
static inline void tick_broadcast_switch_to_oneshot(void) { }
-static inline void tick_shutdown_broadcast_oneshot(unsigned int cpu) { }
static inline int tick_broadcast_oneshot_active(void) { return 0; }
static inline void tick_check_oneshot_broadcast_this_cpu(void) { }
static inline bool tick_broadcast_oneshot_available(void) { return tick_oneshot_possible(); }
#endif /* !(BROADCAST && ONESHOT) */
+#if defined(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST) && defined(CONFIG_HOTPLUG_CPU)
+extern void tick_broadcast_offline(unsigned int cpu);
+#else
+static inline void tick_broadcast_offline(unsigned int cpu) { }
+#endif
+
/* NO_HZ_FULL internal */
#ifdef CONFIG_NO_HZ_FULL
extern void tick_nohz_init(void);
diff --git a/kernel/time/tick-oneshot.c b/kernel/time/tick-oneshot.c
index 6fe615d..f9745d4 100644
--- a/kernel/time/tick-oneshot.c
+++ b/kernel/time/tick-oneshot.c
@@ -1,15 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-oneshot.c
- *
* This file contains functions which manage high resolution tick
* related events.
*
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
- *
- * This code is licenced under the GPL version 2. For details see
- * kernel-base/COPYING.
*/
#include <linux/cpu.h>
#include <linux/err.h>
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5b33e2f..9558517 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/tick-sched.c
- *
* Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
* Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
* Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner
@@ -8,8 +7,6 @@
* No idle tick implementation for low and high resolution timers
*
* Started by: Thomas Gleixner and Ingo Molnar
- *
- * Distribute under GPLv2.
*/
#include <linux/cpu.h>
#include <linux/err.h>
@@ -124,10 +121,16 @@
* into a long sleep. If two CPUs happen to assign themselves to
* this duty, then the jiffies update is still serialized by
* jiffies_lock.
+ *
+ * If nohz_full is enabled, this should not happen because the
+ * tick_do_timer_cpu never relinquishes.
*/
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)
- && !tick_nohz_full_cpu(cpu))
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_NONE)) {
+#ifdef CONFIG_NO_HZ_FULL
+ WARN_ON(tick_nohz_full_running);
+#endif
tick_do_timer_cpu = cpu;
+ }
#endif
/* Check, if the jiffies need an update */
@@ -398,8 +401,8 @@
static int tick_nohz_cpu_down(unsigned int cpu)
{
/*
- * The boot CPU handles housekeeping duty (unbound timers,
- * workqueues, timekeeping, ...) on behalf of full dynticks
+ * The tick_do_timer_cpu CPU handles housekeeping duty (unbound
+ * timers, workqueues, timekeeping, ...) on behalf of full dynticks
* CPUs. It must remain online when nohz full is enabled.
*/
if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
@@ -426,12 +429,15 @@
return;
}
- cpu = smp_processor_id();
+ if (IS_ENABLED(CONFIG_PM_SLEEP_SMP) &&
+ !IS_ENABLED(CONFIG_PM_SLEEP_SMP_NONZERO_CPU)) {
+ cpu = smp_processor_id();
- if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
- pr_warn("NO_HZ: Clearing %d from nohz_full range for timekeeping\n",
- cpu);
- cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+ if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
+ pr_warn("NO_HZ: Clearing %d from nohz_full range "
+ "for timekeeping\n", cpu);
+ cpumask_clear_cpu(cpu, tick_nohz_full_mask);
+ }
}
for_each_cpu(cpu, tick_nohz_full_mask)
@@ -628,10 +634,12 @@
/* Forward the time to expire in the future */
hrtimer_forward(&ts->sched_timer, now, tick_period);
- if (ts->nohz_mode == NOHZ_MODE_HIGHRES)
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
- else
+ if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
+ hrtimer_start_expires(&ts->sched_timer,
+ HRTIMER_MODE_ABS_PINNED_HARD);
+ } else {
tick_program_event(hrtimer_get_expires(&ts->sched_timer), 1);
+ }
/*
* Reset to make sure next tick stop doesn't get fooled by past
@@ -648,7 +656,8 @@
static ktime_t tick_nohz_next_event(struct tick_sched *ts, int cpu)
{
u64 basemono, next_tick, next_tmr, next_rcu, delta, expires;
- unsigned long seq, basejiff;
+ unsigned long basejiff;
+ unsigned int seq;
/* Read jiffies and the time when jiffies were updated last */
do {
@@ -775,7 +784,6 @@
*/
if (!ts->tick_stopped) {
calc_load_nohz_start();
- cpu_load_update_nohz_start();
quiet_vmstat();
ts->last_tick = hrtimer_get_expires(&ts->sched_timer);
@@ -796,7 +804,8 @@
}
if (ts->nohz_mode == NOHZ_MODE_HIGHRES) {
- hrtimer_start(&ts->sched_timer, tick, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start(&ts->sched_timer, tick,
+ HRTIMER_MODE_ABS_PINNED_HARD);
} else {
hrtimer_set_expires(&ts->sched_timer, tick);
tick_program_event(tick, 1);
@@ -822,7 +831,6 @@
{
/* Update jiffies first */
tick_do_update_jiffies64(now);
- cpu_load_update_nohz_stop();
/*
* Clear the timer idle flag, so we avoid IPIs on remote queueing and
@@ -885,7 +893,7 @@
if (need_resched())
return false;
- if (unlikely(local_softirq_pending() && cpu_online(cpu))) {
+ if (unlikely(local_softirq_pending())) {
static int ratelimit;
if (ratelimit < 10 &&
@@ -907,8 +915,13 @@
/*
* Boot safety: make sure the timekeeping duty has been
* assigned before entering dyntick-idle mode,
+ * tick_do_timer_cpu is TICK_DO_TIMER_BOOT
*/
- if (tick_do_timer_cpu == TICK_DO_TIMER_NONE)
+ if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
+ return false;
+
+ /* Should not happen for nohz-full */
+ if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
return false;
}
@@ -1026,6 +1039,18 @@
}
/**
+ * tick_nohz_get_next_hrtimer - return the next expiration time for the hrtimer
+ * or the tick, whatever that expires first. Note that, if the tick has been
+ * stopped, it returns the next hrtimer.
+ *
+ * Called from power state control code with interrupts disabled
+ */
+ktime_t tick_nohz_get_next_hrtimer(void)
+{
+ return __this_cpu_read(tick_cpu_device.evtdev)->next_event;
+}
+
+/**
* tick_nohz_get_sleep_length - return the expected length of the current sleep
* @delta_next: duration until the next event if the tick cannot be stopped
*
@@ -1208,7 +1233,7 @@
* Recycle the hrtimer in ts, so we can share the
* hrtimer_forward with the highres code.
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
/* Get the next period */
next = tick_init_jiffy_update();
@@ -1305,7 +1330,7 @@
/*
* Emulate tick processing via per-CPU hrtimers:
*/
- hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+ hrtimer_init(&ts->sched_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
ts->sched_timer.function = tick_sched_timer;
/* Get the next period (per-CPU) */
@@ -1320,7 +1345,7 @@
}
hrtimer_forward(&ts->sched_timer, now, tick_period);
- hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED);
+ hrtimer_start_expires(&ts->sched_timer, HRTIMER_MODE_ABS_PINNED_HARD);
tick_nohz_activate(ts, NOHZ_MODE_HIGHRES);
}
#endif /* HIGH_RES_TIMERS */
diff --git a/kernel/time/tick-sched.h b/kernel/time/tick-sched.h
index 6de959a..4fb0652 100644
--- a/kernel/time/tick-sched.h
+++ b/kernel/time/tick-sched.h
@@ -24,12 +24,19 @@
* struct tick_sched - sched tick emulation and no idle tick control/stats
* @sched_timer: hrtimer to schedule the periodic tick in high
* resolution mode
+ * @check_clocks: Notification mechanism about clocksource changes
+ * @nohz_mode: Mode - one state of tick_nohz_mode
+ * @inidle: Indicator that the CPU is in the tick idle mode
+ * @tick_stopped: Indicator that the idle tick has been stopped
+ * @idle_active: Indicator that the CPU is actively in the tick idle mode;
+ * it is resetted during irq handling phases.
+ * @do_timer_lst: CPU was the last one doing do_timer before going idle
+ * @got_idle_tick: Tick timer function has run with @inidle set
* @last_tick: Store the last tick expiry time when the tick
* timer is modified for nohz sleeps. This is necessary
* to resume the tick timer operation in the timeline
* when the CPU returns from nohz sleep.
* @next_tick: Next tick to be fired when in dynticks mode.
- * @tick_stopped: Indicator that the idle tick has been stopped
* @idle_jiffies: jiffies at the entry to idle for idle time accounting
* @idle_calls: Total number of idle calls
* @idle_sleeps: Number of idle calls, where the sched tick was stopped
@@ -40,8 +47,8 @@
* @iowait_sleeptime: Sum of the time slept in idle with sched tick stopped, with IO outstanding
* @timer_expires: Anticipated timer expiration time (in case sched tick is stopped)
* @timer_expires_base: Base time clock monotonic for @timer_expires
- * @do_timer_lst: CPU was the last one doing do_timer before going idle
- * @got_idle_tick: Tick timer function has run with @inidle set
+ * @next_timer: Expiry time of next expiring timer for debugging purpose only
+ * @tick_dep_mask: Tick dependency mask - is set, if someone needs the tick
*/
struct tick_sched {
struct hrtimer sched_timer;
diff --git a/kernel/time/time.c b/kernel/time/time.c
index ccdb351..5c54ca6 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -1,14 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time.c
- *
* Copyright (C) 1991, 1992 Linus Torvalds
*
- * This file contains the interface functions for the various
- * time related system calls: time, stime, gettimeofday, settimeofday,
- * adjtime
- */
-/*
- * Modification history kernel/time.c
+ * This file contains the interface functions for the various time related
+ * system calls: time, stime, gettimeofday, settimeofday, adjtime
+ *
+ * Modification history:
*
* 1993-09-02 Philip Gladstone
* Created file with time related functions from sched/core.c and adjtimex()
@@ -101,15 +98,15 @@
#endif /* __ARCH_WANT_SYS_TIME */
-#ifdef CONFIG_COMPAT
-#ifdef __ARCH_WANT_COMPAT_SYS_TIME
+#ifdef CONFIG_COMPAT_32BIT_TIME
+#ifdef __ARCH_WANT_SYS_TIME32
-/* compat_time_t is a 32 bit "long" and needs to get converted. */
-COMPAT_SYSCALL_DEFINE1(time, compat_time_t __user *, tloc)
+/* old_time32_t is a 32 bit "long" and needs to get converted. */
+SYSCALL_DEFINE1(time32, old_time32_t __user *, tloc)
{
- compat_time_t i;
+ old_time32_t i;
- i = (compat_time_t)ktime_get_real_seconds();
+ i = (old_time32_t)ktime_get_real_seconds();
if (tloc) {
if (put_user(i,tloc))
@@ -119,7 +116,7 @@
return i;
}
-COMPAT_SYSCALL_DEFINE1(stime, compat_time_t __user *, tptr)
+SYSCALL_DEFINE1(stime32, old_time32_t __user *, tptr)
{
struct timespec64 tv;
int err;
@@ -137,16 +134,18 @@
return 0;
}
-#endif /* __ARCH_WANT_COMPAT_SYS_TIME */
+#endif /* __ARCH_WANT_SYS_TIME32 */
#endif
SYSCALL_DEFINE2(gettimeofday, struct timeval __user *, tv,
struct timezone __user *, tz)
{
if (likely(tv != NULL)) {
- struct timeval ktv;
- do_gettimeofday(&ktv);
- if (copy_to_user(tv, &ktv, sizeof(ktv)))
+ struct timespec64 ts;
+
+ ktime_get_real_ts64(&ts);
+ if (put_user(ts.tv_sec, &tv->tv_sec) ||
+ put_user(ts.tv_nsec / 1000, &tv->tv_usec))
return -EFAULT;
}
if (unlikely(tz != NULL)) {
@@ -172,7 +171,7 @@
static int firsttime = 1;
int error = 0;
- if (tv && !timespec64_valid(tv))
+ if (tv && !timespec64_valid_settod(tv))
return -EINVAL;
error = security_settime64(tv, tz);
@@ -223,14 +222,15 @@
}
#ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE2(gettimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(gettimeofday, struct old_timeval32 __user *, tv,
struct timezone __user *, tz)
{
if (tv) {
- struct timeval ktv;
+ struct timespec64 ts;
- do_gettimeofday(&ktv);
- if (compat_put_timeval(&ktv, tv))
+ ktime_get_real_ts64(&ts);
+ if (put_user(ts.tv_sec, &tv->tv_sec) ||
+ put_user(ts.tv_nsec / 1000, &tv->tv_usec))
return -EFAULT;
}
if (tz) {
@@ -241,7 +241,7 @@
return 0;
}
-COMPAT_SYSCALL_DEFINE2(settimeofday, struct compat_timeval __user *, tv,
+COMPAT_SYSCALL_DEFINE2(settimeofday, struct old_timeval32 __user *, tv,
struct timezone __user *, tz)
{
struct timespec64 new_ts;
@@ -251,6 +251,10 @@
if (tv) {
if (compat_get_timeval(&user_tv, tv))
return -EFAULT;
+
+ if (!timeval_valid(&user_tv))
+ return -EINVAL;
+
new_ts.tv_sec = user_tv.tv_sec;
new_ts.tv_nsec = user_tv.tv_usec * NSEC_PER_USEC;
}
@@ -263,35 +267,99 @@
}
#endif
-SYSCALL_DEFINE1(adjtimex, struct timex __user *, txc_p)
+#if !defined(CONFIG_64BIT_TIME) || defined(CONFIG_64BIT)
+SYSCALL_DEFINE1(adjtimex, struct __kernel_timex __user *, txc_p)
{
- struct timex txc; /* Local copy of parameter */
+ struct __kernel_timex txc; /* Local copy of parameter */
int ret;
/* Copy the user data space into the kernel copy
* structure. But bear in mind that the structures
* may change
*/
- if (copy_from_user(&txc, txc_p, sizeof(struct timex)))
+ if (copy_from_user(&txc, txc_p, sizeof(struct __kernel_timex)))
return -EFAULT;
ret = do_adjtimex(&txc);
- return copy_to_user(txc_p, &txc, sizeof(struct timex)) ? -EFAULT : ret;
+ return copy_to_user(txc_p, &txc, sizeof(struct __kernel_timex)) ? -EFAULT : ret;
+}
+#endif
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+int get_old_timex32(struct __kernel_timex *txc, const struct old_timex32 __user *utp)
+{
+ struct old_timex32 tx32;
+
+ memset(txc, 0, sizeof(struct __kernel_timex));
+ if (copy_from_user(&tx32, utp, sizeof(struct old_timex32)))
+ return -EFAULT;
+
+ txc->modes = tx32.modes;
+ txc->offset = tx32.offset;
+ txc->freq = tx32.freq;
+ txc->maxerror = tx32.maxerror;
+ txc->esterror = tx32.esterror;
+ txc->status = tx32.status;
+ txc->constant = tx32.constant;
+ txc->precision = tx32.precision;
+ txc->tolerance = tx32.tolerance;
+ txc->time.tv_sec = tx32.time.tv_sec;
+ txc->time.tv_usec = tx32.time.tv_usec;
+ txc->tick = tx32.tick;
+ txc->ppsfreq = tx32.ppsfreq;
+ txc->jitter = tx32.jitter;
+ txc->shift = tx32.shift;
+ txc->stabil = tx32.stabil;
+ txc->jitcnt = tx32.jitcnt;
+ txc->calcnt = tx32.calcnt;
+ txc->errcnt = tx32.errcnt;
+ txc->stbcnt = tx32.stbcnt;
+
+ return 0;
}
-#ifdef CONFIG_COMPAT
-
-COMPAT_SYSCALL_DEFINE1(adjtimex, struct compat_timex __user *, utp)
+int put_old_timex32(struct old_timex32 __user *utp, const struct __kernel_timex *txc)
{
- struct timex txc;
+ struct old_timex32 tx32;
+
+ memset(&tx32, 0, sizeof(struct old_timex32));
+ tx32.modes = txc->modes;
+ tx32.offset = txc->offset;
+ tx32.freq = txc->freq;
+ tx32.maxerror = txc->maxerror;
+ tx32.esterror = txc->esterror;
+ tx32.status = txc->status;
+ tx32.constant = txc->constant;
+ tx32.precision = txc->precision;
+ tx32.tolerance = txc->tolerance;
+ tx32.time.tv_sec = txc->time.tv_sec;
+ tx32.time.tv_usec = txc->time.tv_usec;
+ tx32.tick = txc->tick;
+ tx32.ppsfreq = txc->ppsfreq;
+ tx32.jitter = txc->jitter;
+ tx32.shift = txc->shift;
+ tx32.stabil = txc->stabil;
+ tx32.jitcnt = txc->jitcnt;
+ tx32.calcnt = txc->calcnt;
+ tx32.errcnt = txc->errcnt;
+ tx32.stbcnt = txc->stbcnt;
+ tx32.tai = txc->tai;
+ if (copy_to_user(utp, &tx32, sizeof(struct old_timex32)))
+ return -EFAULT;
+ return 0;
+}
+
+SYSCALL_DEFINE1(adjtimex_time32, struct old_timex32 __user *, utp)
+{
+ struct __kernel_timex txc;
int err, ret;
- err = compat_get_timex(&txc, utp);
+ err = get_old_timex32(&txc, utp);
if (err)
return err;
ret = do_adjtimex(&txc);
- err = compat_put_timex(utp, &txc);
+ err = put_old_timex32(utp, &txc);
if (err)
return err;
@@ -342,30 +410,6 @@
}
EXPORT_SYMBOL(jiffies_to_usecs);
-/**
- * timespec_trunc - Truncate timespec to a granularity
- * @t: Timespec
- * @gran: Granularity in ns.
- *
- * Truncate a timespec to a granularity. Always rounds down. gran must
- * not be 0 nor greater than a second (NSEC_PER_SEC, or 10^9 ns).
- */
-struct timespec timespec_trunc(struct timespec t, unsigned gran)
-{
- /* Avoid division in the common cases 1 ns and 1 s. */
- if (gran == 1) {
- /* nothing */
- } else if (gran == NSEC_PER_SEC) {
- t.tv_nsec = 0;
- } else if (gran > 1 && gran < NSEC_PER_SEC) {
- t.tv_nsec -= t.tv_nsec % gran;
- } else {
- WARN(1, "illegal file time granularity: %u", gran);
- }
- return t;
-}
-EXPORT_SYMBOL(timespec_trunc);
-
/*
* mktime64 - Converts date to seconds.
* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
@@ -408,42 +452,6 @@
EXPORT_SYMBOL(mktime64);
/**
- * set_normalized_timespec - set timespec sec and nsec parts and normalize
- *
- * @ts: pointer to timespec variable to be set
- * @sec: seconds to set
- * @nsec: nanoseconds to set
- *
- * Set seconds and nanoseconds field of a timespec variable and
- * normalize to the timespec storage format
- *
- * Note: The tv_nsec part is always in the range of
- * 0 <= tv_nsec < NSEC_PER_SEC
- * For negative values only the tv_sec field is negative !
- */
-void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec)
-{
- while (nsec >= NSEC_PER_SEC) {
- /*
- * The following asm() prevents the compiler from
- * optimising this loop into a modulo operation. See
- * also __iter_div_u64_rem() in include/linux/time.h
- */
- asm("" : "+rm"(nsec));
- nsec -= NSEC_PER_SEC;
- ++sec;
- }
- while (nsec < 0) {
- asm("" : "+rm"(nsec));
- nsec += NSEC_PER_SEC;
- --sec;
- }
- ts->tv_sec = sec;
- ts->tv_nsec = nsec;
-}
-EXPORT_SYMBOL(set_normalized_timespec);
-
-/**
* ns_to_timespec - Convert nanoseconds to timespec
* @nsec: the nanoseconds value to be converted
*
@@ -779,6 +787,16 @@
}
EXPORT_SYMBOL(jiffies64_to_nsecs);
+u64 jiffies64_to_msecs(const u64 j)
+{
+#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ)
+ return (MSEC_PER_SEC / HZ) * j;
+#else
+ return div_u64(j * HZ_TO_MSEC_NUM, HZ_TO_MSEC_DEN);
+#endif
+}
+EXPORT_SYMBOL(jiffies64_to_msecs);
+
/**
* nsecs_to_jiffies64 - Convert nsecs in u64 to jiffies64
*
@@ -863,7 +881,7 @@
ts->tv_sec = kts.tv_sec;
/* Zero out the padding for 32 bit systems or in compat mode */
- if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) || in_compat_syscall()))
+ if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall())
kts.tv_nsec &= 0xFFFFFFFFUL;
ts->tv_nsec = kts.tv_nsec;
@@ -884,10 +902,10 @@
}
EXPORT_SYMBOL_GPL(put_timespec64);
-int __compat_get_timespec64(struct timespec64 *ts64,
- const struct compat_timespec __user *cts)
+static int __get_old_timespec32(struct timespec64 *ts64,
+ const struct old_timespec32 __user *cts)
{
- struct compat_timespec ts;
+ struct old_timespec32 ts;
int ret;
ret = copy_from_user(&ts, cts, sizeof(ts));
@@ -900,33 +918,33 @@
return 0;
}
-int __compat_put_timespec64(const struct timespec64 *ts64,
- struct compat_timespec __user *cts)
+static int __put_old_timespec32(const struct timespec64 *ts64,
+ struct old_timespec32 __user *cts)
{
- struct compat_timespec ts = {
+ struct old_timespec32 ts = {
.tv_sec = ts64->tv_sec,
.tv_nsec = ts64->tv_nsec
};
return copy_to_user(cts, &ts, sizeof(ts)) ? -EFAULT : 0;
}
-int compat_get_timespec64(struct timespec64 *ts, const void __user *uts)
+int get_old_timespec32(struct timespec64 *ts, const void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
return copy_from_user(ts, uts, sizeof(*ts)) ? -EFAULT : 0;
else
- return __compat_get_timespec64(ts, uts);
+ return __get_old_timespec32(ts, uts);
}
-EXPORT_SYMBOL_GPL(compat_get_timespec64);
+EXPORT_SYMBOL_GPL(get_old_timespec32);
-int compat_put_timespec64(const struct timespec64 *ts, void __user *uts)
+int put_old_timespec32(const struct timespec64 *ts, void __user *uts)
{
if (COMPAT_USE_64BIT_TIME)
return copy_to_user(uts, ts, sizeof(*ts)) ? -EFAULT : 0;
else
- return __compat_put_timespec64(ts, uts);
+ return __put_old_timespec32(ts, uts);
}
-EXPORT_SYMBOL_GPL(compat_put_timespec64);
+EXPORT_SYMBOL_GPL(put_old_timespec32);
int get_itimerspec64(struct itimerspec64 *it,
const struct __kernel_itimerspec __user *uit)
@@ -958,23 +976,23 @@
}
EXPORT_SYMBOL_GPL(put_itimerspec64);
-int get_compat_itimerspec64(struct itimerspec64 *its,
- const struct compat_itimerspec __user *uits)
+int get_old_itimerspec32(struct itimerspec64 *its,
+ const struct old_itimerspec32 __user *uits)
{
- if (__compat_get_timespec64(&its->it_interval, &uits->it_interval) ||
- __compat_get_timespec64(&its->it_value, &uits->it_value))
+ if (__get_old_timespec32(&its->it_interval, &uits->it_interval) ||
+ __get_old_timespec32(&its->it_value, &uits->it_value))
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(get_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(get_old_itimerspec32);
-int put_compat_itimerspec64(const struct itimerspec64 *its,
- struct compat_itimerspec __user *uits)
+int put_old_itimerspec32(const struct itimerspec64 *its,
+ struct old_itimerspec32 __user *uits)
{
- if (__compat_put_timespec64(&its->it_interval, &uits->it_interval) ||
- __compat_put_timespec64(&its->it_value, &uits->it_value))
+ if (__put_old_timespec32(&its->it_interval, &uits->it_interval) ||
+ __put_old_timespec32(&its->it_value, &uits->it_value))
return -EFAULT;
return 0;
}
-EXPORT_SYMBOL_GPL(put_compat_itimerspec64);
+EXPORT_SYMBOL_GPL(put_old_itimerspec32);
diff --git a/kernel/time/timeconst.bc b/kernel/time/timeconst.bc
index f83bbb8..7ed0e0f 100644
--- a/kernel/time/timeconst.bc
+++ b/kernel/time/timeconst.bc
@@ -1,3 +1,5 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
scale=0
define gcd(a,b) {
diff --git a/kernel/time/timeconv.c b/kernel/time/timeconv.c
index 7142580..589e0a5 100644
--- a/kernel/time/timeconv.c
+++ b/kernel/time/timeconv.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: LGPL-2.0+
/*
* Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
* This file is part of the GNU C Library.
diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c
index 8afd789..85b98e7 100644
--- a/kernel/time/timecounter.c
+++ b/kernel/time/timecounter.c
@@ -1,20 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
- * linux/kernel/time/timecounter.c
- *
- * based on code that migrated away from
- * linux/kernel/time/clocksource.c
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
+ * Based on clocksource code. See commit 74d23cc704d1
*/
-
#include <linux/export.h>
#include <linux/timecounter.h>
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index f3b22f4..ca69290 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/time/timekeeping.c
- *
- * Kernel timekeeping code and accessor functions
- *
- * This code was moved from linux/kernel/timer.c.
- * Please see that file for copyright and history logs.
- *
+ * Kernel timekeeping code and accessor functions. Based on code from
+ * timer.c, moved in commit 8524070b7982.
*/
-
#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
@@ -26,6 +21,7 @@
#include <linux/stop_machine.h>
#include <linux/pvclock_gtod.h>
#include <linux/compiler.h>
+#include <linux/audit.h>
#include "tick-internal.h"
#include "ntp_internal.h"
@@ -50,7 +46,9 @@
static struct {
seqcount_t seq;
struct timekeeper timekeeper;
-} tk_core ____cacheline_aligned;
+} tk_core ____cacheline_aligned = {
+ .seq = SEQCNT_ZERO(tk_core.seq),
+};
static DEFINE_RAW_SPINLOCK(timekeeper_lock);
static struct timekeeper shadow_timekeeper;
@@ -148,6 +146,11 @@
static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
{
tk->offs_boot = ktime_add(tk->offs_boot, delta);
+ /*
+ * Timespec representation for VDSO update to avoid 64bit division
+ * on every update.
+ */
+ tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
}
/*
@@ -723,7 +726,7 @@
void ktime_get_real_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
u64 nsecs;
WARN_ON(timekeeping_suspended);
@@ -810,17 +813,18 @@
struct timekeeper *tk = &tk_core.timekeeper;
unsigned int seq;
ktime_t base, *offset = offsets[offs];
+ u64 nsecs;
WARN_ON(timekeeping_suspended);
do {
seq = read_seqcount_begin(&tk_core.seq);
base = ktime_add(tk->tkr_mono.base, *offset);
+ nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
} while (read_seqcount_retry(&tk_core.seq, seq));
- return base;
-
+ return ktime_add_ns(base, nsecs);
}
EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
@@ -832,7 +836,7 @@
ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
{
ktime_t *offset = offsets[offs];
- unsigned long seq;
+ unsigned int seq;
ktime_t tconv;
do {
@@ -963,7 +967,7 @@
void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
ktime_t base_raw;
ktime_t base_real;
u64 nsec_raw;
@@ -1125,7 +1129,7 @@
ktime_t base_real, base_raw;
u64 nsec_real, nsec_raw;
u8 cs_was_changed_seq;
- unsigned long seq;
+ unsigned int seq;
bool do_interp;
int ret;
@@ -1212,22 +1216,6 @@
EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
/**
- * do_gettimeofday - Returns the time of day in a timeval
- * @tv: pointer to the timeval to be set
- *
- * NOTE: Users should be converted to using getnstimeofday()
- */
-void do_gettimeofday(struct timeval *tv)
-{
- struct timespec64 now;
-
- getnstimeofday64(&now);
- tv->tv_sec = now.tv_sec;
- tv->tv_usec = now.tv_nsec/1000;
-}
-EXPORT_SYMBOL(do_gettimeofday);
-
-/**
* do_settimeofday64 - Sets the time of day.
* @ts: pointer to the timespec64 variable containing the new time
*
@@ -1240,7 +1228,7 @@
unsigned long flags;
int ret = 0;
- if (!timespec64_valid_strict(ts))
+ if (!timespec64_valid_settod(ts))
return -EINVAL;
raw_spin_lock_irqsave(&timekeeper_lock, flags);
@@ -1269,6 +1257,9 @@
/* signal hrtimers about time change */
clock_was_set();
+ if (!ret)
+ audit_tk_injoffset(ts_delta);
+
return ret;
}
EXPORT_SYMBOL(do_settimeofday64);
@@ -1297,7 +1288,7 @@
/* Make sure the proposed value is valid */
tmp = timespec64_add(tk_xtime(tk), *ts);
if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 ||
- !timespec64_valid_strict(&tmp)) {
+ !timespec64_valid_settod(&tmp)) {
ret = -EINVAL;
goto error;
}
@@ -1428,7 +1419,7 @@
void ktime_get_raw_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
u64 nsecs;
do {
@@ -1450,7 +1441,7 @@
int timekeeping_valid_for_hres(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
int ret;
do {
@@ -1469,7 +1460,7 @@
u64 timekeeping_max_deferment(void)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
u64 ret;
do {
@@ -1483,7 +1474,7 @@
}
/**
- * read_persistent_clock - Return time from the persistent clock.
+ * read_persistent_clock64 - Return time from the persistent clock.
*
* Weak dummy function for arches that do not yet support it.
* Reads the time from the battery backed persistent clock.
@@ -1491,20 +1482,12 @@
*
* XXX - Do be sure to remove it once all arches implement it.
*/
-void __weak read_persistent_clock(struct timespec *ts)
+void __weak read_persistent_clock64(struct timespec64 *ts)
{
ts->tv_sec = 0;
ts->tv_nsec = 0;
}
-void __weak read_persistent_clock64(struct timespec64 *ts64)
-{
- struct timespec ts;
-
- read_persistent_clock(&ts);
- *ts64 = timespec_to_timespec64(ts);
-}
-
/**
* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
* from the boot.
@@ -1554,7 +1537,7 @@
unsigned long flags;
read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
- if (timespec64_valid_strict(&wall_time) &&
+ if (timespec64_valid_settod(&wall_time) &&
timespec64_to_ns(&wall_time) > 0) {
persistent_clock_exists = true;
} else if (timespec64_to_ns(&wall_time) != 0) {
@@ -2174,18 +2157,10 @@
}
EXPORT_SYMBOL_GPL(getboottime64);
-unsigned long get_seconds(void)
-{
- struct timekeeper *tk = &tk_core.timekeeper;
-
- return tk->xtime_sec;
-}
-EXPORT_SYMBOL(get_seconds);
-
void ktime_get_coarse_real_ts64(struct timespec64 *ts)
{
struct timekeeper *tk = &tk_core.timekeeper;
- unsigned long seq;
+ unsigned int seq;
do {
seq = read_seqcount_begin(&tk_core.seq);
@@ -2199,7 +2174,7 @@
{
struct timekeeper *tk = &tk_core.timekeeper;
struct timespec64 now, mono;
- unsigned long seq;
+ unsigned int seq;
do {
seq = read_seqcount_begin(&tk_core.seq);
@@ -2269,7 +2244,7 @@
/**
* timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
*/
-static int timekeeping_validate_timex(const struct timex *txc)
+static int timekeeping_validate_timex(const struct __kernel_timex *txc)
{
if (txc->modes & ADJ_ADJTIME) {
/* singleshot must not be used with any other mode bits */
@@ -2335,9 +2310,10 @@
/**
* do_adjtimex() - Accessor function to NTP __do_adjtimex function
*/
-int do_adjtimex(struct timex *txc)
+int do_adjtimex(struct __kernel_timex *txc)
{
struct timekeeper *tk = &tk_core.timekeeper;
+ struct audit_ntp_data ad;
unsigned long flags;
struct timespec64 ts;
s32 orig_tai, tai;
@@ -2357,15 +2333,19 @@
ret = timekeeping_inject_offset(&delta);
if (ret)
return ret;
+
+ audit_tk_injoffset(delta);
}
+ audit_ntp_init(&ad);
+
ktime_get_real_ts64(&ts);
raw_spin_lock_irqsave(&timekeeper_lock, flags);
write_seqcount_begin(&tk_core.seq);
orig_tai = tai = tk->tai_offset;
- ret = __do_adjtimex(txc, &ts, &tai);
+ ret = __do_adjtimex(txc, &ts, &tai, &ad);
if (tai != orig_tai) {
__timekeeping_set_tai_offset(tk, tai);
@@ -2376,6 +2356,8 @@
write_seqcount_end(&tk_core.seq);
raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
+ audit_ntp_log(&ad);
+
/* Update the multiplier immediately if frequency was set directly */
if (txc->modes & (ADJ_FREQUENCY | ADJ_TICK))
timekeeping_advance(TK_ADV_FREQ);
diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h
index 7a9b4eb..141ab3a 100644
--- a/kernel/time/timekeeping.h
+++ b/kernel/time/timekeeping.h
@@ -14,6 +14,13 @@
extern void timekeeping_warp_clock(void);
extern int timekeeping_suspend(void);
extern void timekeeping_resume(void);
+#ifdef CONFIG_GENERIC_SCHED_CLOCK
+extern int sched_clock_suspend(void);
+extern void sched_clock_resume(void);
+#else
+static inline int sched_clock_suspend(void) { return 0; }
+static inline void sched_clock_resume(void) { }
+#endif
extern void do_timer(unsigned long ticks);
extern void update_wall_time(void);
diff --git a/kernel/time/timekeeping_debug.c b/kernel/time/timekeeping_debug.c
index 238e4be..b73e885 100644
--- a/kernel/time/timekeeping_debug.c
+++ b/kernel/time/timekeeping_debug.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0+
/*
* debugfs file to track time spent in suspend
*
* Copyright (c) 2011, Google, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
*/
#include <linux/debugfs.h>
@@ -28,7 +19,7 @@
static unsigned int sleep_time_bin[NUM_BINS] = {0};
-static int tk_debug_show_sleep_time(struct seq_file *s, void *data)
+static int tk_debug_sleep_time_show(struct seq_file *s, void *data)
{
unsigned int bin;
seq_puts(s, " time (secs) count\n");
@@ -42,30 +33,12 @@
}
return 0;
}
-
-static int tk_debug_sleep_time_open(struct inode *inode, struct file *file)
-{
- return single_open(file, tk_debug_show_sleep_time, NULL);
-}
-
-static const struct file_operations tk_debug_sleep_time_fops = {
- .open = tk_debug_sleep_time_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(tk_debug_sleep_time);
static int __init tk_debug_sleep_time_init(void)
{
- struct dentry *d;
-
- d = debugfs_create_file("sleep_time", 0444, NULL, NULL,
- &tk_debug_sleep_time_fops);
- if (!d) {
- pr_err("Failed to create sleep_time debug file\n");
- return -ENOMEM;
- }
-
+ debugfs_create_file("sleep_time", 0444, NULL, NULL,
+ &tk_debug_sleep_time_fops);
return 0;
}
late_initcall(tk_debug_sleep_time_init);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index fa49cd7..4820823 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1,6 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * linux/kernel/timer.c
- *
* Kernel internal timers
*
* Copyright (C) 1991, 1992 Linus Torvalds
@@ -197,6 +196,10 @@
struct timer_base {
raw_spinlock_t lock;
struct timer_list *running_timer;
+#ifdef CONFIG_PREEMPT_RT
+ spinlock_t expiry_lock;
+ atomic_t timer_waiters;
+#endif
unsigned long clk;
unsigned long next_expiry;
unsigned int cpu;
@@ -537,6 +540,8 @@
hlist_add_head(&timer->entry, base->vectors + idx);
__set_bit(idx, base->pending_map);
timer_set_idx(timer, idx);
+
+ trace_timer_start(timer, timer->expires, timer->flags);
}
static void
@@ -648,7 +653,7 @@
case ODEBUG_STATE_ACTIVE:
WARN_ON(1);
-
+ /* fall through */
default:
return false;
}
@@ -758,13 +763,6 @@
trace_timer_init(timer);
}
-static inline void
-debug_activate(struct timer_list *timer, unsigned long expires)
-{
- debug_timer_activate(timer);
- trace_timer_start(timer, expires, timer->flags);
-}
-
static inline void debug_deactivate(struct timer_list *timer)
{
debug_timer_deactivate(timer);
@@ -1038,7 +1036,7 @@
}
}
- debug_activate(timer, expires);
+ debug_timer_activate(timer);
timer->expires = expires;
/*
@@ -1172,7 +1170,7 @@
}
forward_timer_base(base);
- debug_activate(timer, timer->expires);
+ debug_timer_activate(timer);
internal_add_timer(base, timer);
raw_spin_unlock_irqrestore(&base->lock, flags);
}
@@ -1233,7 +1231,78 @@
}
EXPORT_SYMBOL(try_to_del_timer_sync);
-#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT
+static __init void timer_base_init_expiry_lock(struct timer_base *base)
+{
+ spin_lock_init(&base->expiry_lock);
+}
+
+static inline void timer_base_lock_expiry(struct timer_base *base)
+{
+ spin_lock(&base->expiry_lock);
+}
+
+static inline void timer_base_unlock_expiry(struct timer_base *base)
+{
+ spin_unlock(&base->expiry_lock);
+}
+
+/*
+ * The counterpart to del_timer_wait_running().
+ *
+ * If there is a waiter for base->expiry_lock, then it was waiting for the
+ * timer callback to finish. Drop expiry_lock and reaquire it. That allows
+ * the waiter to acquire the lock and make progress.
+ */
+static void timer_sync_wait_running(struct timer_base *base)
+{
+ if (atomic_read(&base->timer_waiters)) {
+ spin_unlock(&base->expiry_lock);
+ spin_lock(&base->expiry_lock);
+ }
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion, if the softirq thread on a remote CPU
+ * got preempted, and it prevents a life lock when the task which tries to
+ * delete a timer preempted the softirq thread running the timer callback
+ * function.
+ */
+static void del_timer_wait_running(struct timer_list *timer)
+{
+ u32 tf;
+
+ tf = READ_ONCE(timer->flags);
+ if (!(tf & TIMER_MIGRATING)) {
+ struct timer_base *base = get_timer_base(tf);
+
+ /*
+ * Mark the base as contended and grab the expiry lock,
+ * which is held by the softirq across the timer
+ * callback. Drop the lock immediately so the softirq can
+ * expire the next timer. In theory the timer could already
+ * be running again, but that's more than unlikely and just
+ * causes another wait loop.
+ */
+ atomic_inc(&base->timer_waiters);
+ spin_lock_bh(&base->expiry_lock);
+ atomic_dec(&base->timer_waiters);
+ spin_unlock_bh(&base->expiry_lock);
+ }
+}
+#else
+static inline void timer_base_init_expiry_lock(struct timer_base *base) { }
+static inline void timer_base_lock_expiry(struct timer_base *base) { }
+static inline void timer_base_unlock_expiry(struct timer_base *base) { }
+static inline void timer_sync_wait_running(struct timer_base *base) { }
+static inline void del_timer_wait_running(struct timer_list *timer) { }
+#endif
+
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT)
/**
* del_timer_sync - deactivate a timer and wait for the handler to finish.
* @timer: the timer to be deactivated
@@ -1272,6 +1341,8 @@
*/
int del_timer_sync(struct timer_list *timer)
{
+ int ret;
+
#ifdef CONFIG_LOCKDEP
unsigned long flags;
@@ -1289,17 +1360,24 @@
* could lead to deadlock.
*/
WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
- for (;;) {
- int ret = try_to_del_timer_sync(timer);
- if (ret >= 0)
- return ret;
- cpu_relax();
- }
+
+ do {
+ ret = try_to_del_timer_sync(timer);
+
+ if (unlikely(ret < 0)) {
+ del_timer_wait_running(timer);
+ cpu_relax();
+ }
+ } while (ret < 0);
+
+ return ret;
}
EXPORT_SYMBOL(del_timer_sync);
#endif
-static void call_timer_fn(struct timer_list *timer, void (*fn)(struct timer_list *))
+static void call_timer_fn(struct timer_list *timer,
+ void (*fn)(struct timer_list *),
+ unsigned long baseclk)
{
int count = preempt_count();
@@ -1322,14 +1400,14 @@
*/
lock_map_acquire(&lockdep_map);
- trace_timer_expire_entry(timer);
+ trace_timer_expire_entry(timer, baseclk);
fn(timer);
trace_timer_expire_exit(timer);
lock_map_release(&lockdep_map);
if (count != preempt_count()) {
- WARN_ONCE(1, "timer: %pF preempt leak: %08x -> %08x\n",
+ WARN_ONCE(1, "timer: %pS preempt leak: %08x -> %08x\n",
fn, count, preempt_count());
/*
* Restore the preempt count. That gives us a decent
@@ -1343,6 +1421,13 @@
static void expire_timers(struct timer_base *base, struct hlist_head *head)
{
+ /*
+ * This value is required only for tracing. base->clk was
+ * incremented directly before expire_timers was called. But expiry
+ * is related to the old base->clk value.
+ */
+ unsigned long baseclk = base->clk - 1;
+
while (!hlist_empty(head)) {
struct timer_list *timer;
void (*fn)(struct timer_list *);
@@ -1356,11 +1441,14 @@
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
- call_timer_fn(timer, fn);
+ call_timer_fn(timer, fn, baseclk);
+ base->running_timer = NULL;
raw_spin_lock(&base->lock);
} else {
raw_spin_unlock_irq(&base->lock);
- call_timer_fn(timer, fn);
+ call_timer_fn(timer, fn, baseclk);
+ base->running_timer = NULL;
+ timer_sync_wait_running(base);
raw_spin_lock_irq(&base->lock);
}
}
@@ -1590,24 +1678,26 @@
static int collect_expired_timers(struct timer_base *base,
struct hlist_head *heads)
{
+ unsigned long now = READ_ONCE(jiffies);
+
/*
* NOHZ optimization. After a long idle sleep we need to forward the
* base to current jiffies. Avoid a loop by searching the bitfield for
* the next expiring timer.
*/
- if ((long)(jiffies - base->clk) > 2) {
+ if ((long)(now - base->clk) > 2) {
unsigned long next = __next_timer_interrupt(base);
/*
* If the next timer is ahead of time forward to current
* jiffies, otherwise forward to the next expiry time:
*/
- if (time_after(next, jiffies)) {
+ if (time_after(next, now)) {
/*
* The call site will increment base->clk and then
* terminate the expiry loop immediately.
*/
- base->clk = jiffies;
+ base->clk = now;
return 0;
}
base->clk = next;
@@ -1633,14 +1723,14 @@
/* Note: this timer irq context must be accounted for as well. */
account_process_tick(p, user_tick);
run_local_timers();
- rcu_check_callbacks(user_tick);
+ rcu_sched_clock_irq(user_tick);
#ifdef CONFIG_IRQ_WORK
if (in_irq())
irq_work_tick();
#endif
scheduler_tick();
if (IS_ENABLED(CONFIG_POSIX_TIMERS))
- run_posix_cpu_timers(p);
+ run_posix_cpu_timers();
}
/**
@@ -1655,6 +1745,7 @@
if (!time_after_eq(jiffies, base->clk))
return;
+ timer_base_lock_expiry(base);
raw_spin_lock_irq(&base->lock);
/*
@@ -1681,8 +1772,8 @@
while (levels--)
expire_timers(base, heads + levels);
}
- base->running_timer = NULL;
raw_spin_unlock_irq(&base->lock);
+ timer_base_unlock_expiry(base);
}
/*
@@ -1927,6 +2018,7 @@
base->cpu = cpu;
raw_spin_lock_init(&base->lock);
base->clk = jiffies;
+ timer_base_init_expiry_lock(base);
}
}
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index d647dab..acb326f 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
/*
- * kernel/time/timer_list.c
- *
* List pending timers
*
* Copyright(C) 2006, Red Hat, Inc., Ingo Molnar
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#include <linux/proc_fs.h>
@@ -287,23 +282,6 @@
SEQ_printf(m, "\n");
}
-static int timer_list_show(struct seq_file *m, void *v)
-{
- struct timer_list_iter *iter = v;
-
- if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, iter->now);
- else if (!iter->second_pass)
- print_cpu(m, iter->cpu, iter->now);
-#ifdef CONFIG_GENERIC_CLOCKEVENTS
- else if (iter->cpu == -1 && iter->second_pass)
- timer_list_show_tickdevices_header(m);
- else
- print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
-#endif
- return 0;
-}
-
void sysrq_timer_list_show(void)
{
u64 now = ktime_to_ns(ktime_get());
@@ -322,6 +300,24 @@
return;
}
+#ifdef CONFIG_PROC_FS
+static int timer_list_show(struct seq_file *m, void *v)
+{
+ struct timer_list_iter *iter = v;
+
+ if (iter->cpu == -1 && !iter->second_pass)
+ timer_list_header(m, iter->now);
+ else if (!iter->second_pass)
+ print_cpu(m, iter->cpu, iter->now);
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ else if (iter->cpu == -1 && iter->second_pass)
+ timer_list_show_tickdevices_header(m);
+ else
+ print_tickdevice(m, tick_get_device(iter->cpu), iter->cpu);
+#endif
+ return 0;
+}
+
static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
for (; offset; offset--) {
@@ -381,3 +377,4 @@
return 0;
}
__initcall(init_timer_list_procfs);
+#endif
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
new file mode 100644
index 0000000..5ee0f77
--- /dev/null
+++ b/kernel/time/vsyscall.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2019 ARM Ltd.
+ *
+ * Generic implementation of update_vsyscall and update_vsyscall_tz.
+ *
+ * Based on the x86 specific implementation.
+ */
+
+#include <linux/hrtimer.h>
+#include <linux/timekeeper_internal.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+#include <vdso/vsyscall.h>
+
+static inline void update_vdso_data(struct vdso_data *vdata,
+ struct timekeeper *tk)
+{
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec, sec;
+
+ vdata[CS_HRES_COARSE].cycle_last = tk->tkr_mono.cycle_last;
+ vdata[CS_HRES_COARSE].mask = tk->tkr_mono.mask;
+ vdata[CS_HRES_COARSE].mult = tk->tkr_mono.mult;
+ vdata[CS_HRES_COARSE].shift = tk->tkr_mono.shift;
+ vdata[CS_RAW].cycle_last = tk->tkr_raw.cycle_last;
+ vdata[CS_RAW].mask = tk->tkr_raw.mask;
+ vdata[CS_RAW].mult = tk->tkr_raw.mult;
+ vdata[CS_RAW].shift = tk->tkr_raw.shift;
+
+ /* CLOCK_REALTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /* CLOCK_MONOTONIC */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+
+ nsec = tk->tkr_mono.xtime_nsec;
+ nsec += ((u64)tk->wall_to_monotonic.tv_nsec << tk->tkr_mono.shift);
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* Copy MONOTONIC time for BOOTTIME */
+ sec = vdso_ts->sec;
+ /* Add the boot offset */
+ sec += tk->monotonic_to_boot.tv_sec;
+ nsec += (u64)tk->monotonic_to_boot.tv_nsec << tk->tkr_mono.shift;
+
+ /* CLOCK_BOOTTIME */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_BOOTTIME];
+ vdso_ts->sec = sec;
+
+ while (nsec >= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) {
+ nsec -= (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift);
+ vdso_ts->sec++;
+ }
+ vdso_ts->nsec = nsec;
+
+ /* CLOCK_MONOTONIC_RAW */
+ vdso_ts = &vdata[CS_RAW].basetime[CLOCK_MONOTONIC_RAW];
+ vdso_ts->sec = tk->raw_sec;
+ vdso_ts->nsec = tk->tkr_raw.xtime_nsec;
+
+ /* CLOCK_TAI */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
+ vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
+ /*
+ * Read without the seqlock held by clock_getres().
+ * Note: No need to have a second copy.
+ */
+ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
+}
+
+void update_vsyscall(struct timekeeper *tk)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+ struct vdso_timestamp *vdso_ts;
+ u64 nsec;
+
+ if (__arch_update_vdso_data()) {
+ /*
+ * Some architectures might want to skip the update of the
+ * data page.
+ */
+ return;
+ }
+
+ /* copy vsyscall data */
+ vdso_write_begin(vdata);
+
+ vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
+ vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
+
+ /* CLOCK_REALTIME_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+
+ /* CLOCK_MONOTONIC_COARSE */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC_COARSE];
+ vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
+ nsec = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
+ nsec = nsec + tk->wall_to_monotonic.tv_nsec;
+ vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
+
+ update_vdso_data(vdata, tk);
+
+ __arch_update_vsyscall(vdata, tk);
+
+ vdso_write_end(vdata);
+
+ __arch_sync_vdso_data(vdata);
+}
+
+void update_vsyscall_tz(void)
+{
+ struct vdso_data *vdata = __arch_get_k_vdso_data();
+
+ vdata[CS_HRES_COARSE].tz_minuteswest = sys_tz.tz_minuteswest;
+ vdata[CS_HRES_COARSE].tz_dsttime = sys_tz.tz_dsttime;
+
+ __arch_sync_vdso_data(vdata);
+}