Update Linux to v5.4.148
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.4.148.tar.gz
Change-Id: Ib3d26c5ba9b022e2e03533005c4fed4d7c30b61b
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 451f9d0..0e96c38 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -88,6 +88,8 @@
unsigned long flags;
struct rtc_device *rtc = to_rtc_device(dev);
struct wakeup_source *__ws;
+ struct platform_device *pdev;
+ int ret = 0;
if (rtcdev)
return -EBUSY;
@@ -98,12 +100,14 @@
return -1;
__ws = wakeup_source_register(dev, "alarmtimer");
+ pdev = platform_device_register_data(dev, "alarmtimer",
+ PLATFORM_DEVID_AUTO, NULL, 0);
spin_lock_irqsave(&rtcdev_lock, flags);
- if (!rtcdev) {
+ if (__ws && !IS_ERR(pdev) && !rtcdev) {
if (!try_module_get(rtc->owner)) {
- spin_unlock_irqrestore(&rtcdev_lock, flags);
- return -1;
+ ret = -1;
+ goto unlock;
}
rtcdev = rtc;
@@ -111,12 +115,17 @@
get_device(dev);
ws = __ws;
__ws = NULL;
+ pdev = NULL;
+ } else {
+ ret = -1;
}
+unlock:
spin_unlock_irqrestore(&rtcdev_lock, flags);
+ platform_device_unregister(pdev);
wakeup_source_unregister(__ws);
- return 0;
+ return ret;
}
static inline void alarmtimer_rtc_timer_init(void)
@@ -829,9 +838,9 @@
if (flags == TIMER_ABSTIME)
return -ERESTARTNOHAND;
- restart->fn = alarm_timer_nsleep_restart;
restart->nanosleep.clockid = type;
restart->nanosleep.expires = exp;
+ set_restart_fn(restart, alarm_timer_nsleep_restart);
return ret;
}
@@ -874,8 +883,7 @@
*/
static int __init alarmtimer_init(void)
{
- struct platform_device *pdev;
- int error = 0;
+ int error;
int i;
alarmtimer_rtc_timer_init();
@@ -898,15 +906,7 @@
if (error)
goto out_if;
- pdev = platform_device_register_simple("alarmtimer", -1, NULL, 0);
- if (IS_ERR(pdev)) {
- error = PTR_ERR(pdev);
- goto out_drv;
- }
return 0;
-
-out_drv:
- platform_driver_unregister(&alarmtimer_driver);
out_if:
alarmtimer_rtc_interface_remove();
return error;
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index fff5f64..6863a05 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -124,6 +124,13 @@
#define WATCHDOG_INTERVAL (HZ >> 1)
#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
+ */
+#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
static void clocksource_watchdog_work(struct work_struct *work)
{
/*
@@ -184,12 +191,45 @@
spin_unlock_irqrestore(&watchdog_lock, flags);
}
+static ulong max_cswd_read_retries = 3;
+module_param(max_cswd_read_retries, ulong, 0644);
+
+static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+{
+ unsigned int nretries;
+ u64 wd_end, wd_delta;
+ int64_t wd_delay;
+
+ for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
+ local_irq_disable();
+ *wdnow = watchdog->read(watchdog);
+ *csnow = cs->read(cs);
+ wd_end = watchdog->read(watchdog);
+ local_irq_enable();
+
+ wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
+ wd_delay = clocksource_cyc2ns(wd_delta, watchdog->mult,
+ watchdog->shift);
+ if (wd_delay <= WATCHDOG_MAX_SKEW) {
+ if (nretries > 1 || nretries >= max_cswd_read_retries) {
+ pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
+ smp_processor_id(), watchdog->name, nretries);
+ }
+ return true;
+ }
+ }
+
+ pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
+ smp_processor_id(), watchdog->name, wd_delay, nretries);
+ return false;
+}
+
static void clocksource_watchdog(struct timer_list *unused)
{
- struct clocksource *cs;
u64 csnow, wdnow, cslast, wdlast, delta;
- int64_t wd_nsec, cs_nsec;
int next_cpu, reset_pending;
+ int64_t wd_nsec, cs_nsec;
+ struct clocksource *cs;
spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -206,10 +246,11 @@
continue;
}
- local_irq_disable();
- csnow = cs->read(cs);
- wdnow = watchdog->read(watchdog);
- local_irq_enable();
+ if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
+ /* Clock readout unreliable, so give it up. */
+ __clocksource_unstable(cs);
+ continue;
+ }
/* Clocksource initialized ? */
if (!(cs->flags & CLOCK_SOURCE_WATCHDOG) ||
@@ -293,8 +334,15 @@
next_cpu = cpumask_next(raw_smp_processor_id(), cpu_online_mask);
if (next_cpu >= nr_cpu_ids)
next_cpu = cpumask_first(cpu_online_mask);
- watchdog_timer.expires += WATCHDOG_INTERVAL;
- add_timer_on(&watchdog_timer, next_cpu);
+
+ /*
+ * Arm timer if not already pending: could race with concurrent
+ * pair clocksource_stop_watchdog() clocksource_start_watchdog().
+ */
+ if (!timer_pending(&watchdog_timer)) {
+ watchdog_timer.expires += WATCHDOG_INTERVAL;
+ add_timer_on(&watchdog_timer, next_cpu);
+ }
out:
spin_unlock(&watchdog_lock);
}
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 6560553..e1e8d5d 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -547,8 +547,11 @@
}
/*
- * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
- * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next
+ * but does not set cpu_base::*expires_next, that is done by
+ * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating
+ * cpu_base::*expires_next right away, reprogramming logic would no longer
+ * work.
*
* When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
* those timers will get run whenever the softirq gets handled, at the end of
@@ -589,6 +592,37 @@
return expires_next;
}
+static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+ ktime_t expires_next, soft = KTIME_MAX;
+
+ /*
+ * If the soft interrupt has already been activated, ignore the
+ * soft bases. They will be handled in the already raised soft
+ * interrupt.
+ */
+ if (!cpu_base->softirq_activated) {
+ soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+ /*
+ * Update the soft expiry time. clock_settime() might have
+ * affected it.
+ */
+ cpu_base->softirq_expires_next = soft;
+ }
+
+ expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+ /*
+ * If a softirq timer is expiring first, update cpu_base->next_timer
+ * and program the hardware with the soft expiry time.
+ */
+ if (expires_next > soft) {
+ cpu_base->next_timer = cpu_base->softirq_next_timer;
+ expires_next = soft;
+ }
+
+ return expires_next;
+}
+
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
@@ -629,23 +663,7 @@
{
ktime_t expires_next;
- /*
- * Find the current next expiration time.
- */
- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
-
- if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
- /*
- * When the softirq is activated, hrtimer has to be
- * programmed with the first hard hrtimer because soft
- * timer interrupt could occur too late.
- */
- if (cpu_base->softirq_activated)
- expires_next = __hrtimer_get_next_event(cpu_base,
- HRTIMER_ACTIVE_HARD);
- else
- cpu_base->softirq_expires_next = expires_next;
- }
+ expires_next = hrtimer_update_next_event(cpu_base);
if (skip_equal && expires_next == cpu_base->expires_next)
return;
@@ -741,22 +759,6 @@
retrigger_next_event(NULL);
}
-static void clock_was_set_work(struct work_struct *work)
-{
- clock_was_set();
-}
-
-static DECLARE_WORK(hrtimer_work, clock_was_set_work);
-
-/*
- * Called from timekeeping and resume code to reprogram the hrtimer
- * interrupt device on all cpus.
- */
-void clock_was_set_delayed(void)
-{
- schedule_work(&hrtimer_work);
-}
-
#else
static inline int hrtimer_is_hres_enabled(void) { return 0; }
@@ -874,6 +876,22 @@
timerfd_clock_was_set();
}
+static void clock_was_set_work(struct work_struct *work)
+{
+ clock_was_set();
+}
+
+static DECLARE_WORK(hrtimer_work, clock_was_set_work);
+
+/*
+ * Called from timekeeping and resume code to reprogram the hrtimer
+ * interrupt device on all cpus and to notify timerfd.
+ */
+void clock_was_set_delayed(void)
+{
+ schedule_work(&hrtimer_work);
+}
+
/*
* During resume we might have to reprogram the high resolution timer
* interrupt on all online CPUs. However, all other CPUs will be
@@ -966,7 +984,8 @@
base->cpu_base->active_bases |= 1 << base->index;
- timer->state = HRTIMER_STATE_ENQUEUED;
+ /* Pairs with the lockless read in hrtimer_is_queued() */
+ WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);
return timerqueue_add(&base->active, &timer->node);
}
@@ -988,7 +1007,8 @@
struct hrtimer_cpu_base *cpu_base = base->cpu_base;
u8 state = timer->state;
- timer->state = newstate;
+ /* Pairs with the lockless read in hrtimer_is_queued() */
+ WRITE_ONCE(timer->state, newstate);
if (!(state & HRTIMER_STATE_ENQUEUED))
return;
@@ -1011,11 +1031,13 @@
* remove hrtimer, called with base lock held
*/
static inline int
-remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
+remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
+ bool restart, bool keep_local)
{
- if (hrtimer_is_queued(timer)) {
- u8 state = timer->state;
- int reprogram;
+ u8 state = timer->state;
+
+ if (state & HRTIMER_STATE_ENQUEUED) {
+ bool reprogram;
/*
* Remove the timer and force reprogramming when high
@@ -1028,8 +1050,16 @@
debug_deactivate(timer);
reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ /*
+ * If the timer is not restarted then reprogramming is
+ * required if the timer is local. If it is local and about
+ * to be restarted, avoid programming it twice (on removal
+ * and a moment later when it's requeued).
+ */
if (!restart)
state = HRTIMER_STATE_INACTIVE;
+ else
+ reprogram &= !keep_local;
__remove_hrtimer(timer, base, state, reprogram);
return 1;
@@ -1083,9 +1113,31 @@
struct hrtimer_clock_base *base)
{
struct hrtimer_clock_base *new_base;
+ bool force_local, first;
- /* Remove an active timer from the queue: */
- remove_hrtimer(timer, base, true);
+ /*
+ * If the timer is on the local cpu base and is the first expiring
+ * timer then this might end up reprogramming the hardware twice
+ * (on removal and on enqueue). To avoid that by prevent the
+ * reprogram on removal, keep the timer local to the current CPU
+ * and enforce reprogramming after it is queued no matter whether
+ * it is the new first expiring timer again or not.
+ */
+ force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
+ force_local &= base->cpu_base->next_timer == timer;
+
+ /*
+ * Remove an active timer from the queue. In case it is not queued
+ * on the current CPU, make sure that remove_hrtimer() updates the
+ * remote data correctly.
+ *
+ * If it's on the current CPU and the first expiring timer, then
+ * skip reprogramming, keep the timer local and enforce
+ * reprogramming later if it was the first expiring timer. This
+ * avoids programming the underlying clock event twice (once at
+ * removal and once after enqueue).
+ */
+ remove_hrtimer(timer, base, true, force_local);
if (mode & HRTIMER_MODE_REL)
tim = ktime_add_safe(tim, base->get_time());
@@ -1095,9 +1147,24 @@
hrtimer_set_expires_range_ns(timer, tim, delta_ns);
/* Switch the timer base, if necessary: */
- new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);
+ if (!force_local) {
+ new_base = switch_hrtimer_base(timer, base,
+ mode & HRTIMER_MODE_PINNED);
+ } else {
+ new_base = base;
+ }
- return enqueue_hrtimer(timer, new_base, mode);
+ first = enqueue_hrtimer(timer, new_base, mode);
+ if (!force_local)
+ return first;
+
+ /*
+ * Timer was forced to stay on the current CPU to avoid
+ * reprogramming on removal and enqueue. Force reprogram the
+ * hardware by evaluating the new first expiring timer.
+ */
+ hrtimer_force_reprogram(new_base->cpu_base, 1);
+ return 0;
}
/**
@@ -1163,7 +1230,7 @@
base = lock_hrtimer_base(timer, &flags);
if (!hrtimer_callback_running(timer))
- ret = remove_hrtimer(timer, base, false);
+ ret = remove_hrtimer(timer, base, false, false);
unlock_hrtimer_base(timer, &flags);
@@ -1637,8 +1704,8 @@
__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
- /* Reevaluate the clock bases for the next expiry */
- expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+ /* Reevaluate the clock bases for the [soft] next expiry */
+ expires_next = hrtimer_update_next_event(cpu_base);
/*
* Store the new expiry value so the migration code can verify
* against it.
@@ -1932,9 +1999,9 @@
}
restart = ¤t->restart_block;
- restart->fn = hrtimer_nanosleep_restart;
restart->nanosleep.clockid = t.timer.base->clockid;
restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+ set_restart_fn(restart, hrtimer_nanosleep_restart);
out:
destroy_hrtimer_on_stack(&t.timer);
return ret;
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index 77f1e56..62dc975 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -147,10 +147,6 @@
u64 oval, nval, ointerval, ninterval;
struct cpu_itimer *it = &tsk->signal->it[clock_id];
- /*
- * Use the to_ktime conversion because that clamps the maximum
- * value to KTIME_MAX and avoid multiplication overflows.
- */
nval = ktime_to_ns(timeval_to_ktime(value->it_value));
ninterval = ktime_to_ns(timeval_to_ktime(value->it_interval));
diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c
index ec960bb..200fb2d 100644
--- a/kernel/time/posix-clock.c
+++ b/kernel/time/posix-clock.c
@@ -14,8 +14,6 @@
#include "posix-timers.h"
-static void delete_clock(struct kref *kref);
-
/*
* Returns NULL if the posix_clock instance attached to 'fp' is old and stale.
*/
@@ -125,7 +123,7 @@
err = 0;
if (!err) {
- kref_get(&clk->kref);
+ get_device(clk->dev);
fp->private_data = clk;
}
out:
@@ -141,7 +139,7 @@
if (clk->ops.release)
err = clk->ops.release(clk);
- kref_put(&clk->kref, delete_clock);
+ put_device(clk->dev);
fp->private_data = NULL;
@@ -161,38 +159,35 @@
#endif
};
-int posix_clock_register(struct posix_clock *clk, dev_t devid)
+int posix_clock_register(struct posix_clock *clk, struct device *dev)
{
int err;
- kref_init(&clk->kref);
init_rwsem(&clk->rwsem);
cdev_init(&clk->cdev, &posix_clock_file_operations);
+ err = cdev_device_add(&clk->cdev, dev);
+ if (err) {
+ pr_err("%s unable to add device %d:%d\n",
+ dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt));
+ return err;
+ }
clk->cdev.owner = clk->ops.owner;
- err = cdev_add(&clk->cdev, devid, 1);
+ clk->dev = dev;
- return err;
+ return 0;
}
EXPORT_SYMBOL_GPL(posix_clock_register);
-static void delete_clock(struct kref *kref)
-{
- struct posix_clock *clk = container_of(kref, struct posix_clock, kref);
-
- if (clk->release)
- clk->release(clk);
-}
-
void posix_clock_unregister(struct posix_clock *clk)
{
- cdev_del(&clk->cdev);
+ cdev_device_del(&clk->cdev, clk->dev);
down_write(&clk->rwsem);
clk->zombie = true;
up_write(&clk->rwsem);
- kref_put(&clk->kref, delete_clock);
+ put_device(clk->dev);
}
EXPORT_SYMBOL_GPL(posix_clock_unregister);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index 42d512f..eacb0ca 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -1335,8 +1335,8 @@
if (flags & TIMER_ABSTIME)
return -ERESTARTNOHAND;
- restart_block->fn = posix_cpu_nsleep_restart;
restart_block->nanosleep.clockid = which_clock;
+ set_restart_fn(restart_block, posix_cpu_nsleep_restart);
}
return error;
}
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index 0ec5b7a..97d4a9d 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -1169,8 +1169,8 @@
err = do_clock_adjtime(which_clock, &ktx);
- if (err >= 0)
- err = put_old_timex32(utp, &ktx);
+ if (err >= 0 && put_old_timex32(utp, &ktx))
+ return -EFAULT;
return err;
}
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index dbd6905..a5538dd 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -207,7 +207,8 @@
if (sched_clock_timer.function != NULL) {
/* update timeout for clock wrap */
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt,
+ HRTIMER_MODE_REL_HARD);
}
r = rate;
@@ -251,9 +252,9 @@
* Start the timer to keep sched_clock() properly updated and
* sets the initial epoch.
*/
- hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init(&sched_clock_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
sched_clock_timer.function = sched_clock_poll;
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD);
}
/*
@@ -290,7 +291,7 @@
struct clock_read_data *rd = &cd.read_data[0];
rd->epoch_cyc = cd.actual_read_sched_clock();
- hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL);
+ hrtimer_start(&sched_clock_timer, cd.wrap_kt, HRTIMER_MODE_REL_HARD);
rd->read_sched_clock = cd.actual_read_sched_clock;
}
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 59225b4..7e5d352 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -11,6 +11,7 @@
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
+#include <linux/nmi.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
@@ -558,6 +559,7 @@
trace_suspend_resume(TPS("timekeeping_freeze"),
smp_processor_id(), false);
} else {
+ touch_softlockup_watchdog();
tick_resume_local();
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 7b24961..5294f5b 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -165,3 +165,6 @@
extern u64 get_next_timer_interrupt(unsigned long basej, u64 basem);
void timer_clear_idle(void);
+
+void clock_was_set(void);
+void clock_was_set_delayed(void);
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 9558517..4419486 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -58,8 +58,9 @@
/*
* Do a quick check without holding jiffies_lock:
+ * The READ_ONCE() pairs with two updates done later in this function.
*/
- delta = ktime_sub(now, last_jiffies_update);
+ delta = ktime_sub(now, READ_ONCE(last_jiffies_update));
if (delta < tick_period)
return;
@@ -70,8 +71,9 @@
if (delta >= tick_period) {
delta = ktime_sub(delta, tick_period);
- last_jiffies_update = ktime_add(last_jiffies_update,
- tick_period);
+ /* Pairs with the lockless read in this function. */
+ WRITE_ONCE(last_jiffies_update,
+ ktime_add(last_jiffies_update, tick_period));
/* Slow path for long timeouts */
if (unlikely(delta >= tick_period)) {
@@ -79,8 +81,10 @@
ticks = ktime_divns(delta, incr);
- last_jiffies_update = ktime_add_ns(last_jiffies_update,
- incr * ticks);
+ /* Pairs with the lockless read in this function. */
+ WRITE_ONCE(last_jiffies_update,
+ ktime_add_ns(last_jiffies_update,
+ incr * ticks));
}
do_timer(++ticks);
@@ -912,13 +916,6 @@
*/
if (tick_do_timer_cpu == cpu)
return false;
- /*
- * Boot safety: make sure the timekeeping duty has been
- * assigned before entering dyntick-idle mode,
- * tick_do_timer_cpu is TICK_DO_TIMER_BOOT
- */
- if (unlikely(tick_do_timer_cpu == TICK_DO_TIMER_BOOT))
- return false;
/* Should not happen for nohz-full */
if (WARN_ON_ONCE(tick_do_timer_cpu == TICK_DO_TIMER_NONE))
diff --git a/kernel/time/time.c b/kernel/time/time.c
index 5c54ca6..83f403e 100644
--- a/kernel/time/time.c
+++ b/kernel/time/time.c
@@ -881,7 +881,8 @@
ts->tv_sec = kts.tv_sec;
/* Zero out the padding for 32 bit systems or in compat mode */
- if (IS_ENABLED(CONFIG_64BIT_TIME) && in_compat_syscall())
+ if (IS_ENABLED(CONFIG_64BIT_TIME) && (!IS_ENABLED(CONFIG_64BIT) ||
+ in_compat_syscall()))
kts.tv_nsec &= 0xFFFFFFFFUL;
ts->tv_nsec = kts.tv_nsec;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index ca69290..4fc2af4 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -1005,9 +1005,8 @@
((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
return -EOVERFLOW;
tmp *= mult;
- rem *= mult;
- do_div(rem, div);
+ rem = div64_u64(rem * mult, div);
*base = tmp + rem;
return 0;
}
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 4820823..16a2b62 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -43,6 +43,7 @@
#include <linux/sched/debug.h>
#include <linux/slab.h>
#include <linux/compat.h>
+#include <linux/random.h>
#include <linux/uaccess.h>
#include <asm/unistd.h>
@@ -522,8 +523,8 @@
* Force expire obscene large timeouts to expire at the
* capacity limit of the wheel.
*/
- if (expires >= WHEEL_TIMEOUT_CUTOFF)
- expires = WHEEL_TIMEOUT_MAX;
+ if (delta >= WHEEL_TIMEOUT_CUTOFF)
+ expires = clk + WHEEL_TIMEOUT_MAX;
idx = calc_index(expires, LVL_DEPTH - 1);
}
@@ -585,7 +586,15 @@
* Set the next expiry time and kick the CPU so it can reevaluate the
* wheel:
*/
- base->next_expiry = timer->expires;
+ if (time_before(timer->expires, base->clk)) {
+ /*
+ * Prevent from forward_timer_base() moving the base->clk
+ * backward
+ */
+ base->next_expiry = base->clk;
+ } else {
+ base->next_expiry = timer->expires;
+ }
wake_up_nohz_cpu(base->cpu);
}
@@ -897,10 +906,13 @@
* If the next expiry value is > jiffies, then we fast forward to
* jiffies otherwise we forward to the next expiry value.
*/
- if (time_after(base->next_expiry, jnow))
+ if (time_after(base->next_expiry, jnow)) {
base->clk = jnow;
- else
+ } else {
+ if (WARN_ON_ONCE(time_before(base->next_expiry, base->clk)))
+ return;
base->clk = base->next_expiry;
+ }
#endif
}
@@ -1257,8 +1269,10 @@
static void timer_sync_wait_running(struct timer_base *base)
{
if (atomic_read(&base->timer_waiters)) {
+ raw_spin_unlock_irq(&base->lock);
spin_unlock(&base->expiry_lock);
spin_lock(&base->expiry_lock);
+ raw_spin_lock_irq(&base->lock);
}
}
@@ -1442,14 +1456,14 @@
if (timer->flags & TIMER_IRQSAFE) {
raw_spin_unlock(&base->lock);
call_timer_fn(timer, fn, baseclk);
- base->running_timer = NULL;
raw_spin_lock(&base->lock);
+ base->running_timer = NULL;
} else {
raw_spin_unlock_irq(&base->lock);
call_timer_fn(timer, fn, baseclk);
+ raw_spin_lock_irq(&base->lock);
base->running_timer = NULL;
timer_sync_wait_running(base);
- raw_spin_lock_irq(&base->lock);
}
}
}
diff --git a/kernel/time/vsyscall.c b/kernel/time/vsyscall.c
index 5ee0f77..9577c89 100644
--- a/kernel/time/vsyscall.c
+++ b/kernel/time/vsyscall.c
@@ -28,11 +28,6 @@
vdata[CS_RAW].mult = tk->tkr_raw.mult;
vdata[CS_RAW].shift = tk->tkr_raw.shift;
- /* CLOCK_REALTIME */
- vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
- vdso_ts->sec = tk->xtime_sec;
- vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
-
/* CLOCK_MONOTONIC */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_MONOTONIC];
vdso_ts->sec = tk->xtime_sec + tk->wall_to_monotonic.tv_sec;
@@ -70,12 +65,6 @@
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_TAI];
vdso_ts->sec = tk->xtime_sec + (s64)tk->tai_offset;
vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
-
- /*
- * Read without the seqlock held by clock_getres().
- * Note: No need to have a second copy.
- */
- WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
}
void update_vsyscall(struct timekeeper *tk)
@@ -84,20 +73,17 @@
struct vdso_timestamp *vdso_ts;
u64 nsec;
- if (__arch_update_vdso_data()) {
- /*
- * Some architectures might want to skip the update of the
- * data page.
- */
- return;
- }
-
/* copy vsyscall data */
vdso_write_begin(vdata);
vdata[CS_HRES_COARSE].clock_mode = __arch_get_clock_mode(tk);
vdata[CS_RAW].clock_mode = __arch_get_clock_mode(tk);
+ /* CLOCK_REALTIME also required for time() */
+ vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME];
+ vdso_ts->sec = tk->xtime_sec;
+ vdso_ts->nsec = tk->tkr_mono.xtime_nsec;
+
/* CLOCK_REALTIME_COARSE */
vdso_ts = &vdata[CS_HRES_COARSE].basetime[CLOCK_REALTIME_COARSE];
vdso_ts->sec = tk->xtime_sec;
@@ -110,7 +96,18 @@
nsec = nsec + tk->wall_to_monotonic.tv_nsec;
vdso_ts->sec += __iter_div_u64_rem(nsec, NSEC_PER_SEC, &vdso_ts->nsec);
- update_vdso_data(vdata, tk);
+ /*
+ * Read without the seqlock held by clock_getres().
+ * Note: No need to have a second copy.
+ */
+ WRITE_ONCE(vdata[CS_HRES_COARSE].hrtimer_res, hrtimer_resolution);
+
+ /*
+ * Architectures can opt out of updating the high resolution part
+ * of the VDSO.
+ */
+ if (__arch_update_vdso_data())
+ update_vdso_data(vdata, tk);
__arch_update_vsyscall(vdata, tk);