Update Linux to v5.10.109
Sourced from [1]
[1] https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.109.tar.xz
Change-Id: I19bca9fc6762d4e63bcf3e4cba88bbe560d9c76c
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 6863a05..e34ceb9 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -93,6 +93,20 @@
static int finished_booting;
static u64 suspend_start;
+/*
+ * Threshold: 0.0312s, when doubled: 0.0625s.
+ * Also a default for cs->uncertainty_margin when registering clocks.
+ */
+#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 5)
+
+/*
+ * Maximum permissible delay between two readouts of the watchdog
+ * clocksource surrounding a read of the clocksource being validated.
+ * This delay could be due to SMIs, NMIs, or to VCPU preemptions. Used as
+ * a lower bound for cs->uncertainty_margin values when registering clocks.
+ */
+#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
+
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static void clocksource_watchdog_work(struct work_struct *work);
static void clocksource_select(void);
@@ -119,17 +133,9 @@
static void __clocksource_change_rating(struct clocksource *cs, int rating);
/*
- * Interval: 0.5sec Threshold: 0.0625s
+ * Interval: 0.5sec.
*/
#define WATCHDOG_INTERVAL (HZ >> 1)
-#define WATCHDOG_THRESHOLD (NSEC_PER_SEC >> 4)
-
-/*
- * Maximum permissible delay between two readouts of the watchdog
- * clocksource surrounding a read of the clocksource being validated.
- * This delay could be due to SMIs, NMIs, or to VCPU preemptions.
- */
-#define WATCHDOG_MAX_SKEW (100 * NSEC_PER_USEC)
static void clocksource_watchdog_work(struct work_struct *work)
{
@@ -194,17 +200,24 @@
static ulong max_cswd_read_retries = 3;
module_param(max_cswd_read_retries, ulong, 0644);
-static bool cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
+enum wd_read_status {
+ WD_READ_SUCCESS,
+ WD_READ_UNSTABLE,
+ WD_READ_SKIP
+};
+
+static enum wd_read_status cs_watchdog_read(struct clocksource *cs, u64 *csnow, u64 *wdnow)
{
unsigned int nretries;
- u64 wd_end, wd_delta;
- int64_t wd_delay;
+ u64 wd_end, wd_end2, wd_delta;
+ int64_t wd_delay, wd_seq_delay;
for (nretries = 0; nretries <= max_cswd_read_retries; nretries++) {
local_irq_disable();
*wdnow = watchdog->read(watchdog);
*csnow = cs->read(cs);
wd_end = watchdog->read(watchdog);
+ wd_end2 = watchdog->read(watchdog);
local_irq_enable();
wd_delta = clocksource_delta(wd_end, *wdnow, watchdog->mask);
@@ -215,13 +228,88 @@
pr_warn("timekeeping watchdog on CPU%d: %s retried %d times before success\n",
smp_processor_id(), watchdog->name, nretries);
}
- return true;
+ return WD_READ_SUCCESS;
}
+
+ /*
+ * Now compute delay in consecutive watchdog read to see if
+ * there is too much external interferences that cause
+ * significant delay in reading both clocksource and watchdog.
+ *
+ * If consecutive WD read-back delay > WATCHDOG_MAX_SKEW/2,
+ * report system busy, reinit the watchdog and skip the current
+ * watchdog test.
+ */
+ wd_delta = clocksource_delta(wd_end2, wd_end, watchdog->mask);
+ wd_seq_delay = clocksource_cyc2ns(wd_delta, watchdog->mult, watchdog->shift);
+ if (wd_seq_delay > WATCHDOG_MAX_SKEW/2)
+ goto skip_test;
}
pr_warn("timekeeping watchdog on CPU%d: %s read-back delay of %lldns, attempt %d, marking unstable\n",
smp_processor_id(), watchdog->name, wd_delay, nretries);
- return false;
+ return WD_READ_UNSTABLE;
+
+skip_test:
+ pr_info("timekeeping watchdog on CPU%d: %s wd-wd read-back delay of %lldns\n",
+ smp_processor_id(), watchdog->name, wd_seq_delay);
+ pr_info("wd-%s-wd read-back delay of %lldns, clock-skew test skipped!\n",
+ cs->name, wd_delay);
+ return WD_READ_SKIP;
+}
+
+static u64 csnow_mid;
+static cpumask_t cpus_ahead;
+static cpumask_t cpus_behind;
+
+static void clocksource_verify_one_cpu(void *csin)
+{
+ struct clocksource *cs = (struct clocksource *)csin;
+
+ csnow_mid = cs->read(cs);
+}
+
+static void clocksource_verify_percpu(struct clocksource *cs)
+{
+ int64_t cs_nsec, cs_nsec_max = 0, cs_nsec_min = LLONG_MAX;
+ u64 csnow_begin, csnow_end;
+ int cpu, testcpu;
+ s64 delta;
+
+ cpumask_clear(&cpus_ahead);
+ cpumask_clear(&cpus_behind);
+ preempt_disable();
+ testcpu = smp_processor_id();
+ pr_warn("Checking clocksource %s synchronization from CPU %d.\n", cs->name, testcpu);
+ for_each_online_cpu(cpu) {
+ if (cpu == testcpu)
+ continue;
+ csnow_begin = cs->read(cs);
+ smp_call_function_single(cpu, clocksource_verify_one_cpu, cs, 1);
+ csnow_end = cs->read(cs);
+ delta = (s64)((csnow_mid - csnow_begin) & cs->mask);
+ if (delta < 0)
+ cpumask_set_cpu(cpu, &cpus_behind);
+ delta = (csnow_end - csnow_mid) & cs->mask;
+ if (delta < 0)
+ cpumask_set_cpu(cpu, &cpus_ahead);
+ delta = clocksource_delta(csnow_end, csnow_begin, cs->mask);
+ cs_nsec = clocksource_cyc2ns(delta, cs->mult, cs->shift);
+ if (cs_nsec > cs_nsec_max)
+ cs_nsec_max = cs_nsec;
+ if (cs_nsec < cs_nsec_min)
+ cs_nsec_min = cs_nsec;
+ }
+ preempt_enable();
+ if (!cpumask_empty(&cpus_ahead))
+ pr_warn(" CPUs %*pbl ahead of CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_ahead), testcpu, cs->name);
+ if (!cpumask_empty(&cpus_behind))
+ pr_warn(" CPUs %*pbl behind CPU %d for clocksource %s.\n",
+ cpumask_pr_args(&cpus_behind), testcpu, cs->name);
+ if (!cpumask_empty(&cpus_ahead) || !cpumask_empty(&cpus_behind))
+ pr_warn(" CPU %d check durations %lldns - %lldns for clocksource %s.\n",
+ testcpu, cs_nsec_min, cs_nsec_max, cs->name);
}
static void clocksource_watchdog(struct timer_list *unused)
@@ -230,6 +318,8 @@
int next_cpu, reset_pending;
int64_t wd_nsec, cs_nsec;
struct clocksource *cs;
+ enum wd_read_status read_ret;
+ u32 md;
spin_lock(&watchdog_lock);
if (!watchdog_running)
@@ -246,9 +336,12 @@
continue;
}
- if (!cs_watchdog_read(cs, &csnow, &wdnow)) {
- /* Clock readout unreliable, so give it up. */
- __clocksource_unstable(cs);
+ read_ret = cs_watchdog_read(cs, &csnow, &wdnow);
+
+ if (read_ret != WD_READ_SUCCESS) {
+ if (read_ret == WD_READ_UNSTABLE)
+ /* Clock readout unreliable, so give it up. */
+ __clocksource_unstable(cs);
continue;
}
@@ -276,7 +369,8 @@
continue;
/* Check the deviation from the watchdog clocksource. */
- if (abs(cs_nsec - wd_nsec) > WATCHDOG_THRESHOLD) {
+ md = cs->uncertainty_margin + watchdog->uncertainty_margin;
+ if (abs(cs_nsec - wd_nsec) > md) {
pr_warn("timekeeping watchdog on CPU%d: Marking clocksource '%s' as unstable because the skew is too large:\n",
smp_processor_id(), cs->name);
pr_warn(" '%s' wd_now: %llx wd_last: %llx mask: %llx\n",
@@ -448,6 +542,12 @@
unsigned long flags;
int select = 0;
+ /* Do any required per-CPU skew verification. */
+ if (curr_clocksource &&
+ curr_clocksource->flags & CLOCK_SOURCE_UNSTABLE &&
+ curr_clocksource->flags & CLOCK_SOURCE_VERIFY_PERCPU)
+ clocksource_verify_percpu(curr_clocksource);
+
spin_lock_irqsave(&watchdog_lock, flags);
list_for_each_entry_safe(cs, tmp, &watchdog_list, wd_list) {
if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
@@ -925,6 +1025,26 @@
clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
NSEC_PER_SEC / scale, sec * scale);
}
+
+ /*
+ * If the uncertainty margin is not specified, calculate it.
+ * If both scale and freq are non-zero, calculate the clock
+ * period, but bound below at 2*WATCHDOG_MAX_SKEW. However,
+ * if either of scale or freq is zero, be very conservative and
+ * take the tens-of-milliseconds WATCHDOG_THRESHOLD value for the
+ * uncertainty margin. Allow stupidly small uncertainty margins
+ * to be specified by the caller for testing purposes, but warn
+ * to discourage production use of this capability.
+ */
+ if (scale && freq && !cs->uncertainty_margin) {
+ cs->uncertainty_margin = NSEC_PER_SEC / (scale * freq);
+ if (cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW)
+ cs->uncertainty_margin = 2 * WATCHDOG_MAX_SKEW;
+ } else if (!cs->uncertainty_margin) {
+ cs->uncertainty_margin = WATCHDOG_THRESHOLD;
+ }
+ WARN_ON_ONCE(cs->uncertainty_margin < 2 * WATCHDOG_MAX_SKEW);
+
/*
* Ensure clocksources that have large 'mult' values don't overflow
* when adjusted.
@@ -969,6 +1089,13 @@
clocksource_arch_init(cs);
+ if (cs->vdso_clock_mode < 0 ||
+ cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
+ pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
+ cs->name, cs->vdso_clock_mode);
+ cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
+ }
+
/* Initialize mult/shift and max_idle_ns */
__clocksource_update_freq_scale(cs, scale, freq);