Blame - kernel/time/timekeeping.c - hafnium/third_party/linux

blob: ca69290bee2a3131358993e9a3bbc32c6a9a9231 [file] [log] [blame]

David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1	// SPDX-License-Identifier: GPL-2.0
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2	/*
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	3	* Kernel timekeeping code and accessor functions. Based on code from
				4	* timer.c, moved in commit 8524070b7982.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	5	*/
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	6	#include <linux/timekeeper_internal.h>
				7	#include <linux/module.h>
				8	#include <linux/interrupt.h>
				9	#include <linux/percpu.h>
				10	#include <linux/init.h>
				11	#include <linux/mm.h>
				12	#include <linux/nmi.h>
				13	#include <linux/sched.h>
				14	#include <linux/sched/loadavg.h>
				15	#include <linux/sched/clock.h>
				16	#include <linux/syscore_ops.h>
				17	#include <linux/clocksource.h>
				18	#include <linux/jiffies.h>
				19	#include <linux/time.h>
				20	#include <linux/tick.h>
				21	#include <linux/stop_machine.h>
				22	#include <linux/pvclock_gtod.h>
				23	#include <linux/compiler.h>
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	24	#include <linux/audit.h>
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	25
				26	#include "tick-internal.h"
				27	#include "ntp_internal.h"
				28	#include "timekeeping_internal.h"
				29
				30	#define TK_CLEAR_NTP (1 << 0)
				31	#define TK_MIRROR (1 << 1)
				32	#define TK_CLOCK_WAS_SET (1 << 2)
				33
				34	enum timekeeping_adv_mode {
				35	/* Update timekeeper when a tick has passed */
				36	TK_ADV_TICK,
				37
				38	/* Update timekeeper on a direct frequency change */
				39	TK_ADV_FREQ
				40	};
				41
				42	/*
				43	* The most important data for readout fits into a single 64 byte
				44	* cache line.
				45	*/
				46	static struct {
				47	seqcount_t seq;
				48	struct timekeeper timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	49	} tk_core ____cacheline_aligned = {
				50	.seq = SEQCNT_ZERO(tk_core.seq),
				51	};
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	52
				53	static DEFINE_RAW_SPINLOCK(timekeeper_lock);
				54	static struct timekeeper shadow_timekeeper;
				55
				56	/**
				57	* struct tk_fast - NMI safe timekeeper
				58	* @seq: Sequence counter for protecting updates. The lowest bit
				59	* is the index for the tk_read_base array
				60	* @base: tk_read_base array. Access is indexed by the lowest bit of
				61	* @seq.
				62	*
				63	* See @update_fast_timekeeper() below.
				64	*/
				65	struct tk_fast {
				66	seqcount_t seq;
				67	struct tk_read_base base[2];
				68	};
				69
				70	/* Suspend-time cycles value for halted fast timekeeper. */
				71	static u64 cycles_at_suspend;
				72
				73	static u64 dummy_clock_read(struct clocksource *cs)
				74	{
				75	return cycles_at_suspend;
				76	}
				77
				78	static struct clocksource dummy_clock = {
				79	.read = dummy_clock_read,
				80	};
				81
				82	static struct tk_fast tk_fast_mono ____cacheline_aligned = {
				83	.base[0] = { .clock = &dummy_clock, },
				84	.base[1] = { .clock = &dummy_clock, },
				85	};
				86
				87	static struct tk_fast tk_fast_raw ____cacheline_aligned = {
				88	.base[0] = { .clock = &dummy_clock, },
				89	.base[1] = { .clock = &dummy_clock, },
				90	};
				91
				92	/* flag for if timekeeping is suspended */
				93	int __read_mostly timekeeping_suspended;
				94
				95	static inline void tk_normalize_xtime(struct timekeeper *tk)
				96	{
				97	while (tk->tkr_mono.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_mono.shift)) {
				98	tk->tkr_mono.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
				99	tk->xtime_sec++;
				100	}
				101	while (tk->tkr_raw.xtime_nsec >= ((u64)NSEC_PER_SEC << tk->tkr_raw.shift)) {
				102	tk->tkr_raw.xtime_nsec -= (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
				103	tk->raw_sec++;
				104	}
				105	}
				106
				107	static inline struct timespec64 tk_xtime(const struct timekeeper *tk)
				108	{
				109	struct timespec64 ts;
				110
				111	ts.tv_sec = tk->xtime_sec;
				112	ts.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
				113	return ts;
				114	}
				115
				116	static void tk_set_xtime(struct timekeeper tk, const struct timespec64 ts)
				117	{
				118	tk->xtime_sec = ts->tv_sec;
				119	tk->tkr_mono.xtime_nsec = (u64)ts->tv_nsec << tk->tkr_mono.shift;
				120	}
				121
				122	static void tk_xtime_add(struct timekeeper tk, const struct timespec64 ts)
				123	{
				124	tk->xtime_sec += ts->tv_sec;
				125	tk->tkr_mono.xtime_nsec += (u64)ts->tv_nsec << tk->tkr_mono.shift;
				126	tk_normalize_xtime(tk);
				127	}
				128
				129	static void tk_set_wall_to_mono(struct timekeeper *tk, struct timespec64 wtm)
				130	{
				131	struct timespec64 tmp;
				132
				133	/*
				134	* Verify consistency of: offset_real = -wall_to_monotonic
				135	* before modifying anything
				136	*/
				137	set_normalized_timespec64(&tmp, -tk->wall_to_monotonic.tv_sec,
				138	-tk->wall_to_monotonic.tv_nsec);
				139	WARN_ON_ONCE(tk->offs_real != timespec64_to_ktime(tmp));
				140	tk->wall_to_monotonic = wtm;
				141	set_normalized_timespec64(&tmp, -wtm.tv_sec, -wtm.tv_nsec);
				142	tk->offs_real = timespec64_to_ktime(tmp);
				143	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tk->tai_offset, 0));
				144	}
				145
				146	static inline void tk_update_sleep_time(struct timekeeper *tk, ktime_t delta)
				147	{
				148	tk->offs_boot = ktime_add(tk->offs_boot, delta);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	149	/*
				150	* Timespec representation for VDSO update to avoid 64bit division
				151	* on every update.
				152	*/
				153	tk->monotonic_to_boot = ktime_to_timespec64(tk->offs_boot);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	154	}
				155
				156	/*
				157	* tk_clock_read - atomic clocksource read() helper
				158	*
				159	* This helper is necessary to use in the read paths because, while the
				160	* seqlock ensures we don't return a bad value while structures are updated,
				161	* it doesn't protect from potential crashes. There is the possibility that
				162	* the tkr's clocksource may change between the read reference, and the
				163	* clock reference passed to the read function. This can cause crashes if
				164	* the wrong clocksource is passed to the wrong read function.
				165	* This isn't necessary to use when holding the timekeeper_lock or doing
				166	* a read of the fast-timekeeper tkrs (which is protected by its own locking
				167	* and update logic).
				168	*/
				169	static inline u64 tk_clock_read(const struct tk_read_base *tkr)
				170	{
				171	struct clocksource *clock = READ_ONCE(tkr->clock);
				172
				173	return clock->read(clock);
				174	}
				175
				176	#ifdef CONFIG_DEBUG_TIMEKEEPING
				177	#define WARNING_FREQ (HZ300) / 5 minute rate-limiting */
				178
				179	static void timekeeping_check_update(struct timekeeper *tk, u64 offset)
				180	{
				181
				182	u64 max_cycles = tk->tkr_mono.clock->max_cycles;
				183	const char *name = tk->tkr_mono.clock->name;
				184
				185	if (offset > max_cycles) {
				186	printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n",
				187	offset, name, max_cycles);
				188	printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n");
				189	} else {
				190	if (offset > (max_cycles >> 1)) {
				191	printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n",
				192	offset, name, max_cycles >> 1);
				193	printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n");
				194	}
				195	}
				196
				197	if (tk->underflow_seen) {
				198	if (jiffies - tk->last_warning > WARNING_FREQ) {
				199	printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name);
				200	printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
				201	printk_deferred(" Your kernel is probably still fine.\n");
				202	tk->last_warning = jiffies;
				203	}
				204	tk->underflow_seen = 0;
				205	}
				206
				207	if (tk->overflow_seen) {
				208	if (jiffies - tk->last_warning > WARNING_FREQ) {
				209	printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name);
				210	printk_deferred(" Please report this, consider using a different clocksource, if possible.\n");
				211	printk_deferred(" Your kernel is probably still fine.\n");
				212	tk->last_warning = jiffies;
				213	}
				214	tk->overflow_seen = 0;
				215	}
				216	}
				217
				218	static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
				219	{
				220	struct timekeeper *tk = &tk_core.timekeeper;
				221	u64 now, last, mask, max, delta;
				222	unsigned int seq;
				223
				224	/*
				225	* Since we're called holding a seqlock, the data may shift
				226	* under us while we're doing the calculation. This can cause
				227	* false positives, since we'd note a problem but throw the
				228	* results away. So nest another seqlock here to atomically
				229	* grab the points we are checking with.
				230	*/
				231	do {
				232	seq = read_seqcount_begin(&tk_core.seq);
				233	now = tk_clock_read(tkr);
				234	last = tkr->cycle_last;
				235	mask = tkr->mask;
				236	max = tkr->clock->max_cycles;
				237	} while (read_seqcount_retry(&tk_core.seq, seq));
				238
				239	delta = clocksource_delta(now, last, mask);
				240
				241	/*
				242	* Try to catch underflows by checking if we are seeing small
				243	* mask-relative negative values.
				244	*/
				245	if (unlikely((~delta & mask) < (mask >> 3))) {
				246	tk->underflow_seen = 1;
				247	delta = 0;
				248	}
				249
				250	/* Cap delta value to the max_cycles values to avoid mult overflows */
				251	if (unlikely(delta > max)) {
				252	tk->overflow_seen = 1;
				253	delta = tkr->clock->max_cycles;
				254	}
				255
				256	return delta;
				257	}
				258	#else
				259	static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset)
				260	{
				261	}
				262	static inline u64 timekeeping_get_delta(const struct tk_read_base *tkr)
				263	{
				264	u64 cycle_now, delta;
				265
				266	/* read clocksource */
				267	cycle_now = tk_clock_read(tkr);
				268
				269	/* calculate the delta since the last update_wall_time */
				270	delta = clocksource_delta(cycle_now, tkr->cycle_last, tkr->mask);
				271
				272	return delta;
				273	}
				274	#endif
				275
				276	/**
				277	* tk_setup_internals - Set up internals to use clocksource clock.
				278	*
				279	* @tk: The target timekeeper to setup.
				280	* @clock: Pointer to clocksource.
				281	*
				282	* Calculates a fixed cycle/nsec interval for a given clocksource/adjustment
				283	* pair and interval request.
				284	*
				285	* Unless you're the timekeeping code, you should not be using this!
				286	*/
				287	static void tk_setup_internals(struct timekeeper tk, struct clocksource clock)
				288	{
				289	u64 interval;
				290	u64 tmp, ntpinterval;
				291	struct clocksource *old_clock;
				292
				293	++tk->cs_was_changed_seq;
				294	old_clock = tk->tkr_mono.clock;
				295	tk->tkr_mono.clock = clock;
				296	tk->tkr_mono.mask = clock->mask;
				297	tk->tkr_mono.cycle_last = tk_clock_read(&tk->tkr_mono);
				298
				299	tk->tkr_raw.clock = clock;
				300	tk->tkr_raw.mask = clock->mask;
				301	tk->tkr_raw.cycle_last = tk->tkr_mono.cycle_last;
				302
				303	/* Do the ns -> cycle conversion first, using original mult */
				304	tmp = NTP_INTERVAL_LENGTH;
				305	tmp <<= clock->shift;
				306	ntpinterval = tmp;
				307	tmp += clock->mult/2;
				308	do_div(tmp, clock->mult);
				309	if (tmp == 0)
				310	tmp = 1;
				311
				312	interval = (u64) tmp;
				313	tk->cycle_interval = interval;
				314
				315	/* Go back from cycles -> shifted ns */
				316	tk->xtime_interval = interval * clock->mult;
				317	tk->xtime_remainder = ntpinterval - tk->xtime_interval;
				318	tk->raw_interval = interval * clock->mult;
				319
				320	/* if changing clocks, convert xtime_nsec shift units */
				321	if (old_clock) {
				322	int shift_change = clock->shift - old_clock->shift;
				323	if (shift_change < 0) {
				324	tk->tkr_mono.xtime_nsec >>= -shift_change;
				325	tk->tkr_raw.xtime_nsec >>= -shift_change;
				326	} else {
				327	tk->tkr_mono.xtime_nsec <<= shift_change;
				328	tk->tkr_raw.xtime_nsec <<= shift_change;
				329	}
				330	}
				331
				332	tk->tkr_mono.shift = clock->shift;
				333	tk->tkr_raw.shift = clock->shift;
				334
				335	tk->ntp_error = 0;
				336	tk->ntp_error_shift = NTP_SCALE_SHIFT - clock->shift;
				337	tk->ntp_tick = ntpinterval << tk->ntp_error_shift;
				338
				339	/*
				340	* The timekeeper keeps its own mult values for the currently
				341	* active clocksource. These value will be adjusted via NTP
				342	* to counteract clock drifting.
				343	*/
				344	tk->tkr_mono.mult = clock->mult;
				345	tk->tkr_raw.mult = clock->mult;
				346	tk->ntp_err_mult = 0;
				347	tk->skip_second_overflow = 0;
				348	}
				349
				350	/* Timekeeper helper functions. */
				351
				352	#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
				353	static u32 default_arch_gettimeoffset(void) { return 0; }
				354	u32 (*arch_gettimeoffset)(void) = default_arch_gettimeoffset;
				355	#else
				356	static inline u32 arch_gettimeoffset(void) { return 0; }
				357	#endif
				358
				359	static inline u64 timekeeping_delta_to_ns(const struct tk_read_base *tkr, u64 delta)
				360	{
				361	u64 nsec;
				362
				363	nsec = delta * tkr->mult + tkr->xtime_nsec;
				364	nsec >>= tkr->shift;
				365
				366	/* If arch requires, add in get_arch_timeoffset() */
				367	return nsec + arch_gettimeoffset();
				368	}
				369
				370	static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr)
				371	{
				372	u64 delta;
				373
				374	delta = timekeeping_get_delta(tkr);
				375	return timekeeping_delta_to_ns(tkr, delta);
				376	}
				377
				378	static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles)
				379	{
				380	u64 delta;
				381
				382	/* calculate the delta since the last update_wall_time */
				383	delta = clocksource_delta(cycles, tkr->cycle_last, tkr->mask);
				384	return timekeeping_delta_to_ns(tkr, delta);
				385	}
				386
				387	/**
				388	* update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper.
				389	* @tkr: Timekeeping readout base from which we take the update
				390	*
				391	* We want to use this from any context including NMI and tracing /
				392	* instrumenting the timekeeping code itself.
				393	*
				394	* Employ the latch technique; see @raw_write_seqcount_latch.
				395	*
				396	* So if a NMI hits the update of base[0] then it will use base[1]
				397	* which is still consistent. In the worst case this can result is a
				398	* slightly wrong timestamp (a few nanoseconds). See
				399	* @ktime_get_mono_fast_ns.
				400	*/
				401	static void update_fast_timekeeper(const struct tk_read_base *tkr,
				402	struct tk_fast *tkf)
				403	{
				404	struct tk_read_base *base = tkf->base;
				405
				406	/* Force readers off to base[1] */
				407	raw_write_seqcount_latch(&tkf->seq);
				408
				409	/* Update base[0] */
				410	memcpy(base, tkr, sizeof(*base));
				411
				412	/* Force readers back to base[0] */
				413	raw_write_seqcount_latch(&tkf->seq);
				414
				415	/* Update base[1] */
				416	memcpy(base + 1, base, sizeof(*base));
				417	}
				418
				419	/**
				420	* ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic
				421	*
				422	* This timestamp is not guaranteed to be monotonic across an update.
				423	* The timestamp is calculated by:
				424	*
				425	* now = base_mono + clock_delta * slope
				426	*
				427	* So if the update lowers the slope, readers who are forced to the
				428	* not yet updated second array are still using the old steeper slope.
				429	*
				430	* tmono
				431	* ^
				432	* \| o n
				433	* \| o n
				434	* \| u
				435	* \| o
				436	* \|o
				437	* \|12345678---> reader order
				438	*
				439	* o = old slope
				440	* u = update
				441	* n = new slope
				442	*
				443	* So reader 6 will observe time going backwards versus reader 5.
				444	*
				445	* While other CPUs are likely to be able observe that, the only way
				446	* for a CPU local observation is when an NMI hits in the middle of
				447	* the update. Timestamps taken from that NMI context might be ahead
				448	* of the following timestamps. Callers need to be aware of that and
				449	* deal with it.
				450	*/
				451	static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf)
				452	{
				453	struct tk_read_base *tkr;
				454	unsigned int seq;
				455	u64 now;
				456
				457	do {
				458	seq = raw_read_seqcount_latch(&tkf->seq);
				459	tkr = tkf->base + (seq & 0x01);
				460	now = ktime_to_ns(tkr->base);
				461
				462	now += timekeeping_delta_to_ns(tkr,
				463	clocksource_delta(
				464	tk_clock_read(tkr),
				465	tkr->cycle_last,
				466	tkr->mask));
				467	} while (read_seqcount_retry(&tkf->seq, seq));
				468
				469	return now;
				470	}
				471
				472	u64 ktime_get_mono_fast_ns(void)
				473	{
				474	return __ktime_get_fast_ns(&tk_fast_mono);
				475	}
				476	EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);
				477
				478	u64 ktime_get_raw_fast_ns(void)
				479	{
				480	return __ktime_get_fast_ns(&tk_fast_raw);
				481	}
				482	EXPORT_SYMBOL_GPL(ktime_get_raw_fast_ns);
				483
				484	/**
				485	* ktime_get_boot_fast_ns - NMI safe and fast access to boot clock.
				486	*
				487	* To keep it NMI safe since we're accessing from tracing, we're not using a
				488	* separate timekeeper with updates to monotonic clock and boot offset
				489	* protected with seqlocks. This has the following minor side effects:
				490	*
				491	* (1) Its possible that a timestamp be taken after the boot offset is updated
				492	* but before the timekeeper is updated. If this happens, the new boot offset
				493	* is added to the old timekeeping making the clock appear to update slightly
				494	* earlier:
				495	* CPU 0 CPU 1
				496	* timekeeping_inject_sleeptime64()
				497	* __timekeeping_inject_sleeptime(tk, delta);
				498	* timestamp();
				499	* timekeeping_update(tk, TK_CLEAR_NTP...);
				500	*
				501	* (2) On 32-bit systems, the 64-bit boot offset (tk->offs_boot) may be
				502	* partially updated. Since the tk->offs_boot update is a rare event, this
				503	* should be a rare occurrence which postprocessing should be able to handle.
				504	*/
				505	u64 notrace ktime_get_boot_fast_ns(void)
				506	{
				507	struct timekeeper *tk = &tk_core.timekeeper;
				508
				509	return (ktime_get_mono_fast_ns() + ktime_to_ns(tk->offs_boot));
				510	}
				511	EXPORT_SYMBOL_GPL(ktime_get_boot_fast_ns);
				512
				513
				514	/*
				515	* See comment for __ktime_get_fast_ns() vs. timestamp ordering
				516	*/
				517	static __always_inline u64 __ktime_get_real_fast_ns(struct tk_fast *tkf)
				518	{
				519	struct tk_read_base *tkr;
				520	unsigned int seq;
				521	u64 now;
				522
				523	do {
				524	seq = raw_read_seqcount_latch(&tkf->seq);
				525	tkr = tkf->base + (seq & 0x01);
				526	now = ktime_to_ns(tkr->base_real);
				527
				528	now += timekeeping_delta_to_ns(tkr,
				529	clocksource_delta(
				530	tk_clock_read(tkr),
				531	tkr->cycle_last,
				532	tkr->mask));
				533	} while (read_seqcount_retry(&tkf->seq, seq));
				534
				535	return now;
				536	}
				537
				538	/**
				539	* ktime_get_real_fast_ns: - NMI safe and fast access to clock realtime.
				540	*/
				541	u64 ktime_get_real_fast_ns(void)
				542	{
				543	return __ktime_get_real_fast_ns(&tk_fast_mono);
				544	}
				545	EXPORT_SYMBOL_GPL(ktime_get_real_fast_ns);
				546
				547	/**
				548	* halt_fast_timekeeper - Prevent fast timekeeper from accessing clocksource.
				549	* @tk: Timekeeper to snapshot.
				550	*
				551	* It generally is unsafe to access the clocksource after timekeeping has been
				552	* suspended, so take a snapshot of the readout base of @tk and use it as the
				553	* fast timekeeper's readout base while suspended. It will return the same
				554	* number of cycles every time until timekeeping is resumed at which time the
				555	* proper readout base for the fast timekeeper will be restored automatically.
				556	*/
				557	static void halt_fast_timekeeper(const struct timekeeper *tk)
				558	{
				559	static struct tk_read_base tkr_dummy;
				560	const struct tk_read_base *tkr = &tk->tkr_mono;
				561
				562	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
				563	cycles_at_suspend = tk_clock_read(tkr);
				564	tkr_dummy.clock = &dummy_clock;
				565	tkr_dummy.base_real = tkr->base + tk->offs_real;
				566	update_fast_timekeeper(&tkr_dummy, &tk_fast_mono);
				567
				568	tkr = &tk->tkr_raw;
				569	memcpy(&tkr_dummy, tkr, sizeof(tkr_dummy));
				570	tkr_dummy.clock = &dummy_clock;
				571	update_fast_timekeeper(&tkr_dummy, &tk_fast_raw);
				572	}
				573
				574	static RAW_NOTIFIER_HEAD(pvclock_gtod_chain);
				575
				576	static void update_pvclock_gtod(struct timekeeper *tk, bool was_set)
				577	{
				578	raw_notifier_call_chain(&pvclock_gtod_chain, was_set, tk);
				579	}
				580
				581	/**
				582	* pvclock_gtod_register_notifier - register a pvclock timedata update listener
				583	*/
				584	int pvclock_gtod_register_notifier(struct notifier_block *nb)
				585	{
				586	struct timekeeper *tk = &tk_core.timekeeper;
				587	unsigned long flags;
				588	int ret;
				589
				590	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				591	ret = raw_notifier_chain_register(&pvclock_gtod_chain, nb);
				592	update_pvclock_gtod(tk, true);
				593	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				594
				595	return ret;
				596	}
				597	EXPORT_SYMBOL_GPL(pvclock_gtod_register_notifier);
				598
				599	/**
				600	* pvclock_gtod_unregister_notifier - unregister a pvclock
				601	* timedata update listener
				602	*/
				603	int pvclock_gtod_unregister_notifier(struct notifier_block *nb)
				604	{
				605	unsigned long flags;
				606	int ret;
				607
				608	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				609	ret = raw_notifier_chain_unregister(&pvclock_gtod_chain, nb);
				610	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				611
				612	return ret;
				613	}
				614	EXPORT_SYMBOL_GPL(pvclock_gtod_unregister_notifier);
				615
				616	/*
				617	* tk_update_leap_state - helper to update the next_leap_ktime
				618	*/
				619	static inline void tk_update_leap_state(struct timekeeper *tk)
				620	{
				621	tk->next_leap_ktime = ntp_get_next_leap();
				622	if (tk->next_leap_ktime != KTIME_MAX)
				623	/* Convert to monotonic time */
				624	tk->next_leap_ktime = ktime_sub(tk->next_leap_ktime, tk->offs_real);
				625	}
				626
				627	/*
				628	* Update the ktime_t based scalar nsec members of the timekeeper
				629	*/
				630	static inline void tk_update_ktime_data(struct timekeeper *tk)
				631	{
				632	u64 seconds;
				633	u32 nsec;
				634
				635	/*
				636	* The xtime based monotonic readout is:
				637	* nsec = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec + now();
				638	* The ktime based monotonic readout is:
				639	* nsec = base_mono + now();
				640	* ==> base_mono = (xtime_sec + wtm_sec) * 1e9 + wtm_nsec
				641	*/
				642	seconds = (u64)(tk->xtime_sec + tk->wall_to_monotonic.tv_sec);
				643	nsec = (u32) tk->wall_to_monotonic.tv_nsec;
				644	tk->tkr_mono.base = ns_to_ktime(seconds * NSEC_PER_SEC + nsec);
				645
				646	/*
				647	* The sum of the nanoseconds portions of xtime and
				648	* wall_to_monotonic can be greater/equal one second. Take
				649	* this into account before updating tk->ktime_sec.
				650	*/
				651	nsec += (u32)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
				652	if (nsec >= NSEC_PER_SEC)
				653	seconds++;
				654	tk->ktime_sec = seconds;
				655
				656	/* Update the monotonic raw base */
				657	tk->tkr_raw.base = ns_to_ktime(tk->raw_sec * NSEC_PER_SEC);
				658	}
				659
				660	/* must hold timekeeper_lock */
				661	static void timekeeping_update(struct timekeeper *tk, unsigned int action)
				662	{
				663	if (action & TK_CLEAR_NTP) {
				664	tk->ntp_error = 0;
				665	ntp_clear();
				666	}
				667
				668	tk_update_leap_state(tk);
				669	tk_update_ktime_data(tk);
				670
				671	update_vsyscall(tk);
				672	update_pvclock_gtod(tk, action & TK_CLOCK_WAS_SET);
				673
				674	tk->tkr_mono.base_real = tk->tkr_mono.base + tk->offs_real;
				675	update_fast_timekeeper(&tk->tkr_mono, &tk_fast_mono);
				676	update_fast_timekeeper(&tk->tkr_raw, &tk_fast_raw);
				677
				678	if (action & TK_CLOCK_WAS_SET)
				679	tk->clock_was_set_seq++;
				680	/*
				681	* The mirroring of the data to the shadow-timekeeper needs
				682	* to happen last here to ensure we don't over-write the
				683	* timekeeper structure on the next update with stale data
				684	*/
				685	if (action & TK_MIRROR)
				686	memcpy(&shadow_timekeeper, &tk_core.timekeeper,
				687	sizeof(tk_core.timekeeper));
				688	}
				689
				690	/**
				691	* timekeeping_forward_now - update clock to the current time
				692	*
				693	* Forward the current clock to update its state since the last call to
				694	* update_wall_time(). This is useful before significant clock changes,
				695	* as it avoids having to deal with this time offset explicitly.
				696	*/
				697	static void timekeeping_forward_now(struct timekeeper *tk)
				698	{
				699	u64 cycle_now, delta;
				700
				701	cycle_now = tk_clock_read(&tk->tkr_mono);
				702	delta = clocksource_delta(cycle_now, tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
				703	tk->tkr_mono.cycle_last = cycle_now;
				704	tk->tkr_raw.cycle_last = cycle_now;
				705
				706	tk->tkr_mono.xtime_nsec += delta * tk->tkr_mono.mult;
				707
				708	/* If arch requires, add in get_arch_timeoffset() */
				709	tk->tkr_mono.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_mono.shift;
				710
				711
				712	tk->tkr_raw.xtime_nsec += delta * tk->tkr_raw.mult;
				713
				714	/* If arch requires, add in get_arch_timeoffset() */
				715	tk->tkr_raw.xtime_nsec += (u64)arch_gettimeoffset() << tk->tkr_raw.shift;
				716
				717	tk_normalize_xtime(tk);
				718	}
				719
				720	/**
				721	* ktime_get_real_ts64 - Returns the time of day in a timespec64.
				722	* @ts: pointer to the timespec to be set
				723	*
				724	* Returns the time of day in a timespec64 (WARN if suspended).
				725	*/
				726	void ktime_get_real_ts64(struct timespec64 *ts)
				727	{
				728	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	729	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	730	u64 nsecs;
				731
				732	WARN_ON(timekeeping_suspended);
				733
				734	do {
				735	seq = read_seqcount_begin(&tk_core.seq);
				736
				737	ts->tv_sec = tk->xtime_sec;
				738	nsecs = timekeeping_get_ns(&tk->tkr_mono);
				739
				740	} while (read_seqcount_retry(&tk_core.seq, seq));
				741
				742	ts->tv_nsec = 0;
				743	timespec64_add_ns(ts, nsecs);
				744	}
				745	EXPORT_SYMBOL(ktime_get_real_ts64);
				746
				747	ktime_t ktime_get(void)
				748	{
				749	struct timekeeper *tk = &tk_core.timekeeper;
				750	unsigned int seq;
				751	ktime_t base;
				752	u64 nsecs;
				753
				754	WARN_ON(timekeeping_suspended);
				755
				756	do {
				757	seq = read_seqcount_begin(&tk_core.seq);
				758	base = tk->tkr_mono.base;
				759	nsecs = timekeeping_get_ns(&tk->tkr_mono);
				760
				761	} while (read_seqcount_retry(&tk_core.seq, seq));
				762
				763	return ktime_add_ns(base, nsecs);
				764	}
				765	EXPORT_SYMBOL_GPL(ktime_get);
				766
				767	u32 ktime_get_resolution_ns(void)
				768	{
				769	struct timekeeper *tk = &tk_core.timekeeper;
				770	unsigned int seq;
				771	u32 nsecs;
				772
				773	WARN_ON(timekeeping_suspended);
				774
				775	do {
				776	seq = read_seqcount_begin(&tk_core.seq);
				777	nsecs = tk->tkr_mono.mult >> tk->tkr_mono.shift;
				778	} while (read_seqcount_retry(&tk_core.seq, seq));
				779
				780	return nsecs;
				781	}
				782	EXPORT_SYMBOL_GPL(ktime_get_resolution_ns);
				783
				784	static ktime_t *offsets[TK_OFFS_MAX] = {
				785	[TK_OFFS_REAL] = &tk_core.timekeeper.offs_real,
				786	[TK_OFFS_BOOT] = &tk_core.timekeeper.offs_boot,
				787	[TK_OFFS_TAI] = &tk_core.timekeeper.offs_tai,
				788	};
				789
				790	ktime_t ktime_get_with_offset(enum tk_offsets offs)
				791	{
				792	struct timekeeper *tk = &tk_core.timekeeper;
				793	unsigned int seq;
				794	ktime_t base, *offset = offsets[offs];
				795	u64 nsecs;
				796
				797	WARN_ON(timekeeping_suspended);
				798
				799	do {
				800	seq = read_seqcount_begin(&tk_core.seq);
				801	base = ktime_add(tk->tkr_mono.base, *offset);
				802	nsecs = timekeeping_get_ns(&tk->tkr_mono);
				803
				804	} while (read_seqcount_retry(&tk_core.seq, seq));
				805
				806	return ktime_add_ns(base, nsecs);
				807
				808	}
				809	EXPORT_SYMBOL_GPL(ktime_get_with_offset);
				810
				811	ktime_t ktime_get_coarse_with_offset(enum tk_offsets offs)
				812	{
				813	struct timekeeper *tk = &tk_core.timekeeper;
				814	unsigned int seq;
				815	ktime_t base, *offset = offsets[offs];
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	816	u64 nsecs;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	817
				818	WARN_ON(timekeeping_suspended);
				819
				820	do {
				821	seq = read_seqcount_begin(&tk_core.seq);
				822	base = ktime_add(tk->tkr_mono.base, *offset);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	823	nsecs = tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	824
				825	} while (read_seqcount_retry(&tk_core.seq, seq));
				826
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	827	return ktime_add_ns(base, nsecs);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	828	}
				829	EXPORT_SYMBOL_GPL(ktime_get_coarse_with_offset);
				830
				831	/**
				832	* ktime_mono_to_any() - convert mononotic time to any other time
				833	* @tmono: time to convert.
				834	* @offs: which offset to use
				835	*/
				836	ktime_t ktime_mono_to_any(ktime_t tmono, enum tk_offsets offs)
				837	{
				838	ktime_t *offset = offsets[offs];
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	839	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	840	ktime_t tconv;
				841
				842	do {
				843	seq = read_seqcount_begin(&tk_core.seq);
				844	tconv = ktime_add(tmono, *offset);
				845	} while (read_seqcount_retry(&tk_core.seq, seq));
				846
				847	return tconv;
				848	}
				849	EXPORT_SYMBOL_GPL(ktime_mono_to_any);
				850
				851	/**
				852	* ktime_get_raw - Returns the raw monotonic time in ktime_t format
				853	*/
				854	ktime_t ktime_get_raw(void)
				855	{
				856	struct timekeeper *tk = &tk_core.timekeeper;
				857	unsigned int seq;
				858	ktime_t base;
				859	u64 nsecs;
				860
				861	do {
				862	seq = read_seqcount_begin(&tk_core.seq);
				863	base = tk->tkr_raw.base;
				864	nsecs = timekeeping_get_ns(&tk->tkr_raw);
				865
				866	} while (read_seqcount_retry(&tk_core.seq, seq));
				867
				868	return ktime_add_ns(base, nsecs);
				869	}
				870	EXPORT_SYMBOL_GPL(ktime_get_raw);
				871
				872	/**
				873	* ktime_get_ts64 - get the monotonic clock in timespec64 format
				874	* @ts: pointer to timespec variable
				875	*
				876	* The function calculates the monotonic clock from the realtime
				877	* clock and the wall_to_monotonic offset and stores the result
				878	* in normalized timespec64 format in the variable pointed to by @ts.
				879	*/
				880	void ktime_get_ts64(struct timespec64 *ts)
				881	{
				882	struct timekeeper *tk = &tk_core.timekeeper;
				883	struct timespec64 tomono;
				884	unsigned int seq;
				885	u64 nsec;
				886
				887	WARN_ON(timekeeping_suspended);
				888
				889	do {
				890	seq = read_seqcount_begin(&tk_core.seq);
				891	ts->tv_sec = tk->xtime_sec;
				892	nsec = timekeeping_get_ns(&tk->tkr_mono);
				893	tomono = tk->wall_to_monotonic;
				894
				895	} while (read_seqcount_retry(&tk_core.seq, seq));
				896
				897	ts->tv_sec += tomono.tv_sec;
				898	ts->tv_nsec = 0;
				899	timespec64_add_ns(ts, nsec + tomono.tv_nsec);
				900	}
				901	EXPORT_SYMBOL_GPL(ktime_get_ts64);
				902
				903	/**
				904	* ktime_get_seconds - Get the seconds portion of CLOCK_MONOTONIC
				905	*
				906	* Returns the seconds portion of CLOCK_MONOTONIC with a single non
				907	* serialized read. tk->ktime_sec is of type 'unsigned long' so this
				908	* works on both 32 and 64 bit systems. On 32 bit systems the readout
				909	* covers ~136 years of uptime which should be enough to prevent
				910	* premature wrap arounds.
				911	*/
				912	time64_t ktime_get_seconds(void)
				913	{
				914	struct timekeeper *tk = &tk_core.timekeeper;
				915
				916	WARN_ON(timekeeping_suspended);
				917	return tk->ktime_sec;
				918	}
				919	EXPORT_SYMBOL_GPL(ktime_get_seconds);
				920
				921	/**
				922	* ktime_get_real_seconds - Get the seconds portion of CLOCK_REALTIME
				923	*
				924	* Returns the wall clock seconds since 1970. This replaces the
				925	* get_seconds() interface which is not y2038 safe on 32bit systems.
				926	*
				927	* For 64bit systems the fast access to tk->xtime_sec is preserved. On
				928	* 32bit systems the access must be protected with the sequence
				929	* counter to provide "atomic" access to the 64bit tk->xtime_sec
				930	* value.
				931	*/
				932	time64_t ktime_get_real_seconds(void)
				933	{
				934	struct timekeeper *tk = &tk_core.timekeeper;
				935	time64_t seconds;
				936	unsigned int seq;
				937
				938	if (IS_ENABLED(CONFIG_64BIT))
				939	return tk->xtime_sec;
				940
				941	do {
				942	seq = read_seqcount_begin(&tk_core.seq);
				943	seconds = tk->xtime_sec;
				944
				945	} while (read_seqcount_retry(&tk_core.seq, seq));
				946
				947	return seconds;
				948	}
				949	EXPORT_SYMBOL_GPL(ktime_get_real_seconds);
				950
				951	/**
				952	* __ktime_get_real_seconds - The same as ktime_get_real_seconds
				953	* but without the sequence counter protect. This internal function
				954	* is called just when timekeeping lock is already held.
				955	*/
				956	time64_t __ktime_get_real_seconds(void)
				957	{
				958	struct timekeeper *tk = &tk_core.timekeeper;
				959
				960	return tk->xtime_sec;
				961	}
				962
				963	/**
				964	* ktime_get_snapshot - snapshots the realtime/monotonic raw clocks with counter
				965	* @systime_snapshot: pointer to struct receiving the system time snapshot
				966	*/
				967	void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot)
				968	{
				969	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	970	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	971	ktime_t base_raw;
				972	ktime_t base_real;
				973	u64 nsec_raw;
				974	u64 nsec_real;
				975	u64 now;
				976
				977	WARN_ON_ONCE(timekeeping_suspended);
				978
				979	do {
				980	seq = read_seqcount_begin(&tk_core.seq);
				981	now = tk_clock_read(&tk->tkr_mono);
				982	systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq;
				983	systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq;
				984	base_real = ktime_add(tk->tkr_mono.base,
				985	tk_core.timekeeper.offs_real);
				986	base_raw = tk->tkr_raw.base;
				987	nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono, now);
				988	nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw, now);
				989	} while (read_seqcount_retry(&tk_core.seq, seq));
				990
				991	systime_snapshot->cycles = now;
				992	systime_snapshot->real = ktime_add_ns(base_real, nsec_real);
				993	systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw);
				994	}
				995	EXPORT_SYMBOL_GPL(ktime_get_snapshot);
				996
				997	/* Scale base by mult/div checking for overflow */
				998	static int scale64_check_overflow(u64 mult, u64 div, u64 *base)
				999	{
				1000	u64 tmp, rem;
				1001
				1002	tmp = div64_u64_rem(*base, div, &rem);
				1003
				1004	if (((int)sizeof(u64)*8 - fls64(mult) < fls64(tmp)) \|\|
				1005	((int)sizeof(u64)*8 - fls64(mult) < fls64(rem)))
				1006	return -EOVERFLOW;
				1007	tmp *= mult;
				1008	rem *= mult;
				1009
				1010	do_div(rem, div);
				1011	*base = tmp + rem;
				1012	return 0;
				1013	}
				1014
				1015	/**
				1016	* adjust_historical_crosststamp - adjust crosstimestamp previous to current interval
				1017	* @history: Snapshot representing start of history
				1018	* @partial_history_cycles: Cycle offset into history (fractional part)
				1019	* @total_history_cycles: Total history length in cycles
				1020	* @discontinuity: True indicates clock was set on history period
				1021	* @ts: Cross timestamp that should be adjusted using
				1022	* partial/total ratio
				1023	*
				1024	* Helper function used by get_device_system_crosststamp() to correct the
				1025	* crosstimestamp corresponding to the start of the current interval to the
				1026	* system counter value (timestamp point) provided by the driver. The
				1027	* total_history_* quantities are the total history starting at the provided
				1028	* reference point and ending at the start of the current interval. The cycle
				1029	* count between the driver timestamp point and the start of the current
				1030	* interval is partial_history_cycles.
				1031	*/
				1032	static int adjust_historical_crosststamp(struct system_time_snapshot *history,
				1033	u64 partial_history_cycles,
				1034	u64 total_history_cycles,
				1035	bool discontinuity,
				1036	struct system_device_crosststamp *ts)
				1037	{
				1038	struct timekeeper *tk = &tk_core.timekeeper;
				1039	u64 corr_raw, corr_real;
				1040	bool interp_forward;
				1041	int ret;
				1042
				1043	if (total_history_cycles == 0 \|\| partial_history_cycles == 0)
				1044	return 0;
				1045
				1046	/* Interpolate shortest distance from beginning or end of history */
				1047	interp_forward = partial_history_cycles > total_history_cycles / 2;
				1048	partial_history_cycles = interp_forward ?
				1049	total_history_cycles - partial_history_cycles :
				1050	partial_history_cycles;
				1051
				1052	/*
				1053	* Scale the monotonic raw time delta by:
				1054	* partial_history_cycles / total_history_cycles
				1055	*/
				1056	corr_raw = (u64)ktime_to_ns(
				1057	ktime_sub(ts->sys_monoraw, history->raw));
				1058	ret = scale64_check_overflow(partial_history_cycles,
				1059	total_history_cycles, &corr_raw);
				1060	if (ret)
				1061	return ret;
				1062
				1063	/*
				1064	* If there is a discontinuity in the history, scale monotonic raw
				1065	* correction by:
				1066	* mult(real)/mult(raw) yielding the realtime correction
				1067	* Otherwise, calculate the realtime correction similar to monotonic
				1068	* raw calculation
				1069	*/
				1070	if (discontinuity) {
				1071	corr_real = mul_u64_u32_div
				1072	(corr_raw, tk->tkr_mono.mult, tk->tkr_raw.mult);
				1073	} else {
				1074	corr_real = (u64)ktime_to_ns(
				1075	ktime_sub(ts->sys_realtime, history->real));
				1076	ret = scale64_check_overflow(partial_history_cycles,
				1077	total_history_cycles, &corr_real);
				1078	if (ret)
				1079	return ret;
				1080	}
				1081
				1082	/* Fixup monotonic raw and real time time values */
				1083	if (interp_forward) {
				1084	ts->sys_monoraw = ktime_add_ns(history->raw, corr_raw);
				1085	ts->sys_realtime = ktime_add_ns(history->real, corr_real);
				1086	} else {
				1087	ts->sys_monoraw = ktime_sub_ns(ts->sys_monoraw, corr_raw);
				1088	ts->sys_realtime = ktime_sub_ns(ts->sys_realtime, corr_real);
				1089	}
				1090
				1091	return 0;
				1092	}
				1093
				1094	/*
				1095	* cycle_between - true if test occurs chronologically between before and after
				1096	*/
				1097	static bool cycle_between(u64 before, u64 test, u64 after)
				1098	{
				1099	if (test > before && test < after)
				1100	return true;
				1101	if (test < before && before > after)
				1102	return true;
				1103	return false;
				1104	}
				1105
				1106	/**
				1107	* get_device_system_crosststamp - Synchronously capture system/device timestamp
				1108	* @get_time_fn: Callback to get simultaneous device time and
				1109	* system counter from the device driver
				1110	* @ctx: Context passed to get_time_fn()
				1111	* @history_begin: Historical reference point used to interpolate system
				1112	* time when counter provided by the driver is before the current interval
				1113	* @xtstamp: Receives simultaneously captured system and device time
				1114	*
				1115	* Reads a timestamp from a device and correlates it to system time
				1116	*/
				1117	int get_device_system_crosststamp(int (*get_time_fn)
				1118	(ktime_t *device_time,
				1119	struct system_counterval_t *sys_counterval,
				1120	void *ctx),
				1121	void *ctx,
				1122	struct system_time_snapshot *history_begin,
				1123	struct system_device_crosststamp *xtstamp)
				1124	{
				1125	struct system_counterval_t system_counterval;
				1126	struct timekeeper *tk = &tk_core.timekeeper;
				1127	u64 cycles, now, interval_start;
				1128	unsigned int clock_was_set_seq = 0;
				1129	ktime_t base_real, base_raw;
				1130	u64 nsec_real, nsec_raw;
				1131	u8 cs_was_changed_seq;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1132	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1133	bool do_interp;
				1134	int ret;
				1135
				1136	do {
				1137	seq = read_seqcount_begin(&tk_core.seq);
				1138	/*
				1139	* Try to synchronously capture device time and a system
				1140	* counter value calling back into the device driver
				1141	*/
				1142	ret = get_time_fn(&xtstamp->device, &system_counterval, ctx);
				1143	if (ret)
				1144	return ret;
				1145
				1146	/*
				1147	* Verify that the clocksource associated with the captured
				1148	* system counter value is the same as the currently installed
				1149	* timekeeper clocksource
				1150	*/
				1151	if (tk->tkr_mono.clock != system_counterval.cs)
				1152	return -ENODEV;
				1153	cycles = system_counterval.cycles;
				1154
				1155	/*
				1156	* Check whether the system counter value provided by the
				1157	* device driver is on the current timekeeping interval.
				1158	*/
				1159	now = tk_clock_read(&tk->tkr_mono);
				1160	interval_start = tk->tkr_mono.cycle_last;
				1161	if (!cycle_between(interval_start, cycles, now)) {
				1162	clock_was_set_seq = tk->clock_was_set_seq;
				1163	cs_was_changed_seq = tk->cs_was_changed_seq;
				1164	cycles = interval_start;
				1165	do_interp = true;
				1166	} else {
				1167	do_interp = false;
				1168	}
				1169
				1170	base_real = ktime_add(tk->tkr_mono.base,
				1171	tk_core.timekeeper.offs_real);
				1172	base_raw = tk->tkr_raw.base;
				1173
				1174	nsec_real = timekeeping_cycles_to_ns(&tk->tkr_mono,
				1175	system_counterval.cycles);
				1176	nsec_raw = timekeeping_cycles_to_ns(&tk->tkr_raw,
				1177	system_counterval.cycles);
				1178	} while (read_seqcount_retry(&tk_core.seq, seq));
				1179
				1180	xtstamp->sys_realtime = ktime_add_ns(base_real, nsec_real);
				1181	xtstamp->sys_monoraw = ktime_add_ns(base_raw, nsec_raw);
				1182
				1183	/*
				1184	* Interpolate if necessary, adjusting back from the start of the
				1185	* current interval
				1186	*/
				1187	if (do_interp) {
				1188	u64 partial_history_cycles, total_history_cycles;
				1189	bool discontinuity;
				1190
				1191	/*
				1192	* Check that the counter value occurs after the provided
				1193	* history reference and that the history doesn't cross a
				1194	* clocksource change
				1195	*/
				1196	if (!history_begin \|\|
				1197	!cycle_between(history_begin->cycles,
				1198	system_counterval.cycles, cycles) \|\|
				1199	history_begin->cs_was_changed_seq != cs_was_changed_seq)
				1200	return -EINVAL;
				1201	partial_history_cycles = cycles - system_counterval.cycles;
				1202	total_history_cycles = cycles - history_begin->cycles;
				1203	discontinuity =
				1204	history_begin->clock_was_set_seq != clock_was_set_seq;
				1205
				1206	ret = adjust_historical_crosststamp(history_begin,
				1207	partial_history_cycles,
				1208	total_history_cycles,
				1209	discontinuity, xtstamp);
				1210	if (ret)
				1211	return ret;
				1212	}
				1213
				1214	return 0;
				1215	}
				1216	EXPORT_SYMBOL_GPL(get_device_system_crosststamp);
				1217
				1218	/**
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1219	* do_settimeofday64 - Sets the time of day.
				1220	* @ts: pointer to the timespec64 variable containing the new time
				1221	*
				1222	* Sets the time of day to the new time and update NTP and notify hrtimers
				1223	*/
				1224	int do_settimeofday64(const struct timespec64 *ts)
				1225	{
				1226	struct timekeeper *tk = &tk_core.timekeeper;
				1227	struct timespec64 ts_delta, xt;
				1228	unsigned long flags;
				1229	int ret = 0;
				1230
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1231	if (!timespec64_valid_settod(ts))
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1232	return -EINVAL;
				1233
				1234	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1235	write_seqcount_begin(&tk_core.seq);
				1236
				1237	timekeeping_forward_now(tk);
				1238
				1239	xt = tk_xtime(tk);
				1240	ts_delta.tv_sec = ts->tv_sec - xt.tv_sec;
				1241	ts_delta.tv_nsec = ts->tv_nsec - xt.tv_nsec;
				1242
				1243	if (timespec64_compare(&tk->wall_to_monotonic, &ts_delta) > 0) {
				1244	ret = -EINVAL;
				1245	goto out;
				1246	}
				1247
				1248	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, ts_delta));
				1249
				1250	tk_set_xtime(tk, ts);
				1251	out:
				1252	timekeeping_update(tk, TK_CLEAR_NTP \| TK_MIRROR \| TK_CLOCK_WAS_SET);
				1253
				1254	write_seqcount_end(&tk_core.seq);
				1255	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1256
				1257	/* signal hrtimers about time change */
				1258	clock_was_set();
				1259
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1260	if (!ret)
				1261	audit_tk_injoffset(ts_delta);
				1262
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1263	return ret;
				1264	}
				1265	EXPORT_SYMBOL(do_settimeofday64);
				1266
				1267	/**
				1268	* timekeeping_inject_offset - Adds or subtracts from the current time.
				1269	* @tv: pointer to the timespec variable containing the offset
				1270	*
				1271	* Adds or subtracts an offset value from the current time.
				1272	*/
				1273	static int timekeeping_inject_offset(const struct timespec64 *ts)
				1274	{
				1275	struct timekeeper *tk = &tk_core.timekeeper;
				1276	unsigned long flags;
				1277	struct timespec64 tmp;
				1278	int ret = 0;
				1279
				1280	if (ts->tv_nsec < 0 \|\| ts->tv_nsec >= NSEC_PER_SEC)
				1281	return -EINVAL;
				1282
				1283	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1284	write_seqcount_begin(&tk_core.seq);
				1285
				1286	timekeeping_forward_now(tk);
				1287
				1288	/* Make sure the proposed value is valid */
				1289	tmp = timespec64_add(tk_xtime(tk), *ts);
				1290	if (timespec64_compare(&tk->wall_to_monotonic, ts) > 0 \|\|
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1291	!timespec64_valid_settod(&tmp)) {
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1292	ret = -EINVAL;
				1293	goto error;
				1294	}
				1295
				1296	tk_xtime_add(tk, ts);
				1297	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *ts));
				1298
				1299	error: /* even if we error out, we forwarded the time, so call update */
				1300	timekeeping_update(tk, TK_CLEAR_NTP \| TK_MIRROR \| TK_CLOCK_WAS_SET);
				1301
				1302	write_seqcount_end(&tk_core.seq);
				1303	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1304
				1305	/* signal hrtimers about time change */
				1306	clock_was_set();
				1307
				1308	return ret;
				1309	}
				1310
/*
 * Indicates if there is an offset between the system clock and the hardware
 * clock/persistent clock/rtc.
 *
 * Set to 1 by timekeeping_warp_clock() when sys_tz.tz_minuteswest is
 * non-zero.
 */
int persistent_clock_is_local;
				1316
				1317	/*
				1318	* Adjust the time obtained from the CMOS to be UTC time instead of
				1319	* local time.
				1320	*
				1321	* This is ugly, but preferable to the alternatives. Otherwise we
				1322	* would either need to write a program to do it in /etc/rc (and risk
				1323	* confusion if the program gets run more than once; it would also be
				1324	* hard to make the program warp the clock precisely n hours) or
				1325	* compile in the timezone information into the kernel. Bad, bad....
				1326	*
				1327	* - TYT, 1992-01-01
				1328	*
				1329	* The best thing to do is to keep the CMOS clock in universal time (UTC)
				1330	* as real UNIX machines always do it. This avoids all headaches about
				1331	* daylight saving times and warping kernel clocks.
				1332	*/
				1333	void timekeeping_warp_clock(void)
				1334	{
				1335	if (sys_tz.tz_minuteswest != 0) {
				1336	struct timespec64 adjust;
				1337
				1338	persistent_clock_is_local = 1;
				1339	adjust.tv_sec = sys_tz.tz_minuteswest * 60;
				1340	adjust.tv_nsec = 0;
				1341	timekeeping_inject_offset(&adjust);
				1342	}
				1343	}
				1344
				1345	/**
				1346	* __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic
				1347	*
				1348	*/
				1349	static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset)
				1350	{
				1351	tk->tai_offset = tai_offset;
				1352	tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0));
				1353	}
				1354
				1355	/**
				1356	* change_clocksource - Swaps clocksources if a new one is available
				1357	*
				1358	* Accumulates current time interval and initializes new clocksource
				1359	*/
				1360	static int change_clocksource(void *data)
				1361	{
				1362	struct timekeeper *tk = &tk_core.timekeeper;
				1363	struct clocksource new, old;
				1364	unsigned long flags;
				1365
				1366	new = (struct clocksource *) data;
				1367
				1368	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1369	write_seqcount_begin(&tk_core.seq);
				1370
				1371	timekeeping_forward_now(tk);
				1372	/*
				1373	* If the cs is in module, get a module reference. Succeeds
				1374	* for built-in code (owner == NULL) as well.
				1375	*/
				1376	if (try_module_get(new->owner)) {
				1377	if (!new->enable \|\| new->enable(new) == 0) {
				1378	old = tk->tkr_mono.clock;
				1379	tk_setup_internals(tk, new);
				1380	if (old->disable)
				1381	old->disable(old);
				1382	module_put(old->owner);
				1383	} else {
				1384	module_put(new->owner);
				1385	}
				1386	}
				1387	timekeeping_update(tk, TK_CLEAR_NTP \| TK_MIRROR \| TK_CLOCK_WAS_SET);
				1388
				1389	write_seqcount_end(&tk_core.seq);
				1390	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1391
				1392	return 0;
				1393	}
				1394
				1395	/**
				1396	* timekeeping_notify - Install a new clock source
				1397	* @clock: pointer to the clock source
				1398	*
				1399	* This function is called from clocksource.c after a new, better clock
				1400	* source has been registered. The caller holds the clocksource_mutex.
				1401	*/
				1402	int timekeeping_notify(struct clocksource *clock)
				1403	{
				1404	struct timekeeper *tk = &tk_core.timekeeper;
				1405
				1406	if (tk->tkr_mono.clock == clock)
				1407	return 0;
				1408	stop_machine(change_clocksource, clock, NULL);
				1409	tick_clock_notify();
				1410	return tk->tkr_mono.clock == clock ? 0 : -1;
				1411	}
				1412
				1413	/**
				1414	* ktime_get_raw_ts64 - Returns the raw monotonic time in a timespec
				1415	* @ts: pointer to the timespec64 to be set
				1416	*
				1417	* Returns the raw monotonic time (completely un-modified by ntp)
				1418	*/
				1419	void ktime_get_raw_ts64(struct timespec64 *ts)
				1420	{
				1421	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1422	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1423	u64 nsecs;
				1424
				1425	do {
				1426	seq = read_seqcount_begin(&tk_core.seq);
				1427	ts->tv_sec = tk->raw_sec;
				1428	nsecs = timekeeping_get_ns(&tk->tkr_raw);
				1429
				1430	} while (read_seqcount_retry(&tk_core.seq, seq));
				1431
				1432	ts->tv_nsec = 0;
				1433	timespec64_add_ns(ts, nsecs);
				1434	}
				1435	EXPORT_SYMBOL(ktime_get_raw_ts64);
				1436
				1437
				1438	/**
				1439	* timekeeping_valid_for_hres - Check if timekeeping is suitable for hres
				1440	*/
				1441	int timekeeping_valid_for_hres(void)
				1442	{
				1443	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1444	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1445	int ret;
				1446
				1447	do {
				1448	seq = read_seqcount_begin(&tk_core.seq);
				1449
				1450	ret = tk->tkr_mono.clock->flags & CLOCK_SOURCE_VALID_FOR_HRES;
				1451
				1452	} while (read_seqcount_retry(&tk_core.seq, seq));
				1453
				1454	return ret;
				1455	}
				1456
				1457	/**
				1458	* timekeeping_max_deferment - Returns max time the clocksource can be deferred
				1459	*/
				1460	u64 timekeeping_max_deferment(void)
				1461	{
				1462	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1463	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1464	u64 ret;
				1465
				1466	do {
				1467	seq = read_seqcount_begin(&tk_core.seq);
				1468
				1469	ret = tk->tkr_mono.clock->max_idle_ns;
				1470
				1471	} while (read_seqcount_retry(&tk_core.seq, seq));
				1472
				1473	return ret;
				1474	}
				1475
				1476	/**
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1477	* read_persistent_clock64 - Return time from the persistent clock.
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1478	*
				1479	* Weak dummy function for arches that do not yet support it.
				1480	* Reads the time from the battery backed persistent clock.
				1481	* Returns a timespec with tv_sec=0 and tv_nsec=0 if unsupported.
				1482	*
				1483	* XXX - Do be sure to remove it once all arches implement it.
				1484	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1485	void __weak read_persistent_clock64(struct timespec64 *ts)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1486	{
				1487	ts->tv_sec = 0;
				1488	ts->tv_nsec = 0;
				1489	}
				1490
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1491	/**
				1492	* read_persistent_wall_and_boot_offset - Read persistent clock, and also offset
				1493	* from the boot.
				1494	*
				1495	* Weak dummy function for arches that do not yet support it.
				1496	* wall_time - current time as returned by persistent clock
				1497	* boot_offset - offset that is defined as wall_time - boot_time
				1498	* The default function calculates offset based on the current value of
				1499	* local_clock(). This way architectures that support sched_clock() but don't
				1500	* support dedicated boot time clock will provide the best estimate of the
				1501	* boot time.
				1502	*/
				1503	void __weak __init
				1504	read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
				1505	struct timespec64 *boot_offset)
				1506	{
				1507	read_persistent_clock64(wall_time);
				1508	*boot_offset = ns_to_timespec64(local_clock());
				1509	}
				1510
/*
 * Flag reflecting whether timekeeping_resume() has injected sleeptime.
 *
 * The flag starts off false and is only set when a suspend reaches
 * timekeeping_suspend(). timekeeping_resume() sets it to false when the
 * timekeeper clocksource is not stopping across suspend and has been
 * used to update sleep time. If the timekeeper clocksource has stopped
 * then the flag stays true and is used by the RTC resume code to decide
 * whether sleeptime must be injected, and if so the flag is set to false
 * then.
 *
 * If a suspend fails before reaching timekeeping_resume() then the flag
 * stays false and prevents erroneous sleeptime injection.
 */
static bool suspend_timing_needed;

/*
 * Flag for if there is a persistent clock on this platform.
 *
 * Set by timekeeping_init() when the persistent clock returns a valid,
 * positive time, and by timekeeping_suspend() when it returns a non-zero
 * reading.
 */
static bool persistent_clock_exists;
				1528
				1529	/*
				1530	* timekeeping_init - Initializes the clocksource and common timekeeping values
				1531	*/
				1532	void __init timekeeping_init(void)
				1533	{
				1534	struct timespec64 wall_time, boot_offset, wall_to_mono;
				1535	struct timekeeper *tk = &tk_core.timekeeper;
				1536	struct clocksource *clock;
				1537	unsigned long flags;
				1538
				1539	read_persistent_wall_and_boot_offset(&wall_time, &boot_offset);
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	1540	if (timespec64_valid_settod(&wall_time) &&
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	1541	timespec64_to_ns(&wall_time) > 0) {
				1542	persistent_clock_exists = true;
				1543	} else if (timespec64_to_ns(&wall_time) != 0) {
				1544	pr_warn("Persistent clock returned invalid value");
				1545	wall_time = (struct timespec64){0};
				1546	}
				1547
				1548	if (timespec64_compare(&wall_time, &boot_offset) < 0)
				1549	boot_offset = (struct timespec64){0};
				1550
				1551	/*
				1552	* We want set wall_to_mono, so the following is true:
				1553	* wall time + wall_to_mono = boot time
				1554	*/
				1555	wall_to_mono = timespec64_sub(boot_offset, wall_time);
				1556
				1557	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1558	write_seqcount_begin(&tk_core.seq);
				1559	ntp_init();
				1560
				1561	clock = clocksource_default_clock();
				1562	if (clock->enable)
				1563	clock->enable(clock);
				1564	tk_setup_internals(tk, clock);
				1565
				1566	tk_set_xtime(tk, &wall_time);
				1567	tk->raw_sec = 0;
				1568
				1569	tk_set_wall_to_mono(tk, wall_to_mono);
				1570
				1571	timekeeping_update(tk, TK_MIRROR \| TK_CLOCK_WAS_SET);
				1572
				1573	write_seqcount_end(&tk_core.seq);
				1574	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1575	}
				1576
/*
 * Persistent clock reading taken when suspend began. Filled in by
 * timekeeping_suspend(); timekeeping_resume() compares it with the reading
 * taken at resume to derive the sleep interval.
 */
static struct timespec64 timekeeping_suspend_time;
				1579
				1580	/**
				1581	* __timekeeping_inject_sleeptime - Internal function to add sleep interval
				1582	* @delta: pointer to a timespec delta value
				1583	*
				1584	* Takes a timespec offset measuring a suspend interval and properly
				1585	* adds the sleep offset to the timekeeping variables.
				1586	*/
				1587	static void __timekeeping_inject_sleeptime(struct timekeeper *tk,
				1588	const struct timespec64 *delta)
				1589	{
				1590	if (!timespec64_valid_strict(delta)) {
				1591	printk_deferred(KERN_WARNING
				1592	"__timekeeping_inject_sleeptime: Invalid "
				1593	"sleep delta value!\n");
				1594	return;
				1595	}
				1596	tk_xtime_add(tk, delta);
				1597	tk_set_wall_to_mono(tk, timespec64_sub(tk->wall_to_monotonic, *delta));
				1598	tk_update_sleep_time(tk, timespec64_to_ktime(*delta));
				1599	tk_debug_account_sleep_time(delta);
				1600	}
				1601
				1602	#if defined(CONFIG_PM_SLEEP) && defined(CONFIG_RTC_HCTOSYS_DEVICE)
				1603	/**
				1604	* We have three kinds of time sources to use for sleep time
				1605	* injection, the preference order is:
				1606	* 1) non-stop clocksource
				1607	* 2) persistent clock (ie: RTC accessible when irqs are off)
				1608	* 3) RTC
				1609	*
				1610	* 1) and 2) are used by timekeeping, 3) by RTC subsystem.
				1611	* If system has neither 1) nor 2), 3) will be used finally.
				1612	*
				1613	*
				1614	* If timekeeping has injected sleeptime via either 1) or 2),
				1615	* 3) becomes needless, so in this case we don't need to call
				1616	* rtc_resume(), and this is what timekeeping_rtc_skipresume()
				1617	* means.
				1618	*/
				1619	bool timekeeping_rtc_skipresume(void)
				1620	{
				1621	return !suspend_timing_needed;
				1622	}
				1623
				1624	/**
				1625	* 1) can be determined whether to use or not only when doing
				1626	* timekeeping_resume() which is invoked after rtc_suspend(),
				1627	* so we can't skip rtc_suspend() surely if system has 1).
				1628	*
				1629	* But if system has 2), 2) will definitely be used, so in this
				1630	* case we don't need to call rtc_suspend(), and this is what
				1631	* timekeeping_rtc_skipsuspend() means.
				1632	*/
				1633	bool timekeeping_rtc_skipsuspend(void)
				1634	{
				1635	return persistent_clock_exists;
				1636	}
				1637
				1638	/**
				1639	* timekeeping_inject_sleeptime64 - Adds suspend interval to timeekeeping values
				1640	* @delta: pointer to a timespec64 delta value
				1641	*
				1642	* This hook is for architectures that cannot support read_persistent_clock64
				1643	* because their RTC/persistent clock is only accessible when irqs are enabled.
				1644	* and also don't have an effective nonstop clocksource.
				1645	*
				1646	* This function should only be called by rtc_resume(), and allows
				1647	* a suspend offset to be injected into the timekeeping values.
				1648	*/
				1649	void timekeeping_inject_sleeptime64(const struct timespec64 *delta)
				1650	{
				1651	struct timekeeper *tk = &tk_core.timekeeper;
				1652	unsigned long flags;
				1653
				1654	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1655	write_seqcount_begin(&tk_core.seq);
				1656
				1657	suspend_timing_needed = false;
				1658
				1659	timekeeping_forward_now(tk);
				1660
				1661	__timekeeping_inject_sleeptime(tk, delta);
				1662
				1663	timekeeping_update(tk, TK_CLEAR_NTP \| TK_MIRROR \| TK_CLOCK_WAS_SET);
				1664
				1665	write_seqcount_end(&tk_core.seq);
				1666	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1667
				1668	/* signal hrtimers about time change */
				1669	clock_was_set();
				1670	}
				1671	#endif
				1672
				1673	/**
				1674	* timekeeping_resume - Resumes the generic timekeeping subsystem.
				1675	*/
				1676	void timekeeping_resume(void)
				1677	{
				1678	struct timekeeper *tk = &tk_core.timekeeper;
				1679	struct clocksource *clock = tk->tkr_mono.clock;
				1680	unsigned long flags;
				1681	struct timespec64 ts_new, ts_delta;
				1682	u64 cycle_now, nsec;
				1683	bool inject_sleeptime = false;
				1684
				1685	read_persistent_clock64(&ts_new);
				1686
				1687	clockevents_resume();
				1688	clocksource_resume();
				1689
				1690	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1691	write_seqcount_begin(&tk_core.seq);
				1692
				1693	/*
				1694	* After system resumes, we need to calculate the suspended time and
				1695	* compensate it for the OS time. There are 3 sources that could be
				1696	* used: Nonstop clocksource during suspend, persistent clock and rtc
				1697	* device.
				1698	*
				1699	* One specific platform may have 1 or 2 or all of them, and the
				1700	* preference will be:
				1701	* suspend-nonstop clocksource -> persistent clock -> rtc
				1702	* The less preferred source will only be tried if there is no better
				1703	* usable source. The rtc part is handled separately in rtc core code.
				1704	*/
				1705	cycle_now = tk_clock_read(&tk->tkr_mono);
				1706	nsec = clocksource_stop_suspend_timing(clock, cycle_now);
				1707	if (nsec > 0) {
				1708	ts_delta = ns_to_timespec64(nsec);
				1709	inject_sleeptime = true;
				1710	} else if (timespec64_compare(&ts_new, &timekeeping_suspend_time) > 0) {
				1711	ts_delta = timespec64_sub(ts_new, timekeeping_suspend_time);
				1712	inject_sleeptime = true;
				1713	}
				1714
				1715	if (inject_sleeptime) {
				1716	suspend_timing_needed = false;
				1717	__timekeeping_inject_sleeptime(tk, &ts_delta);
				1718	}
				1719
				1720	/* Re-base the last cycle value */
				1721	tk->tkr_mono.cycle_last = cycle_now;
				1722	tk->tkr_raw.cycle_last = cycle_now;
				1723
				1724	tk->ntp_error = 0;
				1725	timekeeping_suspended = 0;
				1726	timekeeping_update(tk, TK_MIRROR \| TK_CLOCK_WAS_SET);
				1727	write_seqcount_end(&tk_core.seq);
				1728	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1729
				1730	touch_softlockup_watchdog();
				1731
				1732	tick_resume();
				1733	hrtimers_resume();
				1734	}
				1735
				1736	int timekeeping_suspend(void)
				1737	{
				1738	struct timekeeper *tk = &tk_core.timekeeper;
				1739	unsigned long flags;
				1740	struct timespec64 delta, delta_delta;
				1741	static struct timespec64 old_delta;
				1742	struct clocksource *curr_clock;
				1743	u64 cycle_now;
				1744
				1745	read_persistent_clock64(&timekeeping_suspend_time);
				1746
				1747	/*
				1748	* On some systems the persistent_clock can not be detected at
				1749	* timekeeping_init by its return value, so if we see a valid
				1750	* value returned, update the persistent_clock_exists flag.
				1751	*/
				1752	if (timekeeping_suspend_time.tv_sec \|\| timekeeping_suspend_time.tv_nsec)
				1753	persistent_clock_exists = true;
				1754
				1755	suspend_timing_needed = true;
				1756
				1757	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				1758	write_seqcount_begin(&tk_core.seq);
				1759	timekeeping_forward_now(tk);
				1760	timekeeping_suspended = 1;
				1761
				1762	/*
				1763	* Since we've called forward_now, cycle_last stores the value
				1764	* just read from the current clocksource. Save this to potentially
				1765	* use in suspend timing.
				1766	*/
				1767	curr_clock = tk->tkr_mono.clock;
				1768	cycle_now = tk->tkr_mono.cycle_last;
				1769	clocksource_start_suspend_timing(curr_clock, cycle_now);
				1770
				1771	if (persistent_clock_exists) {
				1772	/*
				1773	* To avoid drift caused by repeated suspend/resumes,
				1774	* which each can add ~1 second drift error,
				1775	* try to compensate so the difference in system time
				1776	* and persistent_clock time stays close to constant.
				1777	*/
				1778	delta = timespec64_sub(tk_xtime(tk), timekeeping_suspend_time);
				1779	delta_delta = timespec64_sub(delta, old_delta);
				1780	if (abs(delta_delta.tv_sec) >= 2) {
				1781	/*
				1782	* if delta_delta is too large, assume time correction
				1783	* has occurred and set old_delta to the current delta.
				1784	*/
				1785	old_delta = delta;
				1786	} else {
				1787	/* Otherwise try to adjust old_system to compensate */
				1788	timekeeping_suspend_time =
				1789	timespec64_add(timekeeping_suspend_time, delta_delta);
				1790	}
				1791	}
				1792
				1793	timekeeping_update(tk, TK_MIRROR);
				1794	halt_fast_timekeeper(tk);
				1795	write_seqcount_end(&tk_core.seq);
				1796	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				1797
				1798	tick_suspend();
				1799	clocksource_suspend();
				1800	clockevents_suspend();
				1801
				1802	return 0;
				1803	}
				1804
				1805	/* sysfs resume/suspend bits for timekeeping */
				1806	static struct syscore_ops timekeeping_syscore_ops = {
				1807	.resume = timekeeping_resume,
				1808	.suspend = timekeeping_suspend,
				1809	};
				1810
				1811	static int __init timekeeping_init_ops(void)
				1812	{
				1813	register_syscore_ops(&timekeeping_syscore_ops);
				1814	return 0;
				1815	}
				1816	device_initcall(timekeeping_init_ops);
				1817
				1818	/*
				1819	* Apply a multiplier adjustment to the timekeeper
				1820	*/
				1821	static __always_inline void timekeeping_apply_adjustment(struct timekeeper *tk,
				1822	s64 offset,
				1823	s32 mult_adj)
				1824	{
				1825	s64 interval = tk->cycle_interval;
				1826
				1827	if (mult_adj == 0) {
				1828	return;
				1829	} else if (mult_adj == -1) {
				1830	interval = -interval;
				1831	offset = -offset;
				1832	} else if (mult_adj != 1) {
				1833	interval *= mult_adj;
				1834	offset *= mult_adj;
				1835	}
				1836
				1837	/*
				1838	* So the following can be confusing.
				1839	*
				1840	* To keep things simple, lets assume mult_adj == 1 for now.
				1841	*
				1842	* When mult_adj != 1, remember that the interval and offset values
				1843	* have been appropriately scaled so the math is the same.
				1844	*
				1845	* The basic idea here is that we're increasing the multiplier
				1846	* by one, this causes the xtime_interval to be incremented by
				1847	* one cycle_interval. This is because:
				1848	* xtime_interval = cycle_interval * mult
				1849	* So if mult is being incremented by one:
				1850	* xtime_interval = cycle_interval * (mult + 1)
				1851	* Its the same as:
				1852	* xtime_interval = (cycle_interval * mult) + cycle_interval
				1853	* Which can be shortened to:
				1854	* xtime_interval += cycle_interval
				1855	*
				1856	* So offset stores the non-accumulated cycles. Thus the current
				1857	* time (in shifted nanoseconds) is:
				1858	* now = (offset * adj) + xtime_nsec
				1859	* Now, even though we're adjusting the clock frequency, we have
				1860	* to keep time consistent. In other words, we can't jump back
				1861	* in time, and we also want to avoid jumping forward in time.
				1862	*
				1863	* So given the same offset value, we need the time to be the same
				1864	* both before and after the freq adjustment.
				1865	* now = (offset * adj_1) + xtime_nsec_1
				1866	* now = (offset * adj_2) + xtime_nsec_2
				1867	* So:
				1868	* (offset * adj_1) + xtime_nsec_1 =
				1869	* (offset * adj_2) + xtime_nsec_2
				1870	* And we know:
				1871	* adj_2 = adj_1 + 1
				1872	* So:
				1873	* (offset * adj_1) + xtime_nsec_1 =
				1874	* (offset * (adj_1+1)) + xtime_nsec_2
				1875	* (offset * adj_1) + xtime_nsec_1 =
				1876	* (offset * adj_1) + offset + xtime_nsec_2
				1877	* Canceling the sides:
				1878	* xtime_nsec_1 = offset + xtime_nsec_2
				1879	* Which gives us:
				1880	* xtime_nsec_2 = xtime_nsec_1 - offset
				1881	* Which simplfies to:
				1882	* xtime_nsec -= offset
				1883	*/
				1884	if ((mult_adj > 0) && (tk->tkr_mono.mult + mult_adj < mult_adj)) {
				1885	/* NTP adjustment caused clocksource mult overflow */
				1886	WARN_ON_ONCE(1);
				1887	return;
				1888	}
				1889
				1890	tk->tkr_mono.mult += mult_adj;
				1891	tk->xtime_interval += interval;
				1892	tk->tkr_mono.xtime_nsec -= offset;
				1893	}
				1894
				1895	/*
				1896	* Adjust the timekeeper's multiplier to the correct frequency
				1897	* and also to reduce the accumulated error value.
				1898	*/
				1899	static void timekeeping_adjust(struct timekeeper *tk, s64 offset)
				1900	{
				1901	u32 mult;
				1902
				1903	/*
				1904	* Determine the multiplier from the current NTP tick length.
				1905	* Avoid expensive division when the tick length doesn't change.
				1906	*/
				1907	if (likely(tk->ntp_tick == ntp_tick_length())) {
				1908	mult = tk->tkr_mono.mult - tk->ntp_err_mult;
				1909	} else {
				1910	tk->ntp_tick = ntp_tick_length();
				1911	mult = div64_u64((tk->ntp_tick >> tk->ntp_error_shift) -
				1912	tk->xtime_remainder, tk->cycle_interval);
				1913	}
				1914
				1915	/*
				1916	* If the clock is behind the NTP time, increase the multiplier by 1
				1917	* to catch up with it. If it's ahead and there was a remainder in the
				1918	* tick division, the clock will slow down. Otherwise it will stay
				1919	* ahead until the tick length changes to a non-divisible value.
				1920	*/
				1921	tk->ntp_err_mult = tk->ntp_error > 0 ? 1 : 0;
				1922	mult += tk->ntp_err_mult;
				1923
				1924	timekeeping_apply_adjustment(tk, offset, mult - tk->tkr_mono.mult);
				1925
				1926	if (unlikely(tk->tkr_mono.clock->maxadj &&
				1927	(abs(tk->tkr_mono.mult - tk->tkr_mono.clock->mult)
				1928	> tk->tkr_mono.clock->maxadj))) {
				1929	printk_once(KERN_WARNING
				1930	"Adjusting %s more than 11%% (%ld vs %ld)\n",
				1931	tk->tkr_mono.clock->name, (long)tk->tkr_mono.mult,
				1932	(long)tk->tkr_mono.clock->mult + tk->tkr_mono.clock->maxadj);
				1933	}
				1934
				1935	/*
				1936	* It may be possible that when we entered this function, xtime_nsec
				1937	* was very small. Further, if we're slightly speeding the clocksource
				1938	* in the code above, its possible the required corrective factor to
				1939	* xtime_nsec could cause it to underflow.
				1940	*
				1941	* Now, since we have already accumulated the second and the NTP
				1942	* subsystem has been notified via second_overflow(), we need to skip
				1943	* the next update.
				1944	*/
				1945	if (unlikely((s64)tk->tkr_mono.xtime_nsec < 0)) {
				1946	tk->tkr_mono.xtime_nsec += (u64)NSEC_PER_SEC <<
				1947	tk->tkr_mono.shift;
				1948	tk->xtime_sec--;
				1949	tk->skip_second_overflow = 1;
				1950	}
				1951	}
				1952
				1953	/**
				1954	* accumulate_nsecs_to_secs - Accumulates nsecs into secs
				1955	*
				1956	* Helper function that accumulates the nsecs greater than a second
				1957	* from the xtime_nsec field to the xtime_secs field.
				1958	* It also calls into the NTP code to handle leapsecond processing.
				1959	*
				1960	*/
				1961	static inline unsigned int accumulate_nsecs_to_secs(struct timekeeper *tk)
				1962	{
				1963	u64 nsecps = (u64)NSEC_PER_SEC << tk->tkr_mono.shift;
				1964	unsigned int clock_set = 0;
				1965
				1966	while (tk->tkr_mono.xtime_nsec >= nsecps) {
				1967	int leap;
				1968
				1969	tk->tkr_mono.xtime_nsec -= nsecps;
				1970	tk->xtime_sec++;
				1971
				1972	/*
				1973	* Skip NTP update if this second was accumulated before,
				1974	* i.e. xtime_nsec underflowed in timekeeping_adjust()
				1975	*/
				1976	if (unlikely(tk->skip_second_overflow)) {
				1977	tk->skip_second_overflow = 0;
				1978	continue;
				1979	}
				1980
				1981	/* Figure out if its a leap sec and apply if needed */
				1982	leap = second_overflow(tk->xtime_sec);
				1983	if (unlikely(leap)) {
				1984	struct timespec64 ts;
				1985
				1986	tk->xtime_sec += leap;
				1987
				1988	ts.tv_sec = leap;
				1989	ts.tv_nsec = 0;
				1990	tk_set_wall_to_mono(tk,
				1991	timespec64_sub(tk->wall_to_monotonic, ts));
				1992
				1993	__timekeeping_set_tai_offset(tk, tk->tai_offset - leap);
				1994
				1995	clock_set = TK_CLOCK_WAS_SET;
				1996	}
				1997	}
				1998	return clock_set;
				1999	}
				2000
				2001	/**
				2002	* logarithmic_accumulation - shifted accumulation of cycles
				2003	*
				2004	* This functions accumulates a shifted interval of cycles into
				2005	* into a shifted interval nanoseconds. Allows for O(log) accumulation
				2006	* loop.
				2007	*
				2008	* Returns the unconsumed cycles.
				2009	*/
				2010	static u64 logarithmic_accumulation(struct timekeeper *tk, u64 offset,
				2011	u32 shift, unsigned int *clock_set)
				2012	{
				2013	u64 interval = tk->cycle_interval << shift;
				2014	u64 snsec_per_sec;
				2015
				2016	/* If the offset is smaller than a shifted interval, do nothing */
				2017	if (offset < interval)
				2018	return offset;
				2019
				2020	/* Accumulate one shifted interval */
				2021	offset -= interval;
				2022	tk->tkr_mono.cycle_last += interval;
				2023	tk->tkr_raw.cycle_last += interval;
				2024
				2025	tk->tkr_mono.xtime_nsec += tk->xtime_interval << shift;
				2026	*clock_set \|= accumulate_nsecs_to_secs(tk);
				2027
				2028	/* Accumulate raw time */
				2029	tk->tkr_raw.xtime_nsec += tk->raw_interval << shift;
				2030	snsec_per_sec = (u64)NSEC_PER_SEC << tk->tkr_raw.shift;
				2031	while (tk->tkr_raw.xtime_nsec >= snsec_per_sec) {
				2032	tk->tkr_raw.xtime_nsec -= snsec_per_sec;
				2033	tk->raw_sec++;
				2034	}
				2035
				2036	/* Accumulate error between NTP and clock interval */
				2037	tk->ntp_error += tk->ntp_tick << shift;
				2038	tk->ntp_error -= (tk->xtime_interval + tk->xtime_remainder) <<
				2039	(tk->ntp_error_shift + shift);
				2040
				2041	return offset;
				2042	}
				2043
				2044	/*
				2045	* timekeeping_advance - Updates the timekeeper to the current time and
				2046	* current NTP tick length
				2047	*/
				2048	static void timekeeping_advance(enum timekeeping_adv_mode mode)
				2049	{
				2050	struct timekeeper *real_tk = &tk_core.timekeeper;
				2051	struct timekeeper *tk = &shadow_timekeeper;
				2052	u64 offset;
				2053	int shift = 0, maxshift;
				2054	unsigned int clock_set = 0;
				2055	unsigned long flags;
				2056
				2057	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				2058
				2059	/* Make sure we're fully resumed: */
				2060	if (unlikely(timekeeping_suspended))
				2061	goto out;
				2062
				2063	#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET
				2064	offset = real_tk->cycle_interval;
				2065
				2066	if (mode != TK_ADV_TICK)
				2067	goto out;
				2068	#else
				2069	offset = clocksource_delta(tk_clock_read(&tk->tkr_mono),
				2070	tk->tkr_mono.cycle_last, tk->tkr_mono.mask);
				2071
				2072	/* Check if there's really nothing to do */
				2073	if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK)
				2074	goto out;
				2075	#endif
				2076
				2077	/* Do some additional sanity checking */
				2078	timekeeping_check_update(tk, offset);
				2079
				2080	/*
				2081	* With NO_HZ we may have to accumulate many cycle_intervals
				2082	* (think "ticks") worth of time at once. To do this efficiently,
				2083	* we calculate the largest doubling multiple of cycle_intervals
				2084	* that is smaller than the offset. We then accumulate that
				2085	* chunk in one go, and then try to consume the next smaller
				2086	* doubled multiple.
				2087	*/
				2088	shift = ilog2(offset) - ilog2(tk->cycle_interval);
				2089	shift = max(0, shift);
				2090	/* Bound shift to one less than what overflows tick_length */
				2091	maxshift = (64 - (ilog2(ntp_tick_length())+1)) - 1;
				2092	shift = min(shift, maxshift);
				2093	while (offset >= tk->cycle_interval) {
				2094	offset = logarithmic_accumulation(tk, offset, shift,
				2095	&clock_set);
				2096	if (offset < tk->cycle_interval<<shift)
				2097	shift--;
				2098	}
				2099
				2100	/* Adjust the multiplier to correct NTP error */
				2101	timekeeping_adjust(tk, offset);
				2102
				2103	/*
				2104	* Finally, make sure that after the rounding
				2105	* xtime_nsec isn't larger than NSEC_PER_SEC
				2106	*/
				2107	clock_set \|= accumulate_nsecs_to_secs(tk);
				2108
				2109	write_seqcount_begin(&tk_core.seq);
				2110	/*
				2111	* Update the real timekeeper.
				2112	*
				2113	* We could avoid this memcpy by switching pointers, but that
				2114	* requires changes to all other timekeeper usage sites as
				2115	* well, i.e. move the timekeeper pointer getter into the
				2116	* spinlocked/seqcount protected sections. And we trade this
				2117	* memcpy under the tk_core.seq against one before we start
				2118	* updating.
				2119	*/
				2120	timekeeping_update(tk, clock_set);
				2121	memcpy(real_tk, tk, sizeof(*tk));
				2122	/* The memcpy must come last. Do not put anything here! */
				2123	write_seqcount_end(&tk_core.seq);
				2124	out:
				2125	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				2126	if (clock_set)
				2127	/* Have to call _delayed version, since in irq context*/
				2128	clock_was_set_delayed();
				2129	}
				2130
				2131	/**
				2132	* update_wall_time - Uses the current clocksource to increment the wall time
				2133	*
				2134	*/
				2135	void update_wall_time(void)
				2136	{
				2137	timekeeping_advance(TK_ADV_TICK);
				2138	}
				2139
				2140	/**
				2141	* getboottime64 - Return the real time of system boot.
				2142	* @ts: pointer to the timespec64 to be set
				2143	*
				2144	* Returns the wall-time of boot in a timespec64.
				2145	*
				2146	* This is based on the wall_to_monotonic offset and the total suspend
				2147	* time. Calls to settimeofday will affect the value returned (which
				2148	* basically means that however wrong your real time clock is at boot time,
				2149	* you get the right time here).
				2150	*/
				2151	void getboottime64(struct timespec64 *ts)
				2152	{
				2153	struct timekeeper *tk = &tk_core.timekeeper;
				2154	ktime_t t = ktime_sub(tk->offs_real, tk->offs_boot);
				2155
				2156	*ts = ktime_to_timespec64(t);
				2157	}
				2158	EXPORT_SYMBOL_GPL(getboottime64);
				2159
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2160	void ktime_get_coarse_real_ts64(struct timespec64 *ts)
				2161	{
				2162	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2163	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2164
				2165	do {
				2166	seq = read_seqcount_begin(&tk_core.seq);
				2167
				2168	*ts = tk_xtime(tk);
				2169	} while (read_seqcount_retry(&tk_core.seq, seq));
				2170	}
				2171	EXPORT_SYMBOL(ktime_get_coarse_real_ts64);
				2172
				2173	void ktime_get_coarse_ts64(struct timespec64 *ts)
				2174	{
				2175	struct timekeeper *tk = &tk_core.timekeeper;
				2176	struct timespec64 now, mono;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2177	unsigned int seq;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2178
				2179	do {
				2180	seq = read_seqcount_begin(&tk_core.seq);
				2181
				2182	now = tk_xtime(tk);
				2183	mono = tk->wall_to_monotonic;
				2184	} while (read_seqcount_retry(&tk_core.seq, seq));
				2185
				2186	set_normalized_timespec64(ts, now.tv_sec + mono.tv_sec,
				2187	now.tv_nsec + mono.tv_nsec);
				2188	}
				2189	EXPORT_SYMBOL(ktime_get_coarse_ts64);
				2190
				2191	/*
				2192	* Must hold jiffies_lock
				2193	*/
				2194	void do_timer(unsigned long ticks)
				2195	{
				2196	jiffies_64 += ticks;
				2197	calc_global_load(ticks);
				2198	}
				2199
				2200	/**
				2201	* ktime_get_update_offsets_now - hrtimer helper
				2202	* @cwsseq: pointer to check and store the clock was set sequence number
				2203	* @offs_real: pointer to storage for monotonic -> realtime offset
				2204	* @offs_boot: pointer to storage for monotonic -> boottime offset
				2205	* @offs_tai: pointer to storage for monotonic -> clock tai offset
				2206	*
				2207	* Returns current monotonic time and updates the offsets if the
				2208	* sequence number in @cwsseq and timekeeper.clock_was_set_seq are
				2209	* different.
				2210	*
				2211	* Called from hrtimer_interrupt() or retrigger_next_event()
				2212	*/
				2213	ktime_t ktime_get_update_offsets_now(unsigned int cwsseq, ktime_t offs_real,
				2214	ktime_t offs_boot, ktime_t offs_tai)
				2215	{
				2216	struct timekeeper *tk = &tk_core.timekeeper;
				2217	unsigned int seq;
				2218	ktime_t base;
				2219	u64 nsecs;
				2220
				2221	do {
				2222	seq = read_seqcount_begin(&tk_core.seq);
				2223
				2224	base = tk->tkr_mono.base;
				2225	nsecs = timekeeping_get_ns(&tk->tkr_mono);
				2226	base = ktime_add_ns(base, nsecs);
				2227
				2228	if (*cwsseq != tk->clock_was_set_seq) {
				2229	*cwsseq = tk->clock_was_set_seq;
				2230	*offs_real = tk->offs_real;
				2231	*offs_boot = tk->offs_boot;
				2232	*offs_tai = tk->offs_tai;
				2233	}
				2234
				2235	/* Handle leapsecond insertion adjustments */
				2236	if (unlikely(base >= tk->next_leap_ktime))
				2237	*offs_real = ktime_sub(tk->offs_real, ktime_set(1, 0));
				2238
				2239	} while (read_seqcount_retry(&tk_core.seq, seq));
				2240
				2241	return base;
				2242	}
				2243
				2244	/**
				2245	* timekeeping_validate_timex - Ensures the timex is ok for use in do_adjtimex
				2246	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2247	static int timekeeping_validate_timex(const struct __kernel_timex *txc)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2248	{
				2249	if (txc->modes & ADJ_ADJTIME) {
				2250	/* singleshot must not be used with any other mode bits */
				2251	if (!(txc->modes & ADJ_OFFSET_SINGLESHOT))
				2252	return -EINVAL;
				2253	if (!(txc->modes & ADJ_OFFSET_READONLY) &&
				2254	!capable(CAP_SYS_TIME))
				2255	return -EPERM;
				2256	} else {
				2257	/* In order to modify anything, you gotta be super-user! */
				2258	if (txc->modes && !capable(CAP_SYS_TIME))
				2259	return -EPERM;
				2260	/*
				2261	* if the quartz is off by more than 10% then
				2262	* something is VERY wrong!
				2263	*/
				2264	if (txc->modes & ADJ_TICK &&
				2265	(txc->tick < 900000/USER_HZ \|\|
				2266	txc->tick > 1100000/USER_HZ))
				2267	return -EINVAL;
				2268	}
				2269
				2270	if (txc->modes & ADJ_SETOFFSET) {
				2271	/* In order to inject time, you gotta be super-user! */
				2272	if (!capable(CAP_SYS_TIME))
				2273	return -EPERM;
				2274
				2275	/*
				2276	* Validate if a timespec/timeval used to inject a time
				2277	* offset is valid. Offsets can be postive or negative, so
				2278	* we don't check tv_sec. The value of the timeval/timespec
				2279	* is the sum of its fields,but NOTE:
				2280	* The field tv_usec/tv_nsec must always be non-negative and
				2281	* we can't have more nanoseconds/microseconds than a second.
				2282	*/
				2283	if (txc->time.tv_usec < 0)
				2284	return -EINVAL;
				2285
				2286	if (txc->modes & ADJ_NANO) {
				2287	if (txc->time.tv_usec >= NSEC_PER_SEC)
				2288	return -EINVAL;
				2289	} else {
				2290	if (txc->time.tv_usec >= USEC_PER_SEC)
				2291	return -EINVAL;
				2292	}
				2293	}
				2294
				2295	/*
				2296	* Check for potential multiplication overflows that can
				2297	* only happen on 64-bit systems:
				2298	*/
				2299	if ((txc->modes & ADJ_FREQUENCY) && (BITS_PER_LONG == 64)) {
				2300	if (LLONG_MIN / PPM_SCALE > txc->freq)
				2301	return -EINVAL;
				2302	if (LLONG_MAX / PPM_SCALE < txc->freq)
				2303	return -EINVAL;
				2304	}
				2305
				2306	return 0;
				2307	}
				2308
				2309
				2310	/**
				2311	* do_adjtimex() - Accessor function to NTP __do_adjtimex function
				2312	*/
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2313	int do_adjtimex(struct __kernel_timex *txc)
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2314	{
				2315	struct timekeeper *tk = &tk_core.timekeeper;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2316	struct audit_ntp_data ad;
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2317	unsigned long flags;
				2318	struct timespec64 ts;
				2319	s32 orig_tai, tai;
				2320	int ret;
				2321
				2322	/* Validate the data before disabling interrupts */
				2323	ret = timekeeping_validate_timex(txc);
				2324	if (ret)
				2325	return ret;
				2326
				2327	if (txc->modes & ADJ_SETOFFSET) {
				2328	struct timespec64 delta;
				2329	delta.tv_sec = txc->time.tv_sec;
				2330	delta.tv_nsec = txc->time.tv_usec;
				2331	if (!(txc->modes & ADJ_NANO))
				2332	delta.tv_nsec *= 1000;
				2333	ret = timekeeping_inject_offset(&delta);
				2334	if (ret)
				2335	return ret;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2336
				2337	audit_tk_injoffset(delta);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2338	}
				2339
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2340	audit_ntp_init(&ad);
				2341
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2342	ktime_get_real_ts64(&ts);
				2343
				2344	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				2345	write_seqcount_begin(&tk_core.seq);
				2346
				2347	orig_tai = tai = tk->tai_offset;
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2348	ret = __do_adjtimex(txc, &ts, &tai, &ad);
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2349
				2350	if (tai != orig_tai) {
				2351	__timekeeping_set_tai_offset(tk, tai);
				2352	timekeeping_update(tk, TK_MIRROR \| TK_CLOCK_WAS_SET);
				2353	}
				2354	tk_update_leap_state(tk);
				2355
				2356	write_seqcount_end(&tk_core.seq);
				2357	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				2358
David Brazdil	0f672f6	2019-12-10 10:32:29 +0000	[diff] [blame]	2359	audit_ntp_log(&ad);
				2360
Andrew Scull	b4b6d4a	2019-01-02 15:54:55 +0000	[diff] [blame]	2361	/* Update the multiplier immediately if frequency was set directly */
				2362	if (txc->modes & (ADJ_FREQUENCY \| ADJ_TICK))
				2363	timekeeping_advance(TK_ADV_FREQ);
				2364
				2365	if (tai != orig_tai)
				2366	clock_was_set();
				2367
				2368	ntp_notify_cmos_timer();
				2369
				2370	return ret;
				2371	}
				2372
				2373	#ifdef CONFIG_NTP_PPS
				2374	/**
				2375	* hardpps() - Accessor function to NTP __hardpps function
				2376	*/
				2377	void hardpps(const struct timespec64 phase_ts, const struct timespec64 raw_ts)
				2378	{
				2379	unsigned long flags;
				2380
				2381	raw_spin_lock_irqsave(&timekeeper_lock, flags);
				2382	write_seqcount_begin(&tk_core.seq);
				2383
				2384	__hardpps(phase_ts, raw_ts);
				2385
				2386	write_seqcount_end(&tk_core.seq);
				2387	raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
				2388	}
				2389	EXPORT_SYMBOL(hardpps);
				2390	#endif /* CONFIG_NTP_PPS */
				2391
				2392	/**
				2393	* xtime_update() - advances the timekeeping infrastructure
				2394	* @ticks: number of ticks, that have elapsed since the last call.
				2395	*
				2396	* Must be called with interrupts disabled.
				2397	*/
				2398	void xtime_update(unsigned long ticks)
				2399	{
				2400	write_seqlock(&jiffies_lock);
				2401	do_timer(ticks);
				2402	write_sequnlock(&jiffies_lock);
				2403	update_wall_time();
				2404	}