531551
From 4a44a54b3caf77923f0e3f1d5bdf5eda6ef07f62 Mon Sep 17 00:00:00 2001
531551
From: Chris MacGregor <chrismacgregor@google.com>
531551
Date: Thu, 27 Feb 2014 10:40:59 -0800
531551
Subject: [PATCH] hwclock: fix possible hang and other
531551
 set_hardware_clock_exact() issues
531551
531551
In sys-utils/hwclock.c, set_hardware_clock_exact() has some problems when the
531551
process gets pre-empted (for more than 100ms) before reaching the time for
531551
which it waits:
531551
531551
1. The "continue" statement causes execution to skip the final tdiff
531551
assignment at the end of the do...while loop, leading to the while condition
531551
using the wrong value of tdiff, and thus always exiting the loop once
531551
newhwtime != sethwtime (e.g., after 1 second).  This masks bug #2, below.
531551
531551
2. The previously-existing bug is that because it starts over waiting for the
531551
desired time whenever two successive calls to gettimeofday() return values >
531551
100ms apart, the loop will never terminate unless the process holds the CPU
531551
(without losing it for more than 100ms) for at least 500ms.  This can happen
531551
on a heavily loaded machine or on a virtual machine (or on a heavily loaded
531551
virtual machine).  This has been observed to occur, preventing a machine from
531551
completing the shutdown or reboot process due to a "hwclock --systohc" call in
531551
a shutdown script.
531551
531551
The new implementation presented in this patch takes a somewhat different
531551
approach, intended to accomplish the same goals:
531551
531551
It computes the desired target system time (at which the requested hardware
531551
clock time will be applied to the hardware clock), and waits for that time to
531551
arrive.  If it misses the time (such as due to being pre-empted for too long),
531551
it recalculates the target time, and increases the tolerance (how late it can
531551
be relative to the target time, and still be "close enough").  Thus, if all is
531551
well, the time will be set *very* precisely.  On a machine where the hwclock
531551
process is repeatedly pre-empted, it will set the time as precisely as is
531551
possible under the conditions present on that particular machine.  In any
531551
case, it will always terminate eventually (and pretty quickly); it will never
531551
hang forever.
531551
531551
[kzak@redhat.com: - tiny coding style changes]
531551
531551
Signed-off-by: Chris MacGregor <chrismacgregor@google.com>
531551
Signed-off-by: Karel Zak <kzak@redhat.com>
531551
---
531551
 sys-utils/hwclock.c | 170 ++++++++++++++++++++++++++++++++++++++++------------
531551
 1 file changed, 131 insertions(+), 39 deletions(-)
531551
531551
diff --git a/sys-utils/hwclock.c b/sys-utils/hwclock.c
531551
index 30660d4..395b5c3 100644
531551
--- a/sys-utils/hwclock.c
531551
+++ b/sys-utils/hwclock.c
531551
@@ -125,7 +125,7 @@ struct adjtime {
531551
  * We are running in debug mode, wherein we put a lot of information about
531551
  * what we're doing to standard output.
531551
  */
531551
-bool debug;
531551
+int debug;
531551
 
531551
 /* Workaround for Award 4.50g BIOS bug: keep the year in a file. */
531551
 bool badyear;
531551
@@ -526,43 +526,141 @@ set_hardware_clock_exact(const time_t sethwtime,
531551
 			 const struct timeval refsystime,
531551
 			 const bool universal, const bool testing)
531551
 {
531551
-	time_t newhwtime = sethwtime;
531551
-	struct timeval beginsystime, nowsystime;
531551
-	double tdiff;
531551
-	int time_resync = 1;
531551
-
531551
 	/*
531551
-	 * Now delay some more until Hardware Clock time newhwtime arrives.
531551
-	 * The 0.5 s is because the Hardware Clock always sets to your set
531551
-	 * time plus 500 ms (because it is designed to update to the next
531551
-	 * second precisely 500 ms after you finish the setting).
531551
+	 * The Hardware Clock can only be set to any integer time plus one
531551
+	 * half second.	 The integer time is required because there is no
531551
+	 * interface to set or get a fractional second.	 The additional half
531551
+	 * second is because the Hardware Clock updates to the following
531551
+	 * second precisely 500 ms (not 1 second!) after you release the
531551
+	 * divider reset (after setting the new time) - see description of
531551
+	 * DV2, DV1, DV0 in Register A in the MC146818A data sheet (and note
531551
+	 * that although that document doesn't say so, real-world code seems
531551
+	 * to expect that the SET bit in Register B functions the same way).
531551
+	 * That means that, e.g., when you set the clock to 1:02:03, it
531551
+	 * effectively really sets it to 1:02:03.5, because it will update to
531551
+	 * 1:02:04 only half a second later.  Our caller passes the desired
531551
+	 * integer Hardware Clock time in sethwtime, and the corresponding
531551
+	 * system time (which may have a fractional part, and which may or may
531551
+	 * not be the same!) in refsystime.  In an ideal situation, we would
531551
+	 * then apply sethwtime to the Hardware Clock at refsystime+500ms, so
531551
+	 * that when the Hardware Clock ticks forward to sethwtime+1s half a
531551
+	 * second later at refsystime+1000ms, everything is in sync.  So we
531551
+	 * spin, waiting for gettimeofday() to return a time at or after that
531551
+	 * time (refsystime+500ms) up to a tolerance value, initially 1ms.  If
531551
+	 * we miss that time due to being preempted for some other process,
531551
+	 * then we increase the margin a little bit (initially 1ms, doubling
531551
+	 * each time), add 1 second (or more, if needed to get a time that is
531551
+	 * in the future) to both the time for which we are waiting and the
531551
+	 * time that we will apply to the Hardware Clock, and start waiting
531551
+	 * again.
531551
+	 * 
531551
+	 * For example, the caller requests that we set the Hardware Clock to
531551
+	 * 1:02:03, with reference time (current system time) = 6:07:08.250.
531551
+	 * We want the Hardware Clock to update to 1:02:04 at 6:07:09.250 on
531551
+	 * the system clock, and the first such update will occur 0.500
531551
+	 * seconds after we write to the Hardware Clock, so we spin until the
531551
+	 * system clock reads 6:07:08.750.  If we get there, great, but let's
531551
+	 * imagine the system is so heavily loaded that our process is
531551
+	 * preempted and by the time we get to run again, the system clock
531551
+	 * reads 6:07:11.990.  We now want to wait until the next xx:xx:xx.750
531551
+	 * time, which is 6:07:12.750 (4.5 seconds after the reference time),
531551
+	 * at which point we will set the Hardware Clock to 1:02:07 (4 seconds
531551
+	 * after the originally requested time).  If we do that successfully,
531551
+	 * then at 6:07:13.250 (5 seconds after the reference time), the
531551
+	 * Hardware Clock will update to 1:02:08 (5 seconds after the
531551
+	 * originally requested time), and all is well thereafter.
531551
 	 */
531551
-	do {
531551
-		if (time_resync) {
531551
-			gettimeofday(&beginsystime, NULL);
531551
-			tdiff = time_diff(beginsystime, refsystime);
531551
-			newhwtime = sethwtime + (int)(tdiff + 0.5);
531551
-			if (debug)
531551
-				printf(_
531551
-				       ("Time elapsed since reference time has been %.6f seconds.\n"
531551
-					"Delaying further to reach the new time.\n"),
531551
-				       tdiff);
531551
-			time_resync = 0;
531551
+
531551
+	time_t newhwtime = sethwtime;
531551
+	double target_time_tolerance_secs = 0.001;  /* initial value */
531551
+	double tolerance_incr_secs = 0.001;	    /* initial value */
531551
+	const double RTC_SET_DELAY_SECS = 0.5;	    /* 500 ms */
531551
+	const struct timeval RTC_SET_DELAY_TV = { 0, RTC_SET_DELAY_SECS * 1E6 };
531551
+
531551
+	struct timeval targetsystime;
531551
+	struct timeval nowsystime;
531551
+	struct timeval prevsystime = refsystime;
531551
+	double deltavstarget;
531551
+
531551
+	timeradd(&refsystime, &RTC_SET_DELAY_TV, &targetsystime);
531551
+
531551
+	while (1) {
531551
+		double ticksize;
531551
+
531551
+		/* FOR TESTING ONLY: inject random delays of up to 1000ms */
531551
+		if (debug >= 10) {
531551
+			int usec = random() % 1000000;
531551
+			printf(_("sleeping ~%d usec\n"), usec);
531551
+			usleep(usec);
531551
 		}
531551
 
531551
 		gettimeofday(&nowsystime, NULL);
531551
-		tdiff = time_diff(nowsystime, beginsystime);
531551
-		if (tdiff < 0) {
531551
-			time_resync = 1;	/* probably backward time reset */
531551
-			continue;
531551
-		}
531551
-		if (tdiff > 0.1) {
531551
-			time_resync = 1;	/* probably forward time reset */
531551
-			continue;
531551
+		deltavstarget = time_diff(nowsystime, targetsystime);
531551
+		ticksize = time_diff(nowsystime, prevsystime);
531551
+		prevsystime = nowsystime;
531551
+
531551
+		if (ticksize < 0) {
531551
+			if (debug)
531551
+				printf(_("time jumped backward %.6f seconds "
531551
+					 "to %ld.%06d - retargeting\n"),
531551
+				       ticksize, (long)nowsystime.tv_sec,
531551
+				       (int)nowsystime.tv_usec);
531551
+			/* The retarget is handled at the end of the loop. */
531551
+		} else if (deltavstarget < 0) {
531551
+			/* deltavstarget < 0 if current time < target time */
531551
+			if (debug >= 2)
531551
+				printf(_("%ld.%06d < %ld.%06d (%.6f)\n"),
531551
+				       (long)nowsystime.tv_sec,
531551
+				       (int)nowsystime.tv_usec,
531551
+				       (long)targetsystime.tv_sec,
531551
+				       (int)targetsystime.tv_usec,
531551
+				       deltavstarget);
531551
+			continue;  /* not there yet - keep spinning */
531551
+		} else if (deltavstarget <= target_time_tolerance_secs) {
531551
+			/* Close enough to the target time; done waiting. */
531551
+			break;
531551
+		} else /* (deltavstarget > target_time_tolerance_secs) */ {
531551
+			/*
531551
+			 * We missed our window.  Increase the tolerance and
531551
+			 * aim for the next opportunity.
531551
+			 */
531551
+			if (debug)
531551
+				printf(_("missed it - %ld.%06d is too far "
531551
+					 "past %ld.%06d (%.6f > %.6f)\n"),
531551
+				       (long)nowsystime.tv_sec,
531551
+				       (int)nowsystime.tv_usec,
531551
+				       (long)targetsystime.tv_sec,
531551
+				       (int)targetsystime.tv_usec,
531551
+				       deltavstarget,
531551
+				       target_time_tolerance_secs);
531551
+			target_time_tolerance_secs += tolerance_incr_secs;
531551
+			tolerance_incr_secs *= 2;
531551
 		}
531551
-		beginsystime = nowsystime;
531551
-		tdiff = time_diff(nowsystime, refsystime);
531551
-	} while (newhwtime == sethwtime + (int)(tdiff + 0.5));
531551
+
531551
+		/*
531551
+		 * Aim for the same offset (tv_usec) within the second in
531551
+		 * either the current second (if that offset hasn't arrived
531551
+		 * yet), or the next second.
531551
+		 */
531551
+		if (nowsystime.tv_usec < targetsystime.tv_usec)
531551
+			targetsystime.tv_sec = nowsystime.tv_sec;
531551
+		else
531551
+			targetsystime.tv_sec = nowsystime.tv_sec + 1;
531551
+	}
531551
+
531551
+	newhwtime = sethwtime
531551
+		    + (int)(time_diff(nowsystime, refsystime)
531551
+			    - RTC_SET_DELAY_SECS /* don't count this */
531551
+			    + 0.5 /* for rounding */);
531551
+	if (debug)
531551
+		printf(_("%ld.%06d is close enough to %ld.%06d (%.6f < %.6f)\n"
531551
+			 "Set RTC to %ld (%ld + %d; refsystime = %ld.%06d)\n"),
531551
+		       (long)nowsystime.tv_sec, (int)nowsystime.tv_usec,
531551
+		       (long)targetsystime.tv_sec, (int)targetsystime.tv_usec,
531551
+		       deltavstarget, target_time_tolerance_secs,
531551
+		       (long)newhwtime, (long)sethwtime,
531551
+		       (int)(newhwtime - sethwtime),
531551
+		       (long)refsystime.tv_sec, (int)refsystime.tv_usec);
531551
 
531551
 	set_hardware_clock(newhwtime, universal, testing);
531551
 }
531551
@@ -1636,7 +1734,7 @@ int main(int argc, char **argv)
531551
 
531551
 		switch (c) {
531551
 		case 'D':
531551
-			debug = TRUE;
531551
+			++debug;
531551
 			break;
531551
 		case 'a':
531551
 			adjust = TRUE;
531551
@@ -1953,10 +2051,4 @@ void __attribute__((__noreturn__)) hwaudit_exit(int status)
531551
  *
531551
  * hwclock uses this method, and considers the Hardware Clock to have
531551
  * infinite precision.
531551
- *
531551
- * TODO: Enhancements needed:
531551
- *
531551
- *  - When waiting for whole second boundary in set_hardware_clock_exact,
531551
- *    fail if we miss the goal by more than .1 second, as could happen if we
531551
- *    get pre-empted (by the kernel dispatcher).
531551
  */
531551
-- 
531551
1.9.3
531551