diff --git a/clock.c b/clock.c
index 0615187..6237bcf 100644
--- a/clock.c
+++ b/clock.c
@@ -19,6 +19,7 @@
 #include <errno.h>
 #include <time.h>
 #include <linux/net_tstamp.h>
+#include <math.h>
 #include <poll.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <string.h>
@@ -48,6 +49,8 @@
 #define N_CLOCK_PFD (N_POLLFD + 1) /* one extra per port, for the fault timer */
 #define POW2_41 ((double)(1ULL << 41))
 
+#define OFFSETS_ARRAY_SIZE 30 /* rolling window length for the spike filter */
+
 struct interface {
 	STAILQ_ENTRY(interface) list;
 };
@@ -114,6 +117,32 @@ struct clock {
 	int utc_offset;
 	int time_flags;  /* grand master role */
 	int time_source; /* grand master role */
+
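+	/*
+	 * Spike filter state: rolling windows of recent master offsets and
+	 * frequency adjustments, plus derived statistics used to reject
+	 * outlier sync samples while the servo is locked.
+	 */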
+	int64_t min_offset_locked;
+	int64_t max_freq_change;
+	int64_t max_offset_locked;
+	int max_offset_skipped_count;
+	int offset_skipped_count;
+	int64_t max_offset_locked_init;
+	int64_t offsets_array[OFFSETS_ARRAY_SIZE];
+	double freqs_array[OFFSETS_ARRAY_SIZE];
+	int offset_ptr;
+	int large_offset_ptr;
+	int offset_count;
+	int64_t offset_stdev;
+	int64_t offset_sigma_sq;
+	int offset_stdev_factor;
+	int64_t offset_mean; /* expected to stay near zero; worth a warning if it grows large */
+	int freq_ptr;
+	int freq_count;
+	double freq_stdev;
+	double freq_sigma_sq;
+	int freq_stdev_factor;
+	double freq_mean;
+	tmv_t last_correction;
+	int64_t min_offset_stddev;
+	double min_offset_freq_mean;
+
 	UInteger8 clock_class_threshold;
 	UInteger8 max_steps_removed;
 	enum servo_state servo_state;
@@ -145,6 +174,7 @@ static void handle_state_decision_event(struct clock *c);
 static int clock_resize_pollfd(struct clock *c, int new_nports);
 static void clock_remove_port(struct clock *c, struct port *p);
 static void clock_stats_display(struct clock_stats *s);
+static void clock_synchronize_locked(struct clock *c, double adj);
 
 static void remove_subscriber(struct clock_subscriber *s)
 {
@@ -270,6 +300,10 @@ void clock_destroy(struct clock *c)
 {
 	struct port *p, *tmp;
 
+	/* on destroy, leave the clock running at the best known mean frequency */
+	if (c->min_offset_stddev != INT64_MAX)
+		clock_synchronize_locked(c, c->min_offset_freq_mean);
+
 	interface_destroy(c->uds_rw_if);
 	interface_destroy(c->uds_ro_if);
 	clock_flush_subscriptions(c);
@@ -1105,6 +1139,28 @@ struct clock *clock_create(enum clock_type type, struct config *config,
 	c->time_source = config_get_int(config, NULL, "timeSource");
 	c->step_window = config_get_int(config, NULL, "step_window");
 
+	c->min_offset_locked = config_get_int(config, NULL, "min_offset_locked");
+	c->max_freq_change = config_get_int(config, NULL, "max_freq_change");
+	c->max_offset_skipped_count = config_get_int(config, NULL, "max_offset_skipped_count");
+	c->max_offset_locked_init = config_get_int(config, NULL, "max_offset_locked_init");
+	c->offset_stdev_factor = config_get_int(config, NULL, "offset_stdev_factor");
+	c->freq_stdev_factor = config_get_int(config, NULL, "freq_stdev_factor");
+	c->offset_ptr = 0;
+	c->large_offset_ptr = 0;
+	c->offset_count = 0;
+	c->offset_stdev = 0;
+	c->offset_sigma_sq = 0;
+	c->offset_mean = 0;
+	c->freq_ptr = 0;
+	c->freq_count = 0;
+	c->freq_stdev = 0;
+	c->freq_sigma_sq = 0;
+	c->freq_mean = 0;
+	c->offset_skipped_count = 0;
+	c->last_correction = tmv_zero();
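+	/* INT64_MAX marks "no best offset stddev recorded yet" */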
+	c->min_offset_freq_mean = 0;
+	c->min_offset_stddev = INT64_MAX;
+
 	if (c->free_running) {
 		c->clkid = CLOCK_INVALID;
 		if (timestamping == TS_SOFTWARE || timestamping == TS_LEGACY_HW) {
@@ -1798,11 +1854,77 @@ static void clock_synchronize_locked(struct clock *c, double adj)
 	}
 }
 
+/*
+ * Reset the spike filter statistics: rolling means, variances and ring
+ * buffer pointers.
+ */
+void init_clock_spike_filter(struct clock *c)
+{
+	c->offset_ptr = 0;
+	c->offset_count = 0;
+	c->offset_stdev = 0;
+	c->offset_sigma_sq = 0;
+	c->offset_mean = 0;
+	c->freq_ptr = 0;
+	c->freq_count = 0;
+	c->freq_stdev = 0;
+	c->freq_sigma_sq = 0;
+	c->freq_mean = 0;
+	c->offset_skipped_count = 0;
+	pr_notice("Reset spike filter variables");
+}
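+
+/*
+ * Maintain rolling statistics over the last OFFSETS_ARRAY_SIZE offsets.
+ * Because the offset mean is expected to stay near zero, the standard
+ * deviation is approximated by the RMS value:
+ *
+ *   offset_stdev = sqrt(sum(offset_i^2) / N)
+ */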
+void update_clock_offset_stats(struct clock *c, int64_t offset)
+{
+	int64_t n;
+
+	if (c->offset_count < OFFSETS_ARRAY_SIZE) {
+		/* window not full yet: grow the running mean and sum of squares */
+		n = c->offset_count + 1;
+		c->offset_mean = (c->offset_mean * c->offset_count + offset) / n;
+		c->offset_sigma_sq += pow(offset, 2);
+	} else {
+		/* full window: replace the oldest sample in the ring buffer */
+		n = OFFSETS_ARRAY_SIZE;
+		c->offset_ptr = c->offset_ptr % OFFSETS_ARRAY_SIZE;
+		c->offset_mean = (c->offset_mean * n -
+				  c->offsets_array[c->offset_ptr] + offset) / n;
+		c->offset_sigma_sq += pow(offset, 2) -
+				      pow(c->offsets_array[c->offset_ptr], 2);
+	}
+	/* the sum of squares can only go negative on overflow: start over */
+	if (c->offset_sigma_sq < 0) {
+		init_clock_spike_filter(c);
+		return;
+	}
+	c->offset_stdev = sqrt(c->offset_sigma_sq / n);
+	if (c->offset_count >= OFFSETS_ARRAY_SIZE &&
+	    c->offset_stdev > 0 && c->offset_stdev < c->min_offset_stddev)
+		c->min_offset_stddev = c->offset_stdev;
+	c->offsets_array[c->offset_ptr] = offset;
+	c->offset_count++;
+	c->offset_ptr++;
+}
+
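+/*
+ * Rolling statistics over the last OFFSETS_ARRAY_SIZE frequency adjustments.
+ * Unlike the offsets, the frequency mean is not assumed to be near zero, so
+ * the full variance formula applies:
+ *
+ *   freq_stdev = sqrt(sum(freq_i^2) / N - freq_mean^2)
+ */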
+void update_clock_freq_stats(struct clock *c, double freq)
+{
+	double variance;
+
+	if (c->freq_count < OFFSETS_ARRAY_SIZE) {
+		c->freq_mean = (c->freq_mean * c->freq_count + freq) /
+			       (c->freq_count + 1);
+		c->freq_sigma_sq += pow(freq, 2);
+		variance = c->freq_sigma_sq / (c->freq_count + 1) -
+			   pow(c->freq_mean, 2);
+	} else {
+		c->freq_ptr = c->freq_ptr % OFFSETS_ARRAY_SIZE;
+		c->freq_mean = (c->freq_mean * OFFSETS_ARRAY_SIZE -
+				c->freqs_array[c->freq_ptr] + freq) /
+			       OFFSETS_ARRAY_SIZE;
+		c->freq_sigma_sq += pow(freq, 2) -
+				    pow(c->freqs_array[c->freq_ptr], 2);
+		variance = c->freq_sigma_sq / OFFSETS_ARRAY_SIZE -
+			   pow(c->freq_mean, 2);
+	}
+	/* floating point cancellation can push the variance slightly
+	 * below zero: clamp it before taking the square root */
+	c->freq_stdev = sqrt(variance > 0.0 ? variance : 0.0);
+	if (c->freq_count >= OFFSETS_ARRAY_SIZE &&
+	    c->offset_stdev == c->min_offset_stddev) {
+		/* remember the mean frequency seen at the best offset stddev */
+		c->min_offset_freq_mean = c->freq_mean;
+		pr_notice("Best offset stddev = %ld, new mean freq = %lf",
+			  c->min_offset_stddev, c->min_offset_freq_mean);
+	}
+	c->freqs_array[c->freq_ptr] = freq;
+	c->freq_count++;
+	c->freq_ptr++;
+	c->last_correction = c->ingress_ts;
+}
+
+static int64_t max_func(int64_t num1, int64_t num2)
+{
+	return num1 > num2 ? num1 : num2;
+}
+
 enum servo_state clock_synchronize(struct clock *c, tmv_t ingress, tmv_t origin)
 {
 	enum servo_state state = SERVO_UNLOCKED;
 	double adj, weight;
-	int64_t offset;
+	tmv_t master_offset;
+	int64_t offset, unsync_seconds;
 
 	if (c->step_window_counter) {
 		c->step_window_counter--;
@@ -1816,7 +1938,7 @@ enum servo_state clock_synchronize(struct clock *c, tmv_t ingress, tmv_t origin)
 
 	tsproc_down_ts(c->tsproc, origin, ingress);
 
-	if (tsproc_update_offset(c->tsproc, &c->master_offset, &weight)) {
+	if (tsproc_update_offset(c->tsproc, &master_offset, &weight)) {
 		if (c->free_running) {
 			return clock_no_adjust(c, ingress, origin);
 		} else {
 			return state;
 		}
 		}
 	}
 
+	offset = tmv_to_nanoseconds(master_offset);
+
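+	/*
+	 * Spike filter: while the servo is locked, compare each new offset
+	 * against a bound derived from the recent offset and frequency
+	 * statistics and skip samples exceeding it, up to
+	 * max_offset_skipped_count times in a row.
+	 */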
+	if (c->servo_state == SERVO_LOCKED) {
+		pr_debug("mean freq: %lf", c->min_offset_freq_mean);
+		if (c->offset_count < OFFSETS_ARRAY_SIZE) {
+			c->offset_skipped_count = 0;
+			/* update the clock's offset statistics */
+			update_clock_offset_stats(c, offset);
+		} else {
+			/*
+			 * Offset bound while locked, for t seconds without a
+			 * correction:
+			 *
+			 *   offset_stdev_factor * offset_stdev
+			 *     + t * freq_stdev_factor * freq_stdev
+			 *     + (max_freq_change / 2) * t^2
+			 *
+			 * The middle term assumes a frequency error of
+			 * freq_stdev_factor * freq_stdev at the time the bad
+			 * sync packet arrived; the last term assumes that the
+			 * frequency error itself can grow by max_freq_change
+			 * every second.
+			 */
+			c->max_offset_locked = c->offset_stdev_factor * c->offset_stdev;
+			unsync_seconds = tmv_to_nanoseconds(tmv_sub(c->ingress_ts, c->last_correction)) / NS_PER_SEC;
+			if (unsync_seconds > 5 || unsync_seconds < 0) {
+				pr_notice("seconds without sync: %ld", unsync_seconds);
+			}
+			c->max_offset_locked += unsync_seconds * c->freq_stdev_factor *
+					(int64_t) floor(c->freq_stdev) +
+					(c->max_freq_change / 2) * pow(unsync_seconds, 2);
+			/*
+			 * Overflow protection: the bound can grow so large that
+			 * it overflows and goes negative, which would leave
+			 * ptp4l in a limbo state, so drop back to UNLOCKED.
+			 */
+			if (c->max_offset_locked < 0) {
+				pr_notice("max_offset_locked: %ld, offset_stdev_factor: %d, offset_stdev: %ld", c->max_offset_locked, c->offset_stdev_factor, c->offset_stdev);
+				pr_notice("unsync_seconds: %ld, freq_stdev_factor: %d, freq_stdev: %lf, max_freq_change: %ld", unsync_seconds, c->freq_stdev_factor, c->freq_stdev, c->max_freq_change);
+				c->servo_state = SERVO_UNLOCKED;
+				return c->servo_state;
+			}
+
+			bool is_spike = llabs(offset) > llabs(max_func(c->max_offset_locked, c->min_offset_locked));
+			if (is_spike) {
+				adj = c->min_offset_freq_mean;
+				c->master_offset = nanoseconds_to_tmv(c->max_offset_locked);
+				pr_notice("spike detected => max_offset_locked: %ld, setting offset to min_offset_freq_mean: %lf", c->max_offset_locked, adj);
+				clock_synchronize_locked(c, adj);
+				if (c->offset_skipped_count < c->max_offset_skipped_count) {
+					c->offset_skipped_count++;
+					pr_notice("skip %d/%d large offset (>%ld) %ld", c->offset_skipped_count,
+							c->max_offset_skipped_count, c->max_offset_locked, offset);
+					/*
+					 * The frequency was already steered to
+					 * the best known mean above.
+					 */
+					return c->servo_state;
+				} else {
+					/*
+					 * Unlocking here is a judgment call;
+					 * an alternative would be to only keep
+					 * counting the offsets we missed.
+					 */
+					c->servo_state = SERVO_UNLOCKED;
+					return c->servo_state;
+				}
+			} else {
+				pr_debug("NO spike detected => max_offset_locked: %ld", c->max_offset_locked);
+				c->offset_skipped_count = 0;
+				/* update the clock's offset statistics */
+				update_clock_offset_stats(c, offset);
+			}
+		}
+	} else {
+		init_clock_spike_filter(c);
+	}
+	c->master_offset = master_offset;
+
 	if (clock_utc_correct(c, ingress)) {
 		return c->servo_state;
 	}
@@ -1836,7 +2012,6 @@ enum servo_state clock_synchronize(struct clock *c, tmv_t ingress, tmv_t origin)
 		return state;
 	}
 
-	offset = tmv_to_nanoseconds(c->master_offset);
 	if (offset * tmv_sign(c->master_offset) > 10000) {
 		tsproc_dump_state(c->tsproc);
 	}
@@ -1863,6 +2038,7 @@ enum servo_state clock_synchronize(struct clock *c, tmv_t ingress, tmv_t origin)
 		break;
 	case SERVO_LOCKED:
 		clock_synchronize_locked(c, adj);
+		update_clock_freq_stats(c, adj);
 		break;
 	case SERVO_LOCKED_STABLE:
 		if (c->write_phase_mode) {
@@ -1871,6 +2047,7 @@ enum servo_state clock_synchronize(struct clock *c, tmv_t ingress, tmv_t origin)
 		} else {
 			clock_synchronize_locked(c, adj);
 		}
+		update_clock_freq_stats(c, adj);
 		break;
 	}
 
@@ -2025,6 +2202,10 @@ static void handle_state_decision_event(struct clock *c)
 		if (cid_eq(&best_id, &c->dds.clockIdentity)) {
 			pr_notice("selected local clock %s as best master",
 					cid2str(&best_id));
+			/* apply the estimated mean frequency while free running */
+			if (c->min_offset_stddev != INT64_MAX) {
+				clockadj_set_freq(c->clkid, -c->min_offset_freq_mean);
+			}
 		} else {
 			pr_notice("selected best master clock %s",
 					cid2str(&best_id));
diff --git a/clock.h b/clock.h
index 17b2e3b..f86229b 100644
--- a/clock.h
+++ b/clock.h
@@ -361,6 +361,8 @@ struct timePropertiesDS clock_time_properties(struct clock *c);
  */
 void clock_update_time_properties(struct clock *c, struct timePropertiesDS tds);
 
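+/**
+ * Reset the statistics used by the clock's spike filter.
+ * @param c  The clock instance.
+ */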
+void init_clock_spike_filter(struct clock *c);
+
 /**
  * Obtain a clock's description.
  * @param c  The clock instance.
diff --git a/config.c b/config.c
index 747a735..718d880 100644
--- a/config.c
+++ b/config.c
@@ -346,6 +346,13 @@ struct config_item config_tab[] = {
 	GLOB_ITEM_INT("utc_offset", CURRENT_UTC_OFFSET, 0, INT_MAX),
 	GLOB_ITEM_INT("verbose", 0, 0, 1),
 	GLOB_ITEM_INT("write_phase_mode", 0, 0, 1),
+
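+	/* spike filter tunables (offsets in ns; max_freq_change assumed ppb per second) */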
+	GLOB_ITEM_INT("max_freq_change", 20, 0, INT_MAX),
+	GLOB_ITEM_INT("max_offset_skipped_count", 15, 0, INT_MAX),
+	GLOB_ITEM_INT("max_offset_locked_init", 500000, 0, INT_MAX),
+	GLOB_ITEM_INT("offset_stdev_factor", 3, 0, INT_MAX),
+	GLOB_ITEM_INT("freq_stdev_factor", 3, 0, INT_MAX),
+	GLOB_ITEM_INT("min_offset_locked", 15000, 0, INT_MAX),
 };
 
 static struct unicast_master_table *current_uc_mtab;