196963
Add support for use of the system timezone database, rather
196963
than embedding a copy.  Discussed upstream, but the change was not wanted there.
196963
196963
History:
196963
r10: make timezone name lookup case-insensitive
196963
r9: fix another compile error without --with-system-tzdata configured (Michael Heimpold)
196963
r8: fix compile error without --with-system-tzdata configured
196963
r7: improve check for valid timezone id to exclude directories
196963
r6: fix fd leak in r5, fix country code/BC flag use in 
196963
    timezone_identifiers_list() using system db,
196963
    fix use of PECL timezonedb to override system db
196963
r5: reverts addition of "System/Localtime" fake tzname.
196963
    updated for 5.3.0, parses zone.tab to pick up mapping between
196963
    timezone name, country code and long/lat coords
196963
r4: added "System/Localtime" tzname which uses /etc/localtime
196963
r3: fix a crash if /usr/share/zoneinfo doesn't exist (Raphael Geissert)
196963
r2: add filesystem trawl to set up name alias index
196963
r1: initial revision
196963
196963
--- a/ext/date/lib/parse_tz.c
196963
+++ b/ext/date/lib/parse_tz.c
196963
@@ -20,6 +20,16 @@
196963
 
196963
 #include "timelib.h"
196963
 
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+#include <sys/mman.h>
196963
+#include <sys/stat.h>
196963
+#include <limits.h>
196963
+#include <fcntl.h>
196963
+#include <unistd.h>
196963
+
196963
+#include "php_scandir.h"
196963
+#endif
196963
+
196963
 #include <stdio.h>
196963
 
196963
 #ifdef HAVE_LOCALE_H
196963
@@ -31,7 +41,12 @@
196963
 #else
196963
 #include <strings.h>
196963
 #endif
196963
+
196963
+#ifndef HAVE_SYSTEM_TZDATA
196963
 #include "timezonedb.h"
196963
+#endif
196963
+
196963
+#include <ctype.h>
196963
 
196963
 #if (defined(__APPLE__) || defined(__APPLE_CC__)) && (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__))
196963
 # if defined(__LITTLE_ENDIAN__)
196963
@@ -51,9 +66,14 @@
196963
 
196963
 static void read_preamble(const unsigned char **tzf, timelib_tzinfo *tz)
196963
 {
196963
-	/* skip ID */
196963
-	*tzf += 4;
196963
-	
196963
+        if (memcmp(tzf, "TZif", 4) == 0) {
196963
+                *tzf += 20;
196963
+                return;
196963
+        }
196963
+        
196963
+        /* skip ID */
196963
+        *tzf += 4;
196963
+                
196963
 	/* read BC flag */
196963
 	tz->bc = (**tzf == '\1');
196963
 	*tzf += 1;
196963
@@ -256,7 +276,405 @@
196963
 	}
196963
 }
196963
 
196963
-static int seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+
196963
+#ifdef HAVE_SYSTEM_TZDATA_PREFIX
196963
+#define ZONEINFO_PREFIX HAVE_SYSTEM_TZDATA_PREFIX
196963
+#else
196963
+#define ZONEINFO_PREFIX "/usr/share/zoneinfo"
196963
+#endif
196963
+
196963
+/* System timezone database pointer. */
196963
+static const timelib_tzdb *timezonedb_system;
196963
+
196963
+/* Hash table entry for the cache of the zone.tab mapping table. */
196963
+struct location_info {
196963
+        char code[2];
196963
+        double latitude, longitude;
196963
+        char name[64];
196963
+        char *comment;
196963
+        struct location_info *next;
196963
+};
196963
+
196963
+/* Cache of zone.tab. */
196963
+static struct location_info **system_location_table;
196963
+
196963
+/* Size of the zone.tab hash table; a random-ish prime big enough to
196963
+ * prevent too many collisions. */
196963
+#define LOCINFO_HASH_SIZE (1021)
196963
+
196963
+/* Compute a case insensitive hash of str */
196963
+static uint32_t tz_hash(const char *str)
196963
+{
196963
+    const unsigned char *p = (const unsigned char *)str;
196963
+    uint32_t hash = 5381;
196963
+    int c;
196963
+    
196963
+    while ((c = tolower(*p++)) != '\0') {
196963
+        hash = (hash << 5) ^ hash ^ c;
196963
+    }
196963
+    
196963
+    return hash % LOCINFO_HASH_SIZE;
196963
+}
196963
+
196963
+/* Parse an ISO-6709 coordinate as used in zone.tab. Returns end of the
196963
+ * parsed string on success, or NULL on parse error.  On success,
196963
+ * writes the parsed number to *result. */
196963
+static char *parse_iso6709(char *p, double *result)
196963
+{
196963
+    double v, sign;
196963
+    char *pend;
196963
+    size_t len;
196963
+
196963
+    if (*p == '+')
196963
+        sign = 1.0;
196963
+    else if (*p == '-')
196963
+        sign = -1.0;
196963
+    else
196963
+        return NULL;
196963
+
196963
+    p++;
196963
+    for (pend = p; *pend >= '0' && *pend <= '9'; pend++)
196963
+        ;;
196963
+
196963
+    /* Annoying encoding used by zone.tab has no decimal point, so use
196963
+     * the length to determine the format:
196963
+     * 
196963
+     * 4 = DDMM
196963
+     * 5 = DDDMM
196963
+     * 6 = DDMMSS
196963
+     * 7 = DDDMMSS
196963
+     */
196963
+    len = pend - p;
196963
+    if (len < 4 || len > 7) {
196963
+        return NULL;
196963
+    }
196963
+
196963
+    /* p => [D]DD */
196963
+    v = (p[0] - '0') * 10.0 + (p[1] - '0');
196963
+    p += 2;
196963
+    if (len == 5 || len == 7)
196963
+        v = v * 10.0 + (*p++ - '0');
196963
+    /* p => MM[SS] */
196963
+    v += (10.0 * (p[0] - '0')
196963
+          + p[1] - '0') / 60.0;
196963
+    p += 2;
196963
+    /* p => [SS] */
196963
+    if (len > 5) {
196963
+        v += (10.0 * (p[0] - '0')
196963
+              + p[1] - '0') / 3600.0;
196963
+        p += 2;
196963
+    }
196963
+
196963
+    /* Round to five decimal places, not because it's a good idea,
196963
+     * but, because the builtin data uses rounded data, so, match
196963
+     * that. */
196963
+    *result = round(v * sign * 100000.0) / 100000.0;
196963
+
196963
+    return p;
196963
+}
196963
+
196963
+/* This function parses the zone.tab file to build up the mapping of
196963
+ * timezone to country code and geographic location, and returns a
196963
+ * hash table.  The hash table is indexed by the function:
196963
+ *
196963
+ *   tz_hash(timezone-name)
196963
+ */
196963
+static struct location_info **create_location_table(void)
196963
+{
196963
+    struct location_info **li, *i;
196963
+    char zone_tab[PATH_MAX];
196963
+    char line[512];
196963
+    FILE *fp;
196963
+
196963
+    strncpy(zone_tab, ZONEINFO_PREFIX "/zone.tab", sizeof zone_tab);
196963
+
196963
+    fp = fopen(zone_tab, "r");
196963
+    if (!fp) {
196963
+        return NULL;
196963
+    }
196963
+
196963
+    li = calloc(LOCINFO_HASH_SIZE, sizeof *li);
196963
+
196963
+    while (fgets(line, sizeof line, fp)) {
196963
+        char *p = line, *code, *name, *comment;
196963
+        uint32_t hash;
196963
+        double latitude, longitude;
196963
+
196963
+        while (isspace(*p))
196963
+            p++;
196963
+
196963
+        if (*p == '#' || *p == '\0' || *p == '\n')
196963
+            continue;
196963
+        
196963
+        if (!isalpha(p[0]) || !isalpha(p[1]) || p[2] != '\t')
196963
+            continue;
196963
+        
196963
+        /* code => AA */
196963
+        code = p;
196963
+        p[2] = 0;
196963
+        p += 3;
196963
+
196963
+        /* coords => [+-][D]DDMM[SS][+-][D]DDMM[SS] */
196963
+        p = parse_iso6709(p, &latitude);
196963
+        if (!p) {
196963
+            continue;
196963
+        }
196963
+        p = parse_iso6709(p, &longitude);
196963
+        if (!p) {
196963
+            continue;
196963
+        }
196963
+
196963
+        if (!p || *p != '\t') {
196963
+            continue;
196963
+        }
196963
+
196963
+        /* name = string */
196963
+        name = ++p;
196963
+        while (*p != '\t' && *p && *p != '\n')
196963
+            p++;
196963
+
196963
+        *p++ = '\0';
196963
+
196963
+        /* comment = string */
196963
+        comment = p;
196963
+        while (*p != '\t' && *p && *p != '\n')
196963
+            p++;
196963
+
196963
+        if (*p == '\n' || *p == '\t')
196963
+            *p = '\0';
196963
+        
196963
+        hash = tz_hash(name);
196963
+        i = malloc(sizeof *i);
196963
+        memcpy(i->code, code, 2);
196963
+        strncpy(i->name, name, sizeof i->name);
196963
+        i->comment = strdup(comment);
196963
+        i->longitude = longitude;
196963
+        i->latitude = latitude;
196963
+        i->next = li[hash];
196963
+        li[hash] = i;
196963
+        /* printf("%s [%u, %f, %f]\n", name, hash, latitude, longitude); */
196963
+    }
196963
+
196963
+    fclose(fp);
196963
+
196963
+    return li;
196963
+}
196963
+
196963
+/* Return location info from hash table, using given timezone name.
196963
+ * Returns NULL if the name could not be found. */
196963
+const struct location_info *find_zone_info(struct location_info **li, 
196963
+                                           const char *name)
196963
+{
196963
+    uint32_t hash = tz_hash(name);
196963
+    const struct location_info *l;
196963
+
196963
+    if (!li) {
196963
+        return NULL;
196963
+    }
196963
+
196963
+    for (l = li[hash]; l; l = l->next) {
196963
+        if (strcasecmp(l->name, name) == 0)
196963
+            return l;
196963
+    }
196963
+
196963
+    return NULL;
196963
+}    
196963
+
196963
+/* Filter out some non-tzdata files and the posix/right databases, if
196963
+ * present. */
196963
+static int index_filter(const struct dirent *ent)
196963
+{
196963
+	return strcmp(ent->d_name, ".") != 0
196963
+		&& strcmp(ent->d_name, "..") != 0
196963
+		&& strcmp(ent->d_name, "posix") != 0
196963
+		&& strcmp(ent->d_name, "posixrules") != 0
196963
+		&& strcmp(ent->d_name, "right") != 0
196963
+		&& strstr(ent->d_name, ".tab") == NULL;
196963
+}
196963
+
196963
+static int sysdbcmp(const void *first, const void *second)
196963
+{
196963
+        const timelib_tzdb_index_entry *alpha = first, *beta = second;
196963
+
196963
+        return strcmp(alpha->id, beta->id);
196963
+}
196963
+
196963
+
196963
+/* Create the zone identifier index by trawling the filesystem. */
196963
+static void create_zone_index(timelib_tzdb *db)
196963
+{
196963
+	size_t dirstack_size,  dirstack_top;
196963
+	size_t index_size, index_next;
196963
+	timelib_tzdb_index_entry *db_index;
196963
+	char **dirstack;
196963
+
196963
+	/* LIFO stack to hold directory entries to scan; each slot is a
196963
+	 * directory name relative to the zoneinfo prefix. */
196963
+	dirstack_size = 32;
196963
+	dirstack = malloc(dirstack_size * sizeof *dirstack);
196963
+	dirstack_top = 1;
196963
+	dirstack[0] = strdup("");
196963
+	
196963
+	/* Index array. */
196963
+	index_size = 64;
196963
+	db_index = malloc(index_size * sizeof *db_index);
196963
+	index_next = 0;
196963
+
196963
+	do {
196963
+		struct dirent **ents;
196963
+		char name[PATH_MAX], *top;
196963
+		int count;
196963
+
196963
+		/* Pop the top stack entry, and iterate through its contents. */
196963
+		top = dirstack[--dirstack_top];
196963
+		snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s", top);
196963
+
196963
+		count = php_scandir(name, &ents, index_filter, php_alphasort);
196963
+
196963
+		while (count > 0) {
196963
+			struct stat st;
196963
+			const char *leaf = ents[count - 1]->d_name;
196963
+
196963
+			snprintf(name, sizeof name, ZONEINFO_PREFIX "/%s/%s", 
196963
+				 top, leaf);
196963
+			
196963
+			if (strlen(name) && stat(name, &st) == 0) {
196963
+				/* Name, relative to the zoneinfo prefix. */
196963
+				const char *root = top;
196963
+
196963
+				if (root[0] == '/') root++;
196963
+
196963
+				snprintf(name, sizeof name, "%s%s%s", root, 
196963
+					 *root ? "/": "", leaf);
196963
+
196963
+				if (S_ISDIR(st.st_mode)) {
196963
+					if (dirstack_top == dirstack_size) {
196963
+						dirstack_size *= 2;
196963
+						dirstack = realloc(dirstack, 
196963
+								   dirstack_size * sizeof *dirstack);
196963
+					}
196963
+					dirstack[dirstack_top++] = strdup(name);
196963
+				}
196963
+				else {
196963
+					if (index_next == index_size) {
196963
+						index_size *= 2;
196963
+						db_index = realloc(db_index,
196963
+								   index_size * sizeof *db_index);
196963
+					}
196963
+
196963
+					db_index[index_next++].id = strdup(name);
196963
+				}
196963
+			}
196963
+
196963
+			free(ents[--count]);
196963
+		}
196963
+		
196963
+		if (count != -1) free(ents);
196963
+		free(top);
196963
+	} while (dirstack_top);
196963
+
196963
+        qsort(db_index, index_next, sizeof *db_index, sysdbcmp);
196963
+
196963
+	db->index = db_index;
196963
+	db->index_size = index_next;
196963
+
196963
+	free(dirstack);
196963
+}
196963
+
196963
+#define FAKE_HEADER "1234\0??\1??"
196963
+#define FAKE_UTC_POS (7 - 4)
196963
+
196963
+/* Create a fake data segment for database 'sysdb'. */
196963
+static void fake_data_segment(timelib_tzdb *sysdb,
196963
+                              struct location_info **info)
196963
+{
196963
+        size_t n;
196963
+        char *data, *p;
196963
+        
196963
+        data = malloc(3 * sysdb->index_size + 7);
196963
+
196963
+        p = mempcpy(data, FAKE_HEADER, sizeof(FAKE_HEADER) - 1);
196963
+
196963
+        for (n = 0; n < sysdb->index_size; n++) {
196963
+                const struct location_info *li;
196963
+                timelib_tzdb_index_entry *ent;
196963
+
196963
+                ent = (timelib_tzdb_index_entry *)&sysdb->index[n];
196963
+
196963
+                /* Lookup the timezone name in the hash table. */
196963
+                if (strcmp(ent->id, "UTC") == 0) {
196963
+                        ent->pos = FAKE_UTC_POS;
196963
+                        continue;
196963
+                }
196963
+
196963
+                li = find_zone_info(info, ent->id);
196963
+                if (li) {
196963
+                        /* If found, append the BC byte and the
196963
+                         * country code; set the position for this
196963
+                         * section of timezone data.  */
196963
+                        ent->pos = (p - data) - 4;
196963
+                        *p++ = '\1';
196963
+                        *p++ = li->code[0];
196963
+                        *p++ = li->code[1];
196963
+                }
196963
+                else {
196963
+                        /* If not found, the timezone data can
196963
+                         * point at the header. */
196963
+                        ent->pos = 0;
196963
+                }
196963
+        }
196963
+        
196963
+        sysdb->data = (unsigned char *)data;
196963
+}
196963
+
196963
+/* Returns true if the passed-in stat structure describes a
196963
+ * probably-valid timezone file. */
196963
+static int is_valid_tzfile(const struct stat *st)
196963
+{
196963
+	return S_ISREG(st->st_mode) && st->st_size > 20;
196963
+}
196963
+
196963
+/* Return the mmap()ed tzfile if found, else NULL.  On success, the
196963
+ * length of the mapped data is placed in *length. */
196963
+static char *map_tzfile(const char *timezone, size_t *length)
196963
+{
196963
+	char fname[PATH_MAX];
196963
+	struct stat st;
196963
+	char *p;
196963
+	int fd;
196963
+	
196963
+	if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
196963
+		return NULL;
196963
+	}
196963
+
196963
+    if (system_location_table) {
196963
+        const struct location_info *li;
196963
+        if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
196963
+            /* Use the stored name to avoid case issues */
196963
+            timezone = li->name;
196963
+        }
196963
+    }
196963
+	snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
196963
+	
196963
+	fd = open(fname, O_RDONLY);
196963
+	if (fd == -1) {
196963
+		return NULL;
196963
+	} else if (fstat(fd, &st) != 0 || !is_valid_tzfile(&st)) {
196963
+		close(fd);
196963
+		return NULL;
196963
+	}
196963
+
196963
+	*length = st.st_size;
196963
+	p = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
196963
+	close(fd);
196963
+	
196963
+	return p != MAP_FAILED ? p : NULL;
196963
+}
196963
+
196963
+#endif
196963
+
196963
+static int inmem_seek_to_tz_position(const unsigned char **tzf, char *timezone, const timelib_tzdb *tzdb)
196963
 {
196963
 	int left = 0, right = tzdb->index_size - 1;
196963
 #ifdef HAVE_SETLOCALE
196963
@@ -295,36 +713,135 @@
196963
 	return 0;
196963
 }
196963
 
196963
+static int seek_to_tz_position(const unsigned char **tzf, char *timezone, 
196963
+			       char **map, size_t *maplen,
196963
+			       const timelib_tzdb *tzdb)
196963
+{
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+	if (tzdb == timezonedb_system) {
196963
+		char *orig;
196963
+
196963
+		orig = map_tzfile(timezone, maplen);
196963
+		if (orig == NULL) {
196963
+			return 0;
196963
+		}
196963
+		
196963
+		(*tzf) = (unsigned char *)orig ;
196963
+		*map = orig;
196963
+                
196963
+                return 1;
196963
+	}
196963
+       else
196963
+#endif
196963
+       {
196963
+		return inmem_seek_to_tz_position(tzf, timezone, tzdb);
196963
+	}
196963
+}
196963
+
196963
 const timelib_tzdb *timelib_builtin_db(void)
196963
 {
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+	if (timezonedb_system == NULL) {
196963
+		timelib_tzdb *tmp = malloc(sizeof *tmp);
196963
+
196963
+		tmp->version = "0.system";
196963
+		tmp->data = NULL;
196963
+		create_zone_index(tmp);
196963
+		system_location_table = create_location_table();
196963
+                fake_data_segment(tmp, system_location_table);
196963
+		timezonedb_system = tmp;
196963
+	}
196963
+
196963
+			
196963
+	return timezonedb_system;
196963
+#else
196963
 	return &timezonedb_builtin;
196963
+#endif
196963
 }
196963
 
196963
 const timelib_tzdb_index_entry *timelib_timezone_builtin_identifiers_list(int *count)
196963
 {
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+	*count = timezonedb_system->index_size;
196963
+	return timezonedb_system->index;
196963
+#else
196963
 	*count = sizeof(timezonedb_idx_builtin) / sizeof(*timezonedb_idx_builtin);
196963
 	return timezonedb_idx_builtin;
196963
+#endif
196963
 }
196963
 
196963
 int timelib_timezone_id_is_valid(char *timezone, const timelib_tzdb *tzdb)
196963
 {
196963
 	const unsigned char *tzf;
196963
-	return (seek_to_tz_position(&tzf, timezone, tzdb));
196963
+
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+        if (tzdb == timezonedb_system) {
196963
+            char fname[PATH_MAX];
196963
+            struct stat st;
196963
+
196963
+            if (timezone[0] == '\0' || strstr(timezone, "..") != NULL) {
196963
+		        return 0;
196963
+            }
196963
+
196963
+            if (system_location_table) {
196963
+                if (find_zone_info(system_location_table, timezone) != NULL) {
196963
+                    /* found in cache */
196963
+                    return 1;
196963
+                }
196963
+            }
196963
+            
196963
+            snprintf(fname, sizeof fname, ZONEINFO_PREFIX "/%s", timezone);
196963
+            
196963
+            return stat(fname, &st) == 0 && is_valid_tzfile(&st);
196963
+        }
196963
+#endif
196963
+
196963
+	return (inmem_seek_to_tz_position(&tzf, timezone, tzdb));
196963
 }
196963
 
196963
 timelib_tzinfo *timelib_parse_tzfile(char *timezone, const timelib_tzdb *tzdb)
196963
 {
196963
 	const unsigned char *tzf;
196963
+	char *memmap = NULL;
196963
+	size_t maplen;
196963
 	timelib_tzinfo *tmp;
196963
 
196963
-	if (seek_to_tz_position(&tzf, timezone, tzdb)) {
196963
+	if (seek_to_tz_position(&tzf, timezone, &memmap, &maplen, tzdb)) {
196963
 		tmp = timelib_tzinfo_ctor(timezone);
196963
 
196963
 		read_preamble(&tzf, tmp);
196963
 		read_header(&tzf, tmp);
196963
 		read_transistions(&tzf, tmp);
196963
 		read_types(&tzf, tmp);
196963
-		read_location(&tzf, tmp);
196963
+
196963
+#ifdef HAVE_SYSTEM_TZDATA
196963
+		if (memmap) {
196963
+			const struct location_info *li;
196963
+
196963
+			/* TZif-style - grok the location info from the system database,
196963
+			 * if possible. */
196963
+
196963
+			if ((li = find_zone_info(system_location_table, timezone)) != NULL) {
196963
+				tmp->location.comments = strdup(li->comment);
196963
+                                strncpy(tmp->location.country_code, li->code, 2);
196963
+				tmp->location.longitude = li->longitude;
196963
+				tmp->location.latitude = li->latitude;
196963
+				tmp->bc = 1;
196963
+			}
196963
+			else {
196963
+				strcpy(tmp->location.country_code, "??");
196963
+				tmp->bc = 0;
196963
+				tmp->location.comments = strdup("");
196963
+			}
196963
+
196963
+			/* Now done with the mmap segment - discard it. */
196963
+			munmap(memmap, maplen);
196963
+		} else
196963
+#endif
196963
+		{
196963
+			/* PHP-style - use the embedded info. */
196963
+			read_location(&tzf, tmp);
196963
+		}
196963
 	} else {
196963
 		tmp = NULL;
196963
 	}
196963
--- a/ext/date/lib/timelib.m4
196963
+++ b/ext/date/lib/timelib.m4
196963
@@ -78,3 +78,17 @@ stdlib.h
196963
 
196963
 dnl Check for strtoll, atoll
196963
 AC_CHECK_FUNCS(strtoll atoll strftime)
196963
+
196963
+PHP_ARG_WITH(system-tzdata, for use of system timezone data,
196963
+[  --with-system-tzdata[=DIR]      to specify use of system timezone data],
196963
+no, no)
196963
+
196963
+if test "$PHP_SYSTEM_TZDATA" != "no"; then
196963
+   AC_DEFINE(HAVE_SYSTEM_TZDATA, 1, [Define if system timezone data is used])
196963
+
196963
+   if test "$PHP_SYSTEM_TZDATA" != "yes"; then
196963
+      AC_DEFINE_UNQUOTED(HAVE_SYSTEM_TZDATA_PREFIX, "$PHP_SYSTEM_TZDATA",
196963
+                         [Define for location of system timezone data])
196963
+   fi
196963
+fi
196963
+