Blame SOURCES/file-5.39-regex-caching-1.patch

840325
diff --git a/src/apprentice.c b/src/apprentice.c
840325
index b609dd1..21eac1e 100644
840325
--- a/src/apprentice.c
840325
+++ b/src/apprentice.c
840325
@@ -423,7 +423,15 @@ add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
840325
 	ml->map = idx == 0 ? map : NULL;
840325
 	ml->magic = map->magic[idx];
840325
 	ml->nmagic = map->nmagic[idx];
840325
-
840325
+	if (ml->nmagic) {
840325
+		ml->magic_rxcomp = CAST(file_regex_t **,
840325
+		    calloc(ml->nmagic, sizeof(*ml->magic_rxcomp)));
840325
+		if (ml->magic_rxcomp == NULL) {
840325
+			free(ml);
840325
+			return -1;
840325
+		}
840325
+	} else
840325
+		ml->magic_rxcomp = NULL;
840325
 	mlp->prev->next = ml;
840325
 	ml->prev = mlp->prev;
840325
 	ml->next = mlp;
840325
@@ -607,8 +615,19 @@ mlist_free_all(struct magic_set *ms)
840325
 private void
840325
 mlist_free_one(struct mlist *ml)
840325
 {
840325
+	size_t i;
840325
+
840325
 	if (ml->map)
840325
 		apprentice_unmap(CAST(struct magic_map *, ml->map));
840325
+
840325
+	for (i = 0; i < ml->nmagic; ++i) {
840325
+		if (ml->magic_rxcomp[i]) {
840325
+			file_regfree(ml->magic_rxcomp[i]);
840325
+			free(ml->magic_rxcomp[i]);
840325
+		}
840325
+	}
840325
+	free(ml->magic_rxcomp);
840325
+	ml->magic_rxcomp = NULL;
840325
 	free(ml);
840325
 }
840325
 
840325
@@ -3492,16 +3511,16 @@ file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
840325
 
840325
 	for (ml = mlist->next; ml != mlist; ml = ml->next) {
840325
 		struct magic *ma = ml->magic;
840325
-		uint32_t nma = ml->nmagic;
840325
-		for (i = 0; i < nma; i++) {
840325
+		for (i = 0; i < ml->nmagic; i++) {
840325
 			if (ma[i].type != FILE_NAME)
840325
 				continue;
840325
 			if (strcmp(ma[i].value.s, name) == 0) {
840325
 				v->magic = &ma[i];
840325
-				for (j = i + 1; j < nma; j++)
840325
+				for (j = i + 1; j < ml->nmagic; j++)
840325
 				    if (ma[j].cont_level == 0)
840325
 					    break;
840325
 				v->nmagic = j - i;
840325
+				v->magic_rxcomp = &ml->magic_rxcomp[i]; /* same base index as v->magic */
840325
 				return 0;
840325
 			}
840325
 		}
840325
diff --git a/src/file.h b/src/file.h
840325
index 48f4b69..c0b5a7c 100644
840325
--- a/src/file.h
840325
+++ b/src/file.h
840325
@@ -88,6 +88,10 @@
840325
 /* Do this here and now, because struct stat gets re-defined on solaris */
840325
 #include <sys/stat.h>
840325
 #include <stdarg.h>
840325
+#include <locale.h>
840325
+#if defined(HAVE_XLOCALE_H)
840325
+#include <xlocale.h>
840325
+#endif
840325
 
840325
 #define ENABLE_CONDITIONALS
840325
 
840325
@@ -167,6 +171,19 @@
840325
 #define FILE_COMPILE	2
840325
 #define FILE_LIST	3
840325
 
840325
+typedef struct {
840325
+	const char *pat;
840325
+#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
840325
+#define USE_C_LOCALE
840325
+	locale_t old_lc_ctype;
840325
+	locale_t c_lc_ctype;
840325
+#else
840325
+	char *old_lc_ctype;
840325
+#endif
840325
+	int rc;
840325
+	regex_t rx;
840325
+} file_regex_t;
840325
+
840325
 struct buffer {
840325
 	int fd;
840325
 	struct stat st;
840325
@@ -394,7 +411,8 @@ struct magic {
840325
 /* list of magic entries */
840325
 struct mlist {
840325
 	struct magic *magic;		/* array of magic entries */
840325
-	uint32_t nmagic;		/* number of entries in array */
840325
+	file_regex_t **magic_rxcomp;	/* array of compiled regexps */
840325
+	size_t nmagic;			/* number of entries in array */
840325
 	void *map;			/* internal resources used by entry */
840325
 	struct mlist *next, *prev;
840325
 };
840325
@@ -554,23 +572,7 @@ protected void buffer_init(struct buffer *, int, const struct stat *,
840325
 protected void buffer_fini(struct buffer *);
840325
 protected int buffer_fill(const struct buffer *);
840325
 
840325
-#include <locale.h>
840325
-#if defined(HAVE_XLOCALE_H)
840325
-#include <xlocale.h>
840325
-#endif
840325
 
840325
-typedef struct {
840325
-	const char *pat;
840325
-#if defined(HAVE_NEWLOCALE) && defined(HAVE_USELOCALE) && defined(HAVE_FREELOCALE)
840325
-#define USE_C_LOCALE
840325
-	locale_t old_lc_ctype;
840325
-	locale_t c_lc_ctype;
840325
-#else
840325
-	char *old_lc_ctype;
840325
-#endif
840325
-	int rc;
840325
-	regex_t rx;
840325
-} file_regex_t;
840325
 
840325
 protected int file_regcomp(file_regex_t *, const char *, int);
840325
 protected int file_regexec(file_regex_t *, const char *, size_t, regmatch_t *,
840325
diff --git a/src/softmagic.c b/src/softmagic.c
840325
index 95061e5..834dfe3 100644
840325
--- a/src/softmagic.c
840325
+++ b/src/softmagic.c
840325
@@ -43,7 +43,7 @@ FILE_RCSID("@(#)$File: softmagic.c,v 1.299 2020/06/07 21:58:01 christos Exp $")
840325
 #include <time.h>
840325
 #include "der.h"
840325
 
840325
-private int match(struct magic_set *, struct magic *, uint32_t,
840325
+private int match(struct magic_set *, struct magic *, file_regex_t **, uint32_t,
840325
     const struct buffer *, size_t, int, int, int, uint16_t *,
840325
     uint16_t *, int *, int *, int *, int *);
840325
 private int mget(struct magic_set *, struct magic *, const struct buffer *,
840325
@@ -52,7 +52,7 @@ private int mget(struct magic_set *, struct magic *, const struct buffer *,
840325
     uint16_t *, int *, int *, int *, int *);
840325
 private int msetoffset(struct magic_set *, struct magic *, struct buffer *,
840325
     const struct buffer *, size_t, unsigned int);
840325
-private int magiccheck(struct magic_set *, struct magic *);
840325
+private int magiccheck(struct magic_set *, struct magic *, file_regex_t **);
840325
 private int32_t mprint(struct magic_set *, struct magic *);
840325
 private int moffset(struct magic_set *, struct magic *, const struct buffer *,
840325
     int32_t *);
840325
@@ -131,8 +131,8 @@ file_softmagic(struct magic_set *ms, const struct buffer *b,
840325
 	}
840325
 
840325
 	for (ml = ms->mlist[0]->next; ml != ms->mlist[0]; ml = ml->next)
840325
-		if ((rv = match(ms, ml->magic, ml->nmagic, b, 0, mode,
840325
-		    text, 0, indir_count, name_count,
840325
+		if ((rv = match(ms, ml->magic, ml->magic_rxcomp, ml->nmagic, b,
840325
+		    0, mode, text, 0, indir_count, name_count,
840325
 		    &printed_something, &need_separator, NULL, NULL)) != 0)
840325
 			return rv;
840325
 
840325
@@ -191,8 +191,8 @@ file_fmtcheck(struct magic_set *ms, const char *desc, const char *def,
840325
  *	so that higher-level continuations are processed.
840325
  */
840325
 private int
840325
-match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
840325
-    const struct buffer *b, size_t offset, int mode, int text,
840325
+match(struct magic_set *ms, struct magic *magic, file_regex_t **magic_rxcomp,
840325
+    uint32_t nmagic, const struct buffer *b, size_t offset, int mode, int text,
840325
     int flip, uint16_t *indir_count, uint16_t *name_count,
840325
     int *printed_something, int *need_separator, int *returnval,
840325
     int *found_match)
840325
@@ -220,6 +220,7 @@ match(struct magic_set *ms, struct magic *magic, uint32_t nmagic,
840325
 	for (magindex = 0; magindex < nmagic; magindex++) {
840325
 		int flush = 0;
840325
 		struct magic *m = &magic[magindex];
840325
+		file_regex_t **m_rxcomp = &magic_rxcomp[magindex];
840325
 
840325
 		if (m->type != FILE_NAME)
840325
 		if ((IS_STRING(m->type) &&
840325
@@ -257,7 +258,7 @@ flush:
840325
 				*returnval = 1;
840325
 			}
840325
 
840325
-			switch (magiccheck(ms, m)) {
840325
+			switch (magiccheck(ms, m, m_rxcomp)) {
840325
 			case -1:
840325
 				return -1;
840325
 			case 0:
840325
@@ -317,6 +318,7 @@ flush:
840325
 		while (magindex + 1 < nmagic &&
840325
 		    magic[magindex + 1].cont_level != 0) {
840325
 			m = &magic[++magindex];
840325
+			m_rxcomp = &magic_rxcomp[magindex];
840325
 			ms->line = m->lineno; /* for messages */
840325
 
840325
 			if (cont_level < m->cont_level)
840325
@@ -370,7 +372,7 @@ flush:
840325
 				break;
840325
 			}
840325
 
840325
-			switch (flush ? 1 : magiccheck(ms, m)) {
840325
+			switch (flush ? 1 : magiccheck(ms, m, m_rxcomp)) {
840325
 			case -1:
840325
 				return -1;
840325
 			case 0:
840325
@@ -1880,8 +1882,8 @@ mget(struct magic_set *ms, struct magic *m, const struct buffer *b,
840325
 		oneed_separator = *need_separator;
840325
 		if (m->flag & NOSPACE)
840325
 			*need_separator = 0;
840325
-		rv = match(ms, ml.magic, ml.nmagic, b, offset + o,
840325
-		    mode, text, flip, indir_count, name_count,
840325
+		rv = match(ms, ml.magic, ml.magic_rxcomp, ml.nmagic, b,
840325
+		    offset + o, mode, text, flip, indir_count, name_count,
840325
 		    printed_something, need_separator, returnval, found_match);
840325
 		(*name_count)--;
840325
 		if (rv != 1)
840325
@@ -1989,8 +1991,31 @@ file_strncmp16(const char *a, const char *b, size_t len, size_t maxlen,
840325
 	return file_strncmp(a, b, len, maxlen, flags);
840325
 }
840325
 
840325
+private file_regex_t *
840325
+alloc_regex(struct magic_set *ms, struct magic *m)
840325
+{
840325
+	int rc;
840325
+	file_regex_t *rx = CAST(file_regex_t *, malloc(sizeof(*rx)));
840325
+
840325
+	if (rx == NULL) {
840325
+		file_error(ms, errno, "can't allocate %" SIZE_T_FORMAT
840325
+		    "u bytes", sizeof(*rx));
840325
+		return NULL;
840325
+	}
840325
+
840325
+	rc = file_regcomp(rx, m->value.s, REG_EXTENDED | REG_NEWLINE |
840325
+	    ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
840325
+	if (rc == 0)
840325
+		return rx;
840325
+
840325
+	file_regerror(rx, rc, ms);
840325
+	file_regfree(rx);
840325
+	free(rx);
840325
+	return NULL;
840325
+}
840325
+
840325
 private int
840325
-magiccheck(struct magic_set *ms, struct magic *m)
840325
+magiccheck(struct magic_set *ms, struct magic *m, file_regex_t **m_cache)
840325
 {
840325
 	uint64_t l = m->value.q;
840325
 	uint64_t v;
840325
@@ -2068,8 +2093,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
840325
 			break;
840325
 
840325
 		default:
840325
-			file_magerror(ms, "cannot happen with float: invalid relation `%c'",
840325
-			    m->reln);
840325
+			file_magerror(ms, "cannot happen with float: "
840325
+			    "invalid relation `%c'", m->reln);
840325
 			return -1;
840325
 		}
840325
 		return matched;
840325
@@ -2101,7 +2126,8 @@ magiccheck(struct magic_set *ms, struct magic *m)
840325
 			break;
840325
 
840325
 		default:
840325
-			file_magerror(ms, "cannot happen with double: invalid relation `%c'", m->reln);
840325
+			file_magerror(ms, "cannot happen with double: "
840325
+			    "invalid relation `%c'", m->reln);
840325
 			return -1;
840325
 		}
840325
 		return matched;
840325
@@ -2169,62 +2195,57 @@ magiccheck(struct magic_set *ms, struct magic *m)
840325
 	}
840325
 	case FILE_REGEX: {
840325
 		int rc;
840325
-		file_regex_t rx;
840325
+		file_regex_t *rx = *m_cache;
840325
 		const char *search;
840325
+		regmatch_t pmatch;
840325
+		size_t slen = ms->search.s_len;
840325
+		char *copy;
840325
 
840325
 		if (ms->search.s == NULL)
840325
 			return 0;
840325
 
840325
+		if (rx == NULL) {
840325
+			rx = *m_cache = alloc_regex(ms, m);
840325
+			if (rx == NULL)
840325
+				return -1;
840325
+		}
840325
 		l = 0;
840325
-		rc = file_regcomp(&rx, m->value.s,
840325
-		    REG_EXTENDED|REG_NEWLINE|
840325
-		    ((m->str_flags & STRING_IGNORE_CASE) ? REG_ICASE : 0));
840325
-		if (rc) {
840325
-			file_regerror(&rx, rc, ms);
840325
-			v = CAST(uint64_t, -1);
840325
+		if (slen != 0) {
840325
+		    copy = CAST(char *, malloc(slen));
840325
+		    if (copy == NULL)  {
840325
+			file_error(ms, errno,
840325
+			    "can't allocate %" SIZE_T_FORMAT "u bytes",
840325
+			    slen);
840325
+			return -1;
840325
+		    }
840325
+		    memcpy(copy, ms->search.s, slen);
840325
+		    copy[--slen] = '\0';
840325
+		    search = copy;
840325
 		} else {
840325
-			regmatch_t pmatch;
840325
-			size_t slen = ms->search.s_len;
840325
-			char *copy;
840325
-			if (slen != 0) {
840325
-			    copy = CAST(char *, malloc(slen));
840325
-			    if (copy == NULL)  {
840325
-				file_regfree(&rx);
840325
-				file_error(ms, errno,
840325
-				    "can't allocate %" SIZE_T_FORMAT "u bytes",
840325
-				    slen);
840325
-				return -1;
840325
-			    }
840325
-			    memcpy(copy, ms->search.s, slen);
840325
-			    copy[--slen] = '\0';
840325
-			    search = copy;
840325
-			} else {
840325
-			    search = CCAST(char *, "");
840325
-			    copy = NULL;
840325
-			}
840325
-			rc = file_regexec(&rx, RCAST(const char *, search),
840325
-			    1, &pmatch, 0);
840325
-			free(copy);
840325
-			switch (rc) {
840325
-			case 0:
840325
-				ms->search.s += CAST(int, pmatch.rm_so);
840325
-				ms->search.offset += CAST(size_t, pmatch.rm_so);
840325
-				ms->search.rm_len = CAST(size_t, 
840325
-				    pmatch.rm_eo - pmatch.rm_so);
840325
-				v = 0;
840325
-				break;
840325
+		    search = CCAST(char *, "");
840325
+		    copy = NULL;
840325
+		}
840325
+		rc = file_regexec(rx, RCAST(const char *, search),
840325
+		    1, &pmatch, 0);
840325
+		free(copy);
840325
+		switch (rc) {
840325
+		case 0:
840325
+			ms->search.s += CAST(int, pmatch.rm_so);
840325
+			ms->search.offset += CAST(size_t, pmatch.rm_so);
840325
+			ms->search.rm_len = CAST(size_t,
840325
+			    pmatch.rm_eo - pmatch.rm_so);
840325
+			v = 0;
840325
+			break;
840325
 
840325
-			case REG_NOMATCH:
840325
-				v = 1;
840325
-				break;
840325
+		case REG_NOMATCH:
840325
+			v = 1;
840325
+			break;
840325
 
840325
-			default:
840325
-				file_regerror(&rx, rc, ms);
840325
-				v = CAST(uint64_t, -1);
840325
-				break;
840325
-			}
840325
+		default:
840325
+			file_regerror(rx, rc, ms);
840325
+			v = CAST(uint64_t, -1);
840325
+			break;
840325
 		}
840325
-		file_regfree(&rx);
840325
 		if (v == CAST(uint64_t, -1))
840325
 			return -1;
840325
 		break;