|
|
652aeb |
From e49e11e069fe7f214263be1782242b9b50f71eaa Mon Sep 17 00:00:00 2001
|
|
|
652aeb |
From: Paul Smith <psmith@gnu.org>
|
|
|
652aeb |
Date: Thu, 12 Nov 2020 17:00:39 -0500
|
|
|
652aeb |
Subject: [SV 59093] Rewrite filter/filter-out to avoid large stack usage
|
|
|
652aeb |
|
|
|
652aeb |
* src/function.c (func_filter_filterout): Allocate arrays to hold
|
|
|
652aeb |
pattern and word information rather than creating linked lists on
|
|
|
652aeb |
the stack.
|
|
|
652aeb |
* tests/scripts/functions/filter-out: Test large filters.
|
|
|
652aeb |
|
|
|
652aeb |
diff --git a/src/function.c b/src/function.c
|
|
|
652aeb |
index 0917e0cd..5edfe8b3 100644
|
|
|
652aeb |
--- a/src/function.c
|
|
|
652aeb |
+++ b/src/function.c
|
|
|
652aeb |
@@ -910,7 +910,6 @@ func_foreach (char *o, char **argv, const char *funcname UNUSED)
|
|
|
652aeb |
|
|
|
652aeb |
struct a_word
|
|
|
652aeb |
{
|
|
|
652aeb |
- struct a_word *next;
|
|
|
652aeb |
struct a_word *chain;
|
|
|
652aeb |
char *str;
|
|
|
652aeb |
size_t length;
|
|
|
652aeb |
@@ -941,7 +940,6 @@ a_word_hash_cmp (const void *x, const void *y)
|
|
|
652aeb |
|
|
|
652aeb |
struct a_pattern
|
|
|
652aeb |
{
|
|
|
652aeb |
- struct a_pattern *next;
|
|
|
652aeb |
char *str;
|
|
|
652aeb |
char *percent;
|
|
|
652aeb |
size_t length;
|
|
|
652aeb |
@@ -950,78 +948,84 @@ struct a_pattern
|
|
|
652aeb |
static char *
|
|
|
652aeb |
func_filter_filterout (char *o, char **argv, const char *funcname)
|
|
|
652aeb |
{
|
|
|
652aeb |
- struct a_word *wordhead;
|
|
|
652aeb |
- struct a_word **wordtail;
|
|
|
652aeb |
+ struct a_word *words;
|
|
|
652aeb |
+ struct a_word *word_end;
|
|
|
652aeb |
struct a_word *wp;
|
|
|
652aeb |
- struct a_pattern *pathead;
|
|
|
652aeb |
- struct a_pattern **pattail;
|
|
|
652aeb |
+ struct a_pattern *patterns;
|
|
|
652aeb |
+ struct a_pattern *pat_end;
|
|
|
652aeb |
struct a_pattern *pp;
|
|
|
652aeb |
+ size_t pat_count = 0, word_count = 0;
|
|
|
652aeb |
|
|
|
652aeb |
struct hash_table a_word_table;
|
|
|
652aeb |
int is_filter = funcname[CSTRLEN ("filter")] == '\0';
|
|
|
652aeb |
- const char *pat_iterator = argv[0];
|
|
|
652aeb |
- const char *word_iterator = argv[1];
|
|
|
652aeb |
+ const char *cp;
|
|
|
652aeb |
int literals = 0;
|
|
|
652aeb |
- int words = 0;
|
|
|
652aeb |
int hashing = 0;
|
|
|
652aeb |
char *p;
|
|
|
652aeb |
size_t len;
|
|
|
652aeb |
+ int doneany = 0;
|
|
|
652aeb |
|
|
|
652aeb |
- /* Chop ARGV[0] up into patterns to match against the words.
|
|
|
652aeb |
- We don't need to preserve it because our caller frees all the
|
|
|
652aeb |
- argument memory anyway. */
|
|
|
652aeb |
+ /* Find the number of words and get memory for them. */
|
|
|
652aeb |
+ cp = argv[1];
|
|
|
652aeb |
+ while ((p = find_next_token (&cp, NULL)) != 0)
|
|
|
652aeb |
+ ++word_count;
|
|
|
652aeb |
|
|
|
652aeb |
- pattail = &pathead;
|
|
|
652aeb |
- while ((p = find_next_token (&pat_iterator, &len)) != 0)
|
|
|
652aeb |
- {
|
|
|
652aeb |
- struct a_pattern *pat = alloca (sizeof (struct a_pattern));
|
|
|
652aeb |
+ if (!word_count)
|
|
|
652aeb |
+ return o;
|
|
|
652aeb |
+
|
|
|
652aeb |
+ words = xcalloc (word_count * sizeof (struct a_word));
|
|
|
652aeb |
+ word_end = words + word_count;
|
|
|
652aeb |
|
|
|
652aeb |
- *pattail = pat;
|
|
|
652aeb |
- pattail = &pat->next;
|
|
|
652aeb |
+ /* Find the number of patterns and get memory for them. */
|
|
|
652aeb |
+ cp = argv[0];
|
|
|
652aeb |
+ while ((p = find_next_token (&cp, NULL)) != 0)
|
|
|
652aeb |
+ ++pat_count;
|
|
|
652aeb |
|
|
|
652aeb |
- if (*pat_iterator != '\0')
|
|
|
652aeb |
- ++pat_iterator;
|
|
|
652aeb |
+ patterns = xcalloc (pat_count * sizeof (struct a_pattern));
|
|
|
652aeb |
+ pat_end = patterns + pat_count;
|
|
|
652aeb |
+
|
|
|
652aeb |
+ /* Chop argv[0] up into patterns to match against the words. */
|
|
|
652aeb |
+
|
|
|
652aeb |
+ cp = argv[0];
|
|
|
652aeb |
+ pp = patterns;
|
|
|
652aeb |
+ while ((p = find_next_token (&cp, &len)) != 0)
|
|
|
652aeb |
+ {
|
|
|
652aeb |
+ if (*cp != '\0')
|
|
|
652aeb |
+ ++cp;
|
|
|
652aeb |
|
|
|
652aeb |
- pat->str = p;
|
|
|
652aeb |
p[len] = '\0';
|
|
|
652aeb |
- pat->percent = find_percent (p);
|
|
|
652aeb |
- if (pat->percent == 0)
|
|
|
652aeb |
+ pp->str = p;
|
|
|
652aeb |
+ pp->percent = find_percent (p);
|
|
|
652aeb |
+ if (pp->percent == 0)
|
|
|
652aeb |
literals++;
|
|
|
652aeb |
-
|
|
|
652aeb |
/* find_percent() might shorten the string so LEN is wrong. */
|
|
|
652aeb |
- pat->length = strlen (pat->str);
|
|
|
652aeb |
+ pp->length = strlen (pp->str);
|
|
|
652aeb |
+
|
|
|
652aeb |
+ ++pp;
|
|
|
652aeb |
}
|
|
|
652aeb |
- *pattail = 0;
|
|
|
652aeb |
|
|
|
652aeb |
/* Chop ARGV[1] up into words to match against the patterns. */
|
|
|
652aeb |
|
|
|
652aeb |
- wordtail = &wordhead;
|
|
|
652aeb |
- while ((p = find_next_token (&word_iterator, &len)) != 0)
|
|
|
652aeb |
+ cp = argv[1];
|
|
|
652aeb |
+ wp = words;
|
|
|
652aeb |
+ while ((p = find_next_token (&cp, &len)) != 0)
|
|
|
652aeb |
{
|
|
|
652aeb |
- struct a_word *word = alloca (sizeof (struct a_word));
|
|
|
652aeb |
-
|
|
|
652aeb |
- *wordtail = word;
|
|
|
652aeb |
- wordtail = &word->next;
|
|
|
652aeb |
-
|
|
|
652aeb |
- if (*word_iterator != '\0')
|
|
|
652aeb |
- ++word_iterator;
|
|
|
652aeb |
+ if (*cp != '\0')
|
|
|
652aeb |
+ ++cp;
|
|
|
652aeb |
|
|
|
652aeb |
p[len] = '\0';
|
|
|
652aeb |
- word->str = p;
|
|
|
652aeb |
- word->length = len;
|
|
|
652aeb |
- word->matched = 0;
|
|
|
652aeb |
- word->chain = 0;
|
|
|
652aeb |
- words++;
|
|
|
652aeb |
+ wp->str = p;
|
|
|
652aeb |
+ wp->length = len;
|
|
|
652aeb |
+ ++wp;
|
|
|
652aeb |
}
|
|
|
652aeb |
- *wordtail = 0;
|
|
|
652aeb |
|
|
|
652aeb |
/* Only use a hash table if arg list lengths justifies the cost. */
|
|
|
652aeb |
- hashing = (literals >= 2 && (literals * words) >= 10);
|
|
|
652aeb |
+ hashing = (literals > 1 && (literals * word_count) >= 10);
|
|
|
652aeb |
if (hashing)
|
|
|
652aeb |
{
|
|
|
652aeb |
- hash_init (&a_word_table, words, a_word_hash_1, a_word_hash_2,
|
|
|
652aeb |
+ hash_init (&a_word_table, word_count, a_word_hash_1, a_word_hash_2,
|
|
|
652aeb |
a_word_hash_cmp);
|
|
|
652aeb |
- for (wp = wordhead; wp != 0; wp = wp->next)
|
|
|
652aeb |
+ for (wp = words; wp < word_end; ++wp)
|
|
|
652aeb |
{
|
|
|
652aeb |
struct a_word *owp = hash_insert (&a_word_table, wp);
|
|
|
652aeb |
if (owp)
|
|
|
652aeb |
@@ -1029,51 +1033,49 @@ func_filter_filterout (char *o, char **argv, const char *funcname)
|
|
|
652aeb |
}
|
|
|
652aeb |
}
|
|
|
652aeb |
|
|
|
652aeb |
- if (words)
|
|
|
652aeb |
+ /* Run each pattern through the words, killing words. */
|
|
|
652aeb |
+ for (pp = patterns; pp < pat_end; ++pp)
|
|
|
652aeb |
{
|
|
|
652aeb |
- int doneany = 0;
|
|
|
652aeb |
-
|
|
|
652aeb |
- /* Run each pattern through the words, killing words. */
|
|
|
652aeb |
- for (pp = pathead; pp != 0; pp = pp->next)
|
|
|
652aeb |
+ if (pp->percent)
|
|
|
652aeb |
+ for (wp = words; wp < word_end; ++wp)
|
|
|
652aeb |
+ wp->matched |= pattern_matches (pp->str, pp->percent, wp->str);
|
|
|
652aeb |
+ else if (hashing)
|
|
|
652aeb |
{
|
|
|
652aeb |
- if (pp->percent)
|
|
|
652aeb |
- for (wp = wordhead; wp != 0; wp = wp->next)
|
|
|
652aeb |
- wp->matched |= pattern_matches (pp->str, pp->percent, wp->str);
|
|
|
652aeb |
- else if (hashing)
|
|
|
652aeb |
+ struct a_word a_word_key;
|
|
|
652aeb |
+ a_word_key.str = pp->str;
|
|
|
652aeb |
+ a_word_key.length = pp->length;
|
|
|
652aeb |
+ wp = hash_find_item (&a_word_table, &a_word_key);
|
|
|
652aeb |
+ while (wp)
|
|
|
652aeb |
{
|
|
|
652aeb |
- struct a_word a_word_key;
|
|
|
652aeb |
- a_word_key.str = pp->str;
|
|
|
652aeb |
- a_word_key.length = pp->length;
|
|
|
652aeb |
- wp = hash_find_item (&a_word_table, &a_word_key);
|
|
|
652aeb |
- while (wp)
|
|
|
652aeb |
- {
|
|
|
652aeb |
- wp->matched |= 1;
|
|
|
652aeb |
- wp = wp->chain;
|
|
|
652aeb |
- }
|
|
|
652aeb |
+ wp->matched |= 1;
|
|
|
652aeb |
+ wp = wp->chain;
|
|
|
652aeb |
}
|
|
|
652aeb |
- else
|
|
|
652aeb |
- for (wp = wordhead; wp != 0; wp = wp->next)
|
|
|
652aeb |
- wp->matched |= (wp->length == pp->length
|
|
|
652aeb |
- && strneq (pp->str, wp->str, wp->length));
|
|
|
652aeb |
}
|
|
|
652aeb |
+ else
|
|
|
652aeb |
+ for (wp = words; wp < word_end; ++wp)
|
|
|
652aeb |
+ wp->matched |= (wp->length == pp->length
|
|
|
652aeb |
+ && strneq (pp->str, wp->str, wp->length));
|
|
|
652aeb |
+ }
|
|
|
652aeb |
|
|
|
652aeb |
- /* Output the words that matched (or didn't, for filter-out). */
|
|
|
652aeb |
- for (wp = wordhead; wp != 0; wp = wp->next)
|
|
|
652aeb |
- if (is_filter ? wp->matched : !wp->matched)
|
|
|
652aeb |
- {
|
|
|
652aeb |
- o = variable_buffer_output (o, wp->str, strlen (wp->str));
|
|
|
652aeb |
- o = variable_buffer_output (o, " ", 1);
|
|
|
652aeb |
- doneany = 1;
|
|
|
652aeb |
- }
|
|
|
652aeb |
+ /* Output the words that matched (or didn't, for filter-out). */
|
|
|
652aeb |
+ for (wp = words; wp < word_end; ++wp)
|
|
|
652aeb |
+ if (is_filter ? wp->matched : !wp->matched)
|
|
|
652aeb |
+ {
|
|
|
652aeb |
+ o = variable_buffer_output (o, wp->str, strlen (wp->str));
|
|
|
652aeb |
+ o = variable_buffer_output (o, " ", 1);
|
|
|
652aeb |
+ doneany = 1;
|
|
|
652aeb |
+ }
|
|
|
652aeb |
|
|
|
652aeb |
- if (doneany)
|
|
|
652aeb |
- /* Kill the last space. */
|
|
|
652aeb |
- --o;
|
|
|
652aeb |
- }
|
|
|
652aeb |
+ if (doneany)
|
|
|
652aeb |
+ /* Kill the last space. */
|
|
|
652aeb |
+ --o;
|
|
|
652aeb |
|
|
|
652aeb |
if (hashing)
|
|
|
652aeb |
hash_free (&a_word_table, 0);
|
|
|
652aeb |
|
|
|
652aeb |
+ free (patterns);
|
|
|
652aeb |
+ free (words);
|
|
|
652aeb |
+
|
|
|
652aeb |
return o;
|
|
|
652aeb |
}
|
|
|
652aeb |
|
|
|
652aeb |
diff --git a/tests/scripts/functions/filter-out b/tests/scripts/functions/filter-out
|
|
|
652aeb |
index 1fe4819d..dec5343e 100644
|
|
|
652aeb |
--- a/tests/scripts/functions/filter-out
|
|
|
652aeb |
+++ b/tests/scripts/functions/filter-out
|
|
|
652aeb |
@@ -27,6 +27,22 @@ all: ; @echo '$(files1) $(files2)'
|
|
|
652aeb |
!,
|
|
|
652aeb |
'', "foo.elc foo.elc\n");
|
|
|
652aeb |
|
|
|
652aeb |
+# Force use of hash (see function.c:func_filter_filterout for params)
|
|
|
652aeb |
+
|
|
|
652aeb |
+my $base = 'foo.1 foo.2 foo.3 foo.4 foo.5 foo.6 foo.7 foo.8 foo.9 foo.10';
|
|
|
652aeb |
+
|
|
|
652aeb |
+my $base10 = join(' ', ($base) x 10);
|
|
|
652aeb |
+my $out3 = join(' ', ('foo.3') x 10);
|
|
|
652aeb |
+my $out456 = join(' ', ('foo.4 foo.5 foo.6') x 10);
|
|
|
652aeb |
+
|
|
|
652aeb |
+run_make_test("words := $base10" . q!
|
|
|
652aeb |
+files1 := $(filter %.3, $(words))
|
|
|
652aeb |
+files2 := $(filter %.4 foo.5 foo.6, $(words))
|
|
|
652aeb |
+all: ; @echo '$(files1) $(files2)'
|
|
|
652aeb |
+!,
|
|
|
652aeb |
+ '', "$out3 $out456\n");
|
|
|
652aeb |
+
|
|
|
652aeb |
+
|
|
|
652aeb |
# Escaped patterns
|
|
|
652aeb |
run_make_test(q!all:;@echo '$(filter foo\%bar,foo%bar fooXbar)'!,
|
|
|
652aeb |
'', "foo%bar\n");
|