Blob Blame History Raw
From d157f330acfe94a1f61bf766b485beb0e0dd7177 Mon Sep 17 00:00:00 2001
From: David Lutterkort <lutter@watzmann.net>
Date: Fri, 4 Aug 2017 17:13:52 -0700
Subject: [PATCH] * src/pathx.c (parse_name): correctly handle trailing
 whitespace in names

When a name ended in whitespace, we incorrectly assumed it was always ok to
trim that whitespace. That is not true if that whitespace is escaped,
i.e. if the path expression is something like '/x\ '. In that case, the
name really needs to be literally 'x ', i.e., we can not trim that
whitespace.

The incorrect behavior led to turning '/x\ ' first into 'x\' and then,
because we assume that '\' is always followed by a character inside the
string, when we removed the escaping '\', we would read beyond the end of
the intermediate string result; if we were lucky, that would lead to a
crash, otherwise we'd continue with junk.

We now make sure that escaped whitespace at the end of a string does not
get stripped, avoiding all these headaches.

Fixes RHBZ https://bugzilla.redhat.com/show_bug.cgi?id=1475621
---
 src/pathx.c        | 27 +++++++++++++++++++------
 tests/test-xpath.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/src/pathx.c b/src/pathx.c
index 8d8dbbbe..a726a032 100644
--- a/src/pathx.c
+++ b/src/pathx.c
@@ -1643,6 +1643,16 @@ int pathx_escape_name(const char *in, char **out) {
     return 0;
 }
 
+/* Return true if POS is preceded by an odd number of backslashes, i.e., if
+ * POS is escaped. Stop the search when we get to START */
+static bool backslash_escaped(const char *pos, const char *start) {
+    bool result=false;
+    while (pos-- > start && *pos == '\\') {
+        result = !result;
+    }
+    return result;
+}
+
 /*
  * NameNoWS ::= [^][|/\= \t\n] | \\.
  * NameWS   ::= [^][|/\=] | \\.
@@ -1652,11 +1662,14 @@ static char *parse_name(struct state *state) {
     const char *s = state->pos;
     char *result;
 
+    /* Advance state->pos until it points to the first character that is
+     * not part of a name. */
     while (*state->pos != '\0' && strchr(name_follow, *state->pos) == NULL) {
-        /* This is a hack: since we allow spaces in names, we need to avoid
-         * gobbling up stuff that is in follow(Name), e.g. 'or' so that
-         * things like [name1 or name2] still work.
-         */
+        /* Since we allow spaces in names, we need to avoid gobbling up
+         * stuff that is in follow(Name), e.g. 'or' so that things like
+         * [name1 or name2] still work. In other words, we'll parse 'x frob
+         * y' as one name, but for 'x or y', we consider 'x' a name in its
+         * own right. */
         if (STREQLEN(state->pos, " or ", strlen(" or ")) ||
             STREQLEN(state->pos, " and ", strlen(" and ")))
             break;
@@ -1671,10 +1684,12 @@ static char *parse_name(struct state *state) {
         state->pos += 1;
     }
 
-    /* Strip trailing white space */
+    /* Strip trailing white space. Make sure we respect escaped whitespace
+     * and don't strip it as in "x\\ " */
     if (state->pos > s) {
         state->pos -= 1;
-        while (isspace(*state->pos) && state->pos >= s)
+        while (isspace(*state->pos) && state->pos > s
+               && !backslash_escaped(state->pos, s))
             state->pos -= 1;
         state->pos += 1;
     }
diff --git a/tests/test-xpath.c b/tests/test-xpath.c
index 335e7bf8..dbba29e0 100644
--- a/tests/test-xpath.c
+++ b/tests/test-xpath.c
@@ -331,6 +331,62 @@ static int test_wrong_regexp_flag(struct augeas *aug) {
     return -1;
 }
 
+static int test_trailing_ws_in_name(struct augeas *aug) {
+    int r;
+
+    printf("%-30s ... ", "trailing_ws_in_name");
+
+    /* We used to incorrectly lop escaped whitespace off the end of a
+     * name. Make sure that we really create a tree node with label 'x '
+     * with the below set, and look for it in a number of ways to ensure we
+     * are not lopping off trailing whitespace. */
+    r = aug_set(aug, "/ws\\ ", "1");
+    if (r < 0) {
+        fprintf(stderr, "failed to set '/ws ': %d\n", r);
+        goto fail;
+    }
+    /* We did not create a node with label 'ws' */
+    r = aug_get(aug, "/ws", NULL);
+    if (r != 0) {
+        fprintf(stderr, "created '/ws' instead: %d\n", r);
+        goto fail;
+    }
+
+    /* We did not create a node with label 'ws\t' (this also checks that we
+     * don't create something like 'ws\\' by dropping the last whitespace
+     * character. */
+    r = aug_get(aug, "/ws\\\t", NULL);
+    if (r != 0) {
+        fprintf(stderr, "found '/ws\\t': %d\n", r);
+        goto fail;
+    }
+
+    /* But we did create 'ws ' */
+    r = aug_get(aug, "/ws\\ ", NULL);
+    if (r != 1) {
+        fprintf(stderr, "could not find '/ws ': %d\n", r);
+        goto fail;
+    }
+
+    /* If the whitespace is preceded by an even number of '\\' chars,
+     * whitespace must be stripped */
+    r = aug_set(aug, "/nows\\\\ ", "1");
+    if (r < 0) {
+        fprintf(stderr, "set of '/nows' failed: %d\n", r);
+        goto fail;
+    }
+    r = aug_get(aug, "/nows\\\\", NULL);
+    if (r != 1) {
+        fprintf(stderr, "could not get '/nows\\'\n");
+        goto fail;
+    }
+    printf("PASS\n");
+    return 0;
+ fail:
+    printf("FAIL\n");
+    return -1;
+}
+
 static int run_tests(struct test *tests, int argc, char **argv) {
     char *lensdir;
     struct augeas *aug = NULL;
@@ -374,6 +430,9 @@ static int run_tests(struct test *tests, int argc, char **argv) {
 
         if (test_wrong_regexp_flag(aug) < 0)
             result = EXIT_FAILURE;
+
+        if (test_trailing_ws_in_name(aug) < 0)
+            result = EXIT_FAILURE;
     }
     aug_close(aug);
 
-- 
2.13.6