Blame SOURCES/0172-lib-add-function-for-removing-userinfo-from-URIs.patch

28bab8
From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001
28bab8
From: Jakub Filak <jfilak@redhat.com>
28bab8
Date: Wed, 21 Oct 2015 14:20:04 +0200
28bab8
Subject: [PATCH] lib: add function for removing userinfo from URIs
28bab8
28bab8
The function expects a valid URL.
28bab8
28bab8
Signed-off-by: Jakub Filak <jfilak@redhat.com>
28bab8
28bab8
Conflicts:
28bab8
	src/lib/Makefile.am
28bab8
---
28bab8
 src/include/internal_libreport.h |  22 ++++++
28bab8
 src/lib/Makefile.am              |   3 +-
28bab8
 src/lib/uriparser.c              | 166 +++++++++++++++++++++++++++++++++++++++
28bab8
 tests/Makefile.am                |   3 +-
28bab8
 tests/testsuite.at               |   1 +
28bab8
 tests/uriparser.at               | 144 +++++++++++++++++++++++++++++++++
28bab8
 6 files changed, 337 insertions(+), 2 deletions(-)
28bab8
 create mode 100644 src/lib/uriparser.c
28bab8
 create mode 100644 tests/uriparser.at
28bab8
28bab8
diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
28bab8
index 78a17ae..651e339 100644
28bab8
--- a/src/include/internal_libreport.h
28bab8
+++ b/src/include/internal_libreport.h
28bab8
@@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN;
28bab8
  */
28bab8
 struct abrt_post_state;
28bab8
 
28bab8
+/* Decomposes uri into its base elements, removes userinfo from the authority
28bab8
+ * and composes a new uri without userinfo.
28bab8
+ *
28bab8
+ * The function does not validate the url.
28bab8
+ *
28bab8
+ * @param uri The uri that might contain userinfo
28bab8
+ * @param result The userinfo-free uri will be stored here. Cannot be NULL. Must
28bab8
+ * be de-allocated by free.
28bab8
+ * @param scheme Scheme of the uri including the trailing ':'. Can be NULL.
28bab8
+ * Result can be NULL. Result must be de-allocated by free.
28bab8
+ * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result
28bab8
+ * must be de-allocated by free.
28bab8
+ * @param username Username of the uri. Can be NULL. Result can be NULL. Result
28bab8
+ * must be de-allocated by free.
28bab8
+ * @param password Password of the uri. Can be NULL. Result can be NULL. Result
28bab8
+ * must be de-allocated by free.
28bab8
+ * @param location Location of the uri. Can be NULL. Result is never NULL. Result
28bab8
+ * must be de-allocated by free.
28bab8
+ */
28bab8
+#define uri_userinfo_remove libreport_uri_userinfo_remove
28bab8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location);
28bab8
+
28bab8
 #ifdef __cplusplus
28bab8
 }
28bab8
 #endif
28bab8
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
28bab8
index 50142f7..b7e4781 100644
28bab8
--- a/src/lib/Makefile.am
28bab8
+++ b/src/lib/Makefile.am
28bab8
@@ -56,7 +56,8 @@ libreport_la_SOURCES = \
28bab8
     config_item_info.c \
28bab8
     xml_parser.c \
28bab8
     libreport_init.c \
28bab8
-    global_configuration.c
28bab8
+    global_configuration.c \
28bab8
+    uriparser.c
28bab8
 
28bab8
 libreport_la_CPPFLAGS = \
28bab8
     -I$(srcdir)/../include \
28bab8
diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c
28bab8
new file mode 100644
28bab8
index 0000000..01e9782
28bab8
--- /dev/null
28bab8
+++ b/src/lib/uriparser.c
28bab8
@@ -0,0 +1,166 @@
28bab8
+/*
28bab8
+    Copyright (C) 2015  ABRT team
28bab8
+    Copyright (C) 2015  RedHat Inc
28bab8
+
28bab8
+    This program is free software; you can redistribute it and/or modify
28bab8
+    it under the terms of the GNU General Public License as published by
28bab8
+    the Free Software Foundation; either version 2 of the License, or
28bab8
+    (at your option) any later version.
28bab8
+
28bab8
+    This program is distributed in the hope that it will be useful,
28bab8
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
28bab8
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
28bab8
+    GNU General Public License for more details.
28bab8
+
28bab8
+    You should have received a copy of the GNU General Public License along
28bab8
+    with this program; if not, write to the Free Software Foundation, Inc.,
28bab8
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
28bab8
+*/
28bab8
+
28bab8
+#include "internal_libreport.h"
28bab8
+
28bab8
+#include <regex.h>
28bab8
+
28bab8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
28bab8
+{
28bab8
+    /* https://www.ietf.org/rfc/rfc3986.txt
28bab8
+     * Appendix B.  Parsing a URI Reference with a Regular Expression
28bab8
+     *
28bab8
+     * scheme    = $2
28bab8
+     * authority = $4
28bab8
+     * location  = $5 <- introduced by jfilak
28bab8
+     * path      = $6
28bab8
+     * query     = $8
28bab8
+     * fragment  = $10
28bab8
+     *                         12            3  4          56       7   8        9 10 */
28bab8
+    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
28bab8
+    regex_t re;
28bab8
+    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
28bab8
+    assert(r == 0 || !"BUG: invalid regular expression");
28bab8
+
28bab8
+    regmatch_t matchptr[10];
28bab8
+    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);
28bab8
+    if (r != 0)
28bab8
+    {
28bab8
+        log_debug("URI does not match RFC3986 regular expression.");
28bab8
+        return -EINVAL;
28bab8
+    }
28bab8
+
28bab8
+    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
28bab8
+    *result = ptr;
28bab8
+    if (scheme != NULL)
28bab8
+        *scheme = NULL;
28bab8
+    if (hostname != NULL)
28bab8
+        *hostname = NULL;
28bab8
+    if (username != NULL)
28bab8
+        *username = NULL;
28bab8
+    if (password != NULL)
28bab8
+        *password = NULL;
28bab8
+    if (location != NULL)
28bab8
+        *location= NULL;
28bab8
+
28bab8
+    /* https://www.ietf.org/rfc/rfc3986.txt
28bab8
+     * 5.3.  Component Recomposition
28bab8
+     *
28bab8
+      result = ""
28bab8
+
28bab8
+      if defined(scheme) then
28bab8
+         append scheme to result;
28bab8
+         append ":" to result;
28bab8
+      endif;
28bab8
+
28bab8
+      if defined(authority) then
28bab8
+         append "//" to result;
28bab8
+         append authority to result;
28bab8
+      endif;
28bab8
+
28bab8
+      append path to result;
28bab8
+
28bab8
+      if defined(query) then
28bab8
+         append "?" to result;
28bab8
+         append query to result;
28bab8
+      endif;
28bab8
+
28bab8
+      if defined(fragment) then
28bab8
+         append "#" to result;
28bab8
+         append fragment to result;
28bab8
+      endif;
28bab8
+
28bab8
+      return result;
28bab8
+    */
28bab8
+
28bab8
+#define APPEND_MATCH(i, output) \
28bab8
+    if (matchptr[(i)].rm_so != -1) \
28bab8
+    { \
28bab8
+        size_t len = 0; \
28bab8
+        len = matchptr[(i)].rm_eo - matchptr[(i)].rm_so; \
28bab8
+        if (output) *output = xstrndup(uri + matchptr[(i)].rm_so, len); \
28bab8
+        strncpy(ptr, uri + matchptr[(i)].rm_so, len); \
28bab8
+        ptr += len; \
28bab8
+    }
28bab8
+
28bab8
+    /* Append "scheme:" if defined */
28bab8
+    APPEND_MATCH(1, scheme);
28bab8
+
28bab8
+    /* If authority is defined, append "//" */
28bab8
+    regmatch_t *match_authority = matchptr + 3;
28bab8
+    if (match_authority->rm_so != -1)
28bab8
+    {
28bab8
+        strcat(ptr, "//");
28bab8
+        ptr += 2;
28bab8
+    }
28bab8
+
28bab8
+    ++match_authority;
28bab8
+    /* If authority has address part, remove userinfo and add the address */
28bab8
+    if (match_authority->rm_so != -1)
28bab8
+    {
28bab8
+        size_t len = match_authority->rm_eo - match_authority->rm_so;
28bab8
+        const char *authority = uri + match_authority->rm_so;
28bab8
+
28bab8
+        /* Find the last '@', in case someone used '@' in the username or
28bab8
+         * password. */
28bab8
+        size_t at = len;
28bab8
+        while (at != 0)
28bab8
+        {
28bab8
+            if (authority[--at] != '@')
28bab8
+                continue;
28bab8
+
28bab8
+            /* Find the first ':' before the '@'. There should be only one ':',
28bab8
+             * but splitting at the first one is the safest choice: it avoids
28bab8
+             * leaking an excerpt of a password that itself contains ':'. */
28bab8
+            size_t colon = 0;
28bab8
+            while (colon < at)
28bab8
+            {
28bab8
+                if (authority[colon] != ':')
28bab8
+                {
28bab8
+                    ++colon;
28bab8
+                    continue;
28bab8
+                }
28bab8
+
28bab8
+                if (password != NULL)
28bab8
+                    *password = xstrndup(authority + colon + 1, at - colon - 1);
28bab8
+
28bab8
+                break;
28bab8
+            }
28bab8
+
28bab8
+            if (username != NULL)
28bab8
+                *username = xstrndup(authority, colon);
28bab8
+
28bab8
+            ++at;
28bab8
+            break;
28bab8
+        }
28bab8
+
28bab8
+        len -= at;
28bab8
+
28bab8
+        if (hostname != NULL)
28bab8
+            *hostname = xstrndup(authority + at, len);
28bab8
+
28bab8
+        strncpy(ptr, authority + at, len);
28bab8
+        ptr += len;
28bab8
+    }
28bab8
+
28bab8
+    /* Append path, query and fragment or "" */
28bab8
+    APPEND_MATCH(5, location);
28bab8
+
28bab8
+    return 0;
28bab8
+}
28bab8
diff --git a/tests/Makefile.am b/tests/Makefile.am
28bab8
index f36ab57..c22958b 100644
28bab8
--- a/tests/Makefile.am
28bab8
+++ b/tests/Makefile.am
28bab8
@@ -45,7 +45,8 @@ TESTSUITE_AT = \
28bab8
   ureport.at \
28bab8
   dump_dir.at \
28bab8
   global_config.at \
28bab8
-  iso_date.at
28bab8
+  iso_date.at \
28bab8
+  uriparser.at
28bab8
 
28bab8
 EXTRA_DIST += $(TESTSUITE_AT)
28bab8
 TESTSUITE = $(srcdir)/testsuite
28bab8
diff --git a/tests/testsuite.at b/tests/testsuite.at
28bab8
index e5e2f72..72e0715 100644
28bab8
--- a/tests/testsuite.at
28bab8
+++ b/tests/testsuite.at
28bab8
@@ -20,3 +20,4 @@ m4_include([ureport.at])
28bab8
 m4_include([dump_dir.at])
28bab8
 m4_include([global_config.at])
28bab8
 m4_include([iso_date.at])
28bab8
+m4_include([uriparser.at])
28bab8
diff --git a/tests/uriparser.at b/tests/uriparser.at
28bab8
new file mode 100644
28bab8
index 0000000..def021f
28bab8
--- /dev/null
28bab8
+++ b/tests/uriparser.at
28bab8
@@ -0,0 +1,144 @@
28bab8
+# -*- Autotest -*-
28bab8
+
28bab8
+AT_BANNER([uriparser])
28bab8
+
28bab8
+## ------------------- ##
28bab8
+## uri_userinfo_remove ##
28bab8
+## ------------------- ##
28bab8
+
28bab8
+AT_TESTFUN([uri_userinfo_remove],
28bab8
+[[#include "internal_libreport.h"
28bab8
+#include <assert.h>
28bab8
+#include <string.h>
28bab8
+#include <stdio.h>
28bab8
+
28bab8
+bool string_cmp(const char *message, const char *orig, const char *other)
28bab8
+{
28bab8
+    if (orig == NULL && other != NULL)
28bab8
+    {
28bab8
+        printf("%s: expected NULL got '%s'\n", message, other);
28bab8
+        return false;
28bab8
+    }
28bab8
+
28bab8
+    if (orig != NULL && other == NULL)
28bab8
+    {
28bab8
+        printf("%s: expected '%s' got NULL\n", message, orig);
28bab8
+        return false;
28bab8
+    }
28bab8
+
28bab8
+    if (orig == NULL && other == NULL)
28bab8
+        return true;
28bab8
+
28bab8
+    if (strcmp(orig, other) == 0)
28bab8
+        return true;
28bab8
+
28bab8
+    printf("%s: '%s' != '%s'\n", message, orig, other);
28bab8
+    return false;
28bab8
+}
28bab8
+
28bab8
+int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location)
28bab8
+{
28bab8
+    int e = 0;
28bab8
+    const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"} ;
28bab8
+    char *outputs[6];
28bab8
+    const char *expected[6];
28bab8
+
28bab8
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
28bab8
+        outputs[i] = (char *)0xDEADBEEF;
28bab8
+
28bab8
+    expected[0] = result;
28bab8
+    expected[1] = scheme;
28bab8
+    expected[2] = hostname;
28bab8
+    expected[3] = username;
28bab8
+    expected[4] = password;
28bab8
+    expected[5] = location;
28bab8
+
28bab8
+    fprintf(stderr, "==== Testing: '%s'\n", uri);
28bab8
+    fprintf(stdout, "==== Testing: '%s'\n", uri);
28bab8
+
28bab8
+    int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]);
28bab8
+    if (r != retval)
28bab8
+    {
28bab8
+        printf("Invalid retval %d != %d\n", retval, r);
28bab8
+        ++e;
28bab8
+    }
28bab8
+
28bab8
+    if (r != -EINVAL)
28bab8
+    {
28bab8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
28bab8
+        {
28bab8
+            if (outputs[i] == (char *)0xDEADBEEF)
28bab8
+            {
28bab8
+                printf("Not initialized argument '%s'\n", names[i]);
28bab8
+                ++e;
28bab8
+            }
28bab8
+            else
28bab8
+            {
28bab8
+                e += !string_cmp(names[i], expected[i], outputs[i]);
28bab8
+                free(outputs[i]);
28bab8
+                outputs[i] = (char *)0xDEADBEEF;
28bab8
+            }
28bab8
+        }
28bab8
+    }
28bab8
+    else
28bab8
+    {
28bab8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
28bab8
+        {
28bab8
+            if (outputs[i] != (char *)0xDEADBEEF)
28bab8
+            {
28bab8
+                printf("Touched argument '%s'\n", names[i]);
28bab8
+                ++e;
28bab8
+            }
28bab8
+        }
28bab8
+    }
28bab8
+
28bab8
+    fprintf(stderr, "== Test without arguments\n");
28bab8
+    fprintf(stdout, "== Test without arguments\n");
28bab8
+
28bab8
+
28bab8
+    r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL);
28bab8
+    if (r != retval)
28bab8
+    {
28bab8
+        printf("Invalid retval without arguments: %d != %d\n", retval, r);
28bab8
+        ++e;
28bab8
+    }
28bab8
+
28bab8
+    e += !string_cmp(names[0], result, outputs[0]);
28bab8
+    free(outputs[0]);
28bab8
+
28bab8
+    return e;
28bab8
+}
28bab8
+
28bab8
+int main(void)
28bab8
+{
28bab8
+    g_verbose=3;
28bab8
+
28bab8
+    int e = 0;
28bab8
+    e += test(      0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", "");
28bab8
+    e += test(      0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/");
28bab8
+    e += test(      0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo");
28bab8
+    e += test(      0, "ftp://@", "ftp://", "ftp:", "", "", NULL, "");
28bab8
+    e += test(      0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/");
28bab8
+    e += test(      0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo");
28bab8
+    e += test(      0, "ftp://:@", "ftp://", "ftp:", "", "", "", "");
28bab8
+    e += test(      0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/");
28bab8
+    e += test(      0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo");
28bab8
+    e += test(      0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password");
28bab8
+    e += test(      0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@");
28bab8
+    e += test(      0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, "");
28bab8
+    e += test(      0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost");
28bab8
+    e += test(      0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost");
28bab8
+    e += test(      0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/");
28bab8
+    e += test(      0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head");
28bab8
+    e += test(      0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root");
28bab8
+    e += test(      0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root");
28bab8
+    e += test(      0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost");
28bab8
+
28bab8
+    return e;
28bab8
+}
28bab8
+]])
28bab8
-- 
28bab8
1.8.3.1
28bab8