Blame SOURCES/0172-lib-add-function-for-removing-userinfo-from-URIs.patch

2c83a8
From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001
2c83a8
From: Jakub Filak <jfilak@redhat.com>
2c83a8
Date: Wed, 21 Oct 2015 14:20:04 +0200
2c83a8
Subject: [PATCH] lib: add function for removing userinfo from URIs
2c83a8
2c83a8
The function expects a valid URL.
2c83a8
2c83a8
Signed-off-by: Jakub Filak <jfilak@redhat.com>
2c83a8
2c83a8
Conflicts:
2c83a8
	src/lib/Makefile.am
2c83a8
---
2c83a8
 src/include/internal_libreport.h |  22 ++++++
2c83a8
 src/lib/Makefile.am              |   3 +-
2c83a8
 src/lib/uriparser.c              | 166 +++++++++++++++++++++++++++++++++++++++
2c83a8
 tests/Makefile.am                |   3 +-
2c83a8
 tests/testsuite.at               |   1 +
2c83a8
 tests/uriparser.at               | 144 +++++++++++++++++++++++++++++++++
2c83a8
 6 files changed, 337 insertions(+), 2 deletions(-)
2c83a8
 create mode 100644 src/lib/uriparser.c
2c83a8
 create mode 100644 tests/uriparser.at
2c83a8
2c83a8
diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
2c83a8
index 78a17ae..651e339 100644
2c83a8
--- a/src/include/internal_libreport.h
2c83a8
+++ b/src/include/internal_libreport.h
2c83a8
@@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN;
2c83a8
  */
2c83a8
 struct abrt_post_state;
2c83a8
 
2c83a8
+/* Decomposes uri to its base elements, removes userinfo out of the authority
2c83a8
+ * and composes a new uri without userinfo. The function does not validate
2c83a8
+ * the url.
2c83a8
+ *
2c83a8
+ * @param uri The uri that might contain userinfo
2c83a8
+ * @param result The userinfo free uri will be stored here. Cannot be NULL. Must
2c83a8
+ * be de-allocated by free.
2c83a8
+ * @param scheme Scheme of the uri including its trailing ':'. Can be NULL.
2c83a8
+ * Result can be NULL. Result must be de-allocated by free.
2c83a8
+ * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result
2c83a8
+ * must be de-allocated by free.
2c83a8
+ * @param username Username of the uri. Can be NULL. Result can be NULL. Result
2c83a8
+ * must be de-allocated by free.
2c83a8
+ * @param password Password of the uri. Can be NULL. Result can be NULL. Result
2c83a8
+ * must be de-allocated by free.
2c83a8
+ * @param location Path, query and fragment of the uri. Can be NULL. Result is
2c83a8
+ * never NULL. Result must be de-allocated by free.
2c83a8
+ * @return 0 on success, -EINVAL if uri cannot be parsed (outputs are untouched).
2c83a8
+ */
2c83a8
+#define uri_userinfo_remove libreport_uri_userinfo_remove
2c83a8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location);
2c83a8
+
2c83a8
 #ifdef __cplusplus
2c83a8
 }
2c83a8
 #endif
2c83a8
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
2c83a8
index 50142f7..b7e4781 100644
2c83a8
--- a/src/lib/Makefile.am
2c83a8
+++ b/src/lib/Makefile.am
2c83a8
@@ -56,7 +56,8 @@ libreport_la_SOURCES = \
2c83a8
     config_item_info.c \
2c83a8
     xml_parser.c \
2c83a8
     libreport_init.c \
2c83a8
-    global_configuration.c
2c83a8
+    global_configuration.c \
2c83a8
+    uriparser.c
2c83a8
 
2c83a8
 libreport_la_CPPFLAGS = \
2c83a8
     -I$(srcdir)/../include \
2c83a8
diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c
2c83a8
new file mode 100644
2c83a8
index 0000000..01e9782
2c83a8
--- /dev/null
2c83a8
+++ b/src/lib/uriparser.c
2c83a8
@@ -0,0 +1,166 @@
2c83a8
+/*
2c83a8
+    Copyright (C) 2015  ABRT team
2c83a8
+    Copyright (C) 2015  RedHat Inc
2c83a8
+
2c83a8
+    This program is free software; you can redistribute it and/or modify
2c83a8
+    it under the terms of the GNU General Public License as published by
2c83a8
+    the Free Software Foundation; either version 2 of the License, or
2c83a8
+    (at your option) any later version.
2c83a8
+
2c83a8
+    This program is distributed in the hope that it will be useful,
2c83a8
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
2c83a8
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2c83a8
+    GNU General Public License for more details.
2c83a8
+
2c83a8
+    You should have received a copy of the GNU General Public License along
2c83a8
+    with this program; if not, write to the Free Software Foundation, Inc.,
2c83a8
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
2c83a8
+*/
2c83a8
+
2c83a8
+#include "internal_libreport.h"
2c83a8
+
2c83a8
+#include <regex.h>
2c83a8
+
2c83a8
+/* Strips the userinfo ("user:password@") from the authority part of uri.
2c83a8
+ *
2c83a8
+ * On success *result holds a newly allocated uri without the userinfo and each
2c83a8
+ * non-NULL optional argument holds either NULL or a newly allocated copy of
2c83a8
+ * the respective component (*scheme keeps its trailing ':', *location is path +
2c83a8
+ * query + fragment). The caller releases all of them with free().
2c83a8
+ *
2c83a8
+ * Returns 0 on success, -EINVAL (no output is touched) if uri does not match.
2c83a8
+ */
2c83a8
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
2c83a8
+{
2c83a8
+    /* https://www.ietf.org/rfc/rfc3986.txt
2c83a8
+     * Appendix B.  Parsing a URI Reference with a Regular Expression
2c83a8
+     *
2c83a8
+     * scheme    = $2 ($1 also contains the trailing ':')
2c83a8
+     * authority = $4 ($3 also contains the leading "//")
2c83a8
+     * location  = $5 <- introduced by jfilak
2c83a8
+     * path      = $6
2c83a8
+     * query     = $8
2c83a8
+     * fragment  = $10
2c83a8
+     *                         12            3  4          56       7   8        9 10 */
2c83a8
+    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
2c83a8
+    regex_t re;
2c83a8
+    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
2c83a8
+    assert(r == 0 || !"BUG: invalid regular expression");
2c83a8
+
2c83a8
+    /* Only groups 0-5 are read below, so 10 slots are more than enough. */
2c83a8
+    regmatch_t matchptr[10];
2c83a8
+    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);
2c83a8
+
2c83a8
+    /* matchptr stores plain offsets into uri, so the compiled expression is no
2c83a8
+     * longer needed; release it here to cover both the error and success path. */
2c83a8
+    regfree(&re);
2c83a8
+
2c83a8
+    if (r != 0)
2c83a8
+    {
2c83a8
+        log_debug("URI does not match RFC3986 regular expression.");
2c83a8
+        return -EINVAL;
2c83a8
+    }
2c83a8
+
2c83a8
+    /* The result is never longer than uri and the buffer is zeroed, so the raw
2c83a8
+     * copies below always leave it NUL-terminated. */
2c83a8
+    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
2c83a8
+    *result = ptr;
2c83a8
+    if (scheme != NULL)
2c83a8
+        *scheme = NULL;
2c83a8
+    if (hostname != NULL)
2c83a8
+        *hostname = NULL;
2c83a8
+    if (username != NULL)
2c83a8
+        *username = NULL;
2c83a8
+    if (password != NULL)
2c83a8
+        *password = NULL;
2c83a8
+    if (location != NULL)
2c83a8
+        *location = NULL;
2c83a8
+
2c83a8
+    /* https://www.ietf.org/rfc/rfc3986.txt
2c83a8
+     * 5.3.  Component Recomposition
2c83a8
+     *
2c83a8
+     * result = [scheme ":"] ["//" authority] path ["?" query] ["#" fragment]
2c83a8
+     *
2c83a8
+     * The userinfo is cut out of the authority before it is appended.
2c83a8
+     */
2c83a8
+
2c83a8
+/* Copies regex group i to the result if the group took part in the match and,
2c83a8
+ * when output is not NULL, stores a newly allocated copy of it in *output. */
2c83a8
+#define APPEND_MATCH(i, output) \
2c83a8
+    do { \
2c83a8
+        if (matchptr[(i)].rm_so != -1) \
2c83a8
+        { \
2c83a8
+            const size_t len = matchptr[(i)].rm_eo - matchptr[(i)].rm_so; \
2c83a8
+            if ((output) != NULL) *(output) = xstrndup(uri + matchptr[(i)].rm_so, len); \
2c83a8
+            memcpy(ptr, uri + matchptr[(i)].rm_so, len); \
2c83a8
+            ptr += len; \
2c83a8
+        } \
2c83a8
+    } while (0)
2c83a8
+
2c83a8
+    /* Append "scheme:" if defined */
2c83a8
+    APPEND_MATCH(1, scheme);
2c83a8
+
2c83a8
+    /* If authority is defined, append "//" */
2c83a8
+    regmatch_t *match_authority = matchptr + 3;
2c83a8
+    if (match_authority->rm_so != -1)
2c83a8
+    {
2c83a8
+        memcpy(ptr, "//", 2);
2c83a8
+        ptr += 2;
2c83a8
+    }
2c83a8
+
2c83a8
+    ++match_authority;
2c83a8
+    /* If authority has address part, remove userinfo and add the address */
2c83a8
+    if (match_authority->rm_so != -1)
2c83a8
+    {
2c83a8
+        size_t len = match_authority->rm_eo - match_authority->rm_so;
2c83a8
+        const char *authority = uri + match_authority->rm_so;
2c83a8
+
2c83a8
+        /* Find the last '@'. Just for the case someone used @ in username or
2c83a8
+         * password */
2c83a8
+        size_t at = len;
2c83a8
+        while (at != 0)
2c83a8
+        {
2c83a8
+            if (authority[--at] != '@')
2c83a8
+                continue;
2c83a8
+
2c83a8
+            /* Find the first ':' before @. There should not be more ':' but this
2c83a8
+             * is the most secure way -> avoid leaking an excerpt of a password
2c83a8
+             * containing ':'.*/
2c83a8
+            size_t colon = 0;
2c83a8
+            while (colon < at)
2c83a8
+            {
2c83a8
+                if (authority[colon] != ':')
2c83a8
+                {
2c83a8
+                    ++colon;
2c83a8
+                    continue;
2c83a8
+                }
2c83a8
+
2c83a8
+                if (password != NULL)
2c83a8
+                    *password = xstrndup(authority + colon + 1, at - colon - 1);
2c83a8
+
2c83a8
+                break;
2c83a8
+            }
2c83a8
+
2c83a8
+            if (username != NULL)
2c83a8
+                *username = xstrndup(authority, colon);
2c83a8
+
2c83a8
+            ++at;
2c83a8
+            break;
2c83a8
+        }
2c83a8
+
2c83a8
+        len -= at;
2c83a8
+
2c83a8
+        if (hostname != NULL)
2c83a8
+            *hostname = xstrndup(authority + at, len);
2c83a8
+
2c83a8
+        memcpy(ptr, authority + at, len);
2c83a8
+        ptr += len;
2c83a8
+    }
2c83a8
+
2c83a8
+    /* Append path, query and fragment or "" */
2c83a8
+    APPEND_MATCH(5, location);
2c83a8
+
2c83a8
+#undef APPEND_MATCH
2c83a8
+
2c83a8
+    return 0;
2c83a8
+}
2c83a8
diff --git a/tests/Makefile.am b/tests/Makefile.am
2c83a8
index f36ab57..c22958b 100644
2c83a8
--- a/tests/Makefile.am
2c83a8
+++ b/tests/Makefile.am
2c83a8
@@ -45,7 +45,8 @@ TESTSUITE_AT = \
2c83a8
   ureport.at \
2c83a8
   dump_dir.at \
2c83a8
   global_config.at \
2c83a8
-  iso_date.at
2c83a8
+  iso_date.at \
2c83a8
+  uriparser.at
2c83a8
 
2c83a8
 EXTRA_DIST += $(TESTSUITE_AT)
2c83a8
 TESTSUITE = $(srcdir)/testsuite
2c83a8
diff --git a/tests/testsuite.at b/tests/testsuite.at
2c83a8
index e5e2f72..72e0715 100644
2c83a8
--- a/tests/testsuite.at
2c83a8
+++ b/tests/testsuite.at
2c83a8
@@ -20,3 +20,4 @@ m4_include([ureport.at])
2c83a8
 m4_include([dump_dir.at])
2c83a8
 m4_include([global_config.at])
2c83a8
 m4_include([iso_date.at])
2c83a8
+m4_include([uriparser.at])
2c83a8
diff --git a/tests/uriparser.at b/tests/uriparser.at
2c83a8
new file mode 100644
2c83a8
index 0000000..def021f
2c83a8
--- /dev/null
2c83a8
+++ b/tests/uriparser.at
2c83a8
@@ -0,0 +1,144 @@
2c83a8
+# -*- Autotest -*-
2c83a8
+
2c83a8
+AT_BANNER([uriparser])
2c83a8
+
2c83a8
+## ------------------- ##
2c83a8
+## uri_userinfo_remove ##
2c83a8
+## ------------------- ##
2c83a8
+
2c83a8
+AT_TESTFUN([uri_userinfo_remove],
2c83a8
+[[#include "internal_libreport.h"
2c83a8
+#include <assert.h>
2c83a8
+#include <string.h>
2c83a8
+#include <stdio.h>
2c83a8
+
2c83a8
+/* Returns true if both strings are NULL or equal, otherwise reports the difference on stdout and returns false. */
2c83a8
+bool string_cmp(const char *message, const char *orig, const char *other)
2c83a8
+{
2c83a8
+    if (orig == NULL)
2c83a8
+    {
2c83a8
+        if (other == NULL)
2c83a8
+            return true;
2c83a8
+        printf("%s: expected NULL got '%s'\n", message, other);
2c83a8
+        return false;
2c83a8
+    }
2c83a8
+
2c83a8
+    if (other == NULL)
2c83a8
+    {
2c83a8
+        printf("%s: expected '%s' got NULL\n", message, orig);
2c83a8
+        return false;
2c83a8
+    }
2c83a8
+
2c83a8
+    const bool equal = strcmp(orig, other) == 0;
2c83a8
+    if (!equal)
2c83a8
+        printf("%s: '%s' != '%s'\n", message, orig, other);
2c83a8
+
2c83a8
+    return equal;
2c83a8
+}
2c83a8
+
2c83a8
+/* Calls uri_userinfo_remove() twice (with all outputs, then with result only)
2c83a8
+ * and returns the number of mismatches against the expected values. */
2c83a8
+int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location)
2c83a8
+{
2c83a8
+    /* Sentinel revealing whether the tested function wrote to an output. */
2c83a8
+    char *const untouched = (char *)0xDEADBEEF;
2c83a8
+    const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"};
2c83a8
+    const char *expected[] = {result, scheme, hostname, username, password, location};
2c83a8
+    char *outputs[6];
2c83a8
+    int e = 0;
2c83a8
+
2c83a8
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
2c83a8
+        outputs[i] = untouched;
2c83a8
+
2c83a8
+    fprintf(stderr, "==== Testing: '%s'\n", uri);
2c83a8
+    fprintf(stdout, "==== Testing: '%s'\n", uri);
2c83a8
+
2c83a8
+    int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]);
2c83a8
+    if (r != retval)
2c83a8
+    {
2c83a8
+        printf("Invalid retval %d != %d\n", retval, r);
2c83a8
+        ++e;
2c83a8
+    }
2c83a8
+
2c83a8
+    if (r != -EINVAL)
2c83a8
+    {
2c83a8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
2c83a8
+        {
2c83a8
+            if (outputs[i] == untouched)
2c83a8
+            {
2c83a8
+                printf("Not initialized argument '%s'\n", names[i]);
2c83a8
+                ++e;
2c83a8
+            }
2c83a8
+            else
2c83a8
+            {
2c83a8
+                e += !string_cmp(names[i], expected[i], outputs[i]);
2c83a8
+                free(outputs[i]);
2c83a8
+                outputs[i] = untouched;
2c83a8
+            }
2c83a8
+        }
2c83a8
+    }
2c83a8
+    else
2c83a8
+    {
2c83a8
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
2c83a8
+        {
2c83a8
+            if (outputs[i] != untouched)
2c83a8
+            {
2c83a8
+                printf("Touched argument '%s'\n", names[i]);
2c83a8
+                ++e;
2c83a8
+            }
2c83a8
+        }
2c83a8
+    }
2c83a8
+
2c83a8
+    fprintf(stderr, "== Test without arguments\n");
2c83a8
+    fprintf(stdout, "== Test without arguments\n");
2c83a8
+
2c83a8
+    r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL);
2c83a8
+    if (r != retval)
2c83a8
+    {
2c83a8
+        printf("Invalid retval without arguments: %d != %d\n", retval, r);
2c83a8
+        ++e;
2c83a8
+    }
2c83a8
+
2c83a8
+    /* On -EINVAL the result is not written, so there is nothing to compare or free. */
2c83a8
+    if (r != -EINVAL)
2c83a8
+    {
2c83a8
+        e += !string_cmp(names[0], result, outputs[0]);
2c83a8
+        free(outputs[0]);
2c83a8
+    }
2c83a8
+
2c83a8
+    return e;
2c83a8
+}
2c83a8
+
2c83a8
+/* Runs all test cases and returns the total number of mismatches (0 means success). */
2c83a8
+int main(void)
2c83a8
+{
2c83a8
+    g_verbose = 3;
2c83a8
+    int failures = 0;
2c83a8
+    failures += test(0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", "");
2c83a8
+    failures += test(0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/");
2c83a8
+    failures += test(0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo");
2c83a8
+    failures += test(0, "ftp://@", "ftp://", "ftp:", "", "", NULL, "");
2c83a8
+    failures += test(0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/");
2c83a8
+    failures += test(0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo");
2c83a8
+    failures += test(0, "ftp://:@", "ftp://", "ftp:", "", "", "", "");
2c83a8
+    failures += test(0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/");
2c83a8
+    failures += test(0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo");
2c83a8
+    failures += test(0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password");
2c83a8
+    failures += test(0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@");
2c83a8
+    failures += test(0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, "");
2c83a8
+    failures += test(0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost");
2c83a8
+    failures += test(0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost");
2c83a8
+    failures += test(0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/");
2c83a8
+    failures += test(0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head");
2c83a8
+    failures += test(0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root");
2c83a8
+    failures += test(0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root");
2c83a8
+    failures += test(0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost");
2c83a8
+
2c83a8
+    return failures;
2c83a8
+}
2c83a8
+]])
2c83a8
-- 
2c83a8
1.8.3.1
2c83a8