Blob Blame History Raw
From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001
From: Jakub Filak <jfilak@redhat.com>
Date: Wed, 21 Oct 2015 14:20:04 +0200
Subject: [PATCH] lib: add function for removing userinfo from URIs

The function expects a valid URL.

Signed-off-by: Jakub Filak <jfilak@redhat.com>

Conflicts:
	src/lib/Makefile.am
---
 src/include/internal_libreport.h |  22 ++++++
 src/lib/Makefile.am              |   3 +-
 src/lib/uriparser.c              | 166 +++++++++++++++++++++++++++++++++++++++
 tests/Makefile.am                |   3 +-
 tests/testsuite.at               |   1 +
 tests/uriparser.at               | 144 +++++++++++++++++++++++++++++++++
 6 files changed, 337 insertions(+), 2 deletions(-)
 create mode 100644 src/lib/uriparser.c
 create mode 100644 tests/uriparser.at

diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
index 78a17ae..651e339 100644
--- a/src/include/internal_libreport.h
+++ b/src/include/internal_libreport.h
@@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN;
  */
 struct abrt_post_state;
 
+/* Decomposes uri to its base elements, removes userinfo out of the hostname and
+ * composes a new uri without userinfo.
+ *
+ * The function does not validate the url.
+ *
+ * @param uri The uri that might contain userinfo
+ * @param result The userinfo free uri will be store here. Cannot be null. Must
+ * be de-allocated by free.
+ * @param scheme Scheme of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param username Username of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param password Password of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param location Location of the uri. Can be NULL. Result is never NULL. Result
+ * must be de-allocated by free.
+ */
+#define uri_userinfo_remove libreport_uri_userinfo_remove
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location);
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 50142f7..b7e4781 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -56,7 +56,8 @@ libreport_la_SOURCES = \
     config_item_info.c \
     xml_parser.c \
     libreport_init.c \
-    global_configuration.c
+    global_configuration.c \
+    uriparser.c
 
 libreport_la_CPPFLAGS = \
     -I$(srcdir)/../include \
diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c
new file mode 100644
index 0000000..01e9782
--- /dev/null
+++ b/src/lib/uriparser.c
@@ -0,0 +1,166 @@
+/*
+    Copyright (C) 2015  ABRT team
+    Copyright (C) 2015  RedHat Inc
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License along
+    with this program; if not, write to the Free Software Foundation, Inc.,
+    51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#include "internal_libreport.h"
+
+#include <regex.h>
+
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
+{
+    /* https://www.ietf.org/rfc/rfc3986.txt
+     * Appendix B.  Parsing a URI Reference with a Regular Expression
+     *
+     * scheme    = $2
+     * authority = $4
+     * location  = $5 <- introduced by jfilak
+     * path      = $6
+     * query     = $8
+     * fragment  = $10
+     *                         12            3  4          56       7   8        9 10 */
+    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
+    regex_t re;
+    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
+    assert(r == 0 || !"BUG: invalid regular expression");
+
+    regmatch_t matchptr[10];
+    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);
+    if (r != 0)
+    {
+        log_debug("URI does not match RFC3986 regular expression.");
+        return -EINVAL;
+    }
+
+    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
+    *result = ptr;
+    if (scheme != NULL)
+        *scheme = NULL;
+    if (hostname != NULL)
+        *hostname = NULL;
+    if (username != NULL)
+        *username = NULL;
+    if (password != NULL)
+        *password = NULL;
+    if (location != NULL)
+        *location= NULL;
+
+    /* https://www.ietf.org/rfc/rfc3986.txt
+     * 5.3.  Component Recomposition
+     *
+      result = ""
+
+      if defined(scheme) then
+         append scheme to result;
+         append ":" to result;
+      endif;
+
+      if defined(authority) then
+         append "//" to result;
+         append authority to result;
+      endif;
+
+      append path to result;
+
+      if defined(query) then
+         append "?" to result;
+         append query to result;
+      endif;
+
+      if defined(fragment) then
+         append "#" to result;
+         append fragment to result;
+      endif;
+
+      return result;
+    */
+
+#define APPEND_MATCH(i, output) \
+    if (matchptr[(i)].rm_so != -1) \
+    { \
+        size_t len = 0; \
+        len = matchptr[(i)].rm_eo - matchptr[(i)].rm_so; \
+        if (output) *output = xstrndup(uri + matchptr[(i)].rm_so, len); \
+        strncpy(ptr, uri + matchptr[(i)].rm_so, len); \
+        ptr += len; \
+    }
+
+    /* Append "scheme:" if defined */
+    APPEND_MATCH(1, scheme);
+
+    /* If authority is defined, append "//" */
+    regmatch_t *match_authority = matchptr + 3;
+    if (match_authority->rm_so != -1)
+    {
+        strcat(ptr, "//");
+        ptr += 2;
+    }
+
+    ++match_authority;
+    /* If authority has address part, remove userinfo and add the address */
+    if (match_authority->rm_so != -1)
+    {
+        size_t len = match_authority->rm_eo - match_authority->rm_so;
+        const char *authority = uri + match_authority->rm_so;
+
+        /* Find the last '@'. Just for the case some used @ in username or
+         * password */
+        size_t at = len;
+        while (at != 0)
+        {
+            if (authority[--at] != '@')
+                continue;
+
+            /* Find the first ':' before @. There should not be more ':' but this
+             * is the most secure way -> avoid leaking an excerpt of a password
+             * containing ':'.*/
+            size_t colon = 0;
+            while (colon < at)
+            {
+                if (authority[colon] != ':')
+                {
+                    ++colon;
+                    continue;
+                }
+
+                if (password != NULL)
+                    *password = xstrndup(authority + colon + 1, at - colon - 1);
+
+                break;
+            }
+
+            if (username != NULL)
+                *username = xstrndup(authority, colon);
+
+            ++at;
+            break;
+        }
+
+        len -= at;
+
+        if (hostname != NULL)
+            *hostname = xstrndup(authority + at, len);
+
+        strncpy(ptr, authority + at, len);
+        ptr += len;
+    }
+
+    /* Append path, query and fragment or "" */
+    APPEND_MATCH(5, location);
+
+    return 0;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f36ab57..c22958b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -45,7 +45,8 @@ TESTSUITE_AT = \
   ureport.at \
   dump_dir.at \
   global_config.at \
-  iso_date.at
+  iso_date.at \
+  uriparser.at
 
 EXTRA_DIST += $(TESTSUITE_AT)
 TESTSUITE = $(srcdir)/testsuite
diff --git a/tests/testsuite.at b/tests/testsuite.at
index e5e2f72..72e0715 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -20,3 +20,4 @@ m4_include([ureport.at])
 m4_include([dump_dir.at])
 m4_include([global_config.at])
 m4_include([iso_date.at])
+m4_include([uriparser.at])
diff --git a/tests/uriparser.at b/tests/uriparser.at
new file mode 100644
index 0000000..def021f
--- /dev/null
+++ b/tests/uriparser.at
@@ -0,0 +1,144 @@
+# -*- Autotest -*-
+
+AT_BANNER([uriparser])
+
+## ------------------- ##
+## uri_userinfo_remove ##
+## ------------------- ##
+
+AT_TESTFUN([uri_userinfo_remove],
+[[#include "internal_libreport.h"
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+
+bool string_cmp(const char *message, const char *orig, const char *other)
+{
+    if (orig == NULL && other != NULL)
+    {
+        printf("%s: expected NULL got '%s'\n", message, other);
+        return false;
+    }
+
+    if (orig != NULL && other == NULL)
+    {
+        printf("%s: expected '%s' got NULL\n", message, orig);
+        return false;
+    }
+
+    if (orig == NULL && other == NULL)
+        return true;
+
+    if (strcmp(orig, other) == 0)
+        return true;
+
+    printf("%s: '%s' != '%s'\n", message, orig, other);
+    return false;
+}
+
+int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location)
+{
+    int e = 0;
+    const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"} ;
+    char *outputs[6];
+    const char *expected[6];
+
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
+        outputs[i] = (char *)0xDEADBEEF;
+
+    expected[0] = result;
+    expected[1] = scheme;
+    expected[2] = hostname;
+    expected[3] = username;
+    expected[4] = password;
+    expected[5] = location;
+
+    fprintf(stderr, "==== Testing: '%s'\n", uri);
+    fprintf(stdout, "==== Testing: '%s'\n", uri);
+
+    int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]);
+    if (r != retval)
+    {
+        printf("Invalid retval %d != %d\n", retval, r);
+        ++e;
+    }
+
+    if (r != -EINVAL)
+    {
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
+        {
+            if (outputs[i] == (char *)0xDEADBEEF)
+            {
+                printf("Not initialized argument '%s'\n", names[i]);
+                ++e;
+            }
+            else
+            {
+                e += !string_cmp(names[i], expected[i], outputs[i]);
+                free(outputs[i]);
+                outputs[i] = (char *)0xDEADBEEF;
+            }
+        }
+    }
+    else
+    {
+        for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
+        {
+            if (outputs[i] != (char *)0xDEADBEEF)
+            {
+                printf("Touched argument '%s'\n", names[i]);
+                ++e;
+            }
+        }
+    }
+
+    fprintf(stderr, "== Test without arguments\n");
+    fprintf(stdout, "== Test without arguments\n");
+
+
+    r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL);
+    if (r != retval)
+    {
+        printf("Invalid retval without arguments: %d != %d\n", retval, r);
+        ++e;
+    }
+
+    e += !string_cmp(names[0], result, outputs[0]);
+    free(outputs[0]);
+
+    return e;
+}
+
+int main(void)
+{
+    g_verbose=3;
+
+    int e = 0;
+    e += test(      0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", "");
+    e += test(      0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/");
+    e += test(      0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo");
+    e += test(      0, "ftp://@", "ftp://", "ftp:", "", "", NULL, "");
+    e += test(      0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/");
+    e += test(      0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo");
+    e += test(      0, "ftp://:@", "ftp://", "ftp:", "", "", "", "");
+    e += test(      0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/");
+    e += test(      0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo");
+    e += test(      0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password");
+    e += test(      0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@");
+    e += test(      0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, "");
+    e += test(      0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost");
+    e += test(      0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost");
+    e += test(      0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/");
+    e += test(      0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head");
+    e += test(      0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head");
+    e += test(      0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head");
+    e += test(      0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head");
+    e += test(      0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head");
+    e += test(      0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head");
+    e += test(      0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root");
+    e += test(      0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root");
+    e += test(      0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost");
+
+    return e;
+}
+]])
-- 
1.8.3.1