From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001
From: Jakub Filak <jfilak@redhat.com>
Date: Wed, 21 Oct 2015 14:20:04 +0200
Subject: [PATCH] lib: add function for removing userinfo from URIs
The function expects a valid URL.
Signed-off-by: Jakub Filak <jfilak@redhat.com>
Conflicts:
src/lib/Makefile.am
---
src/include/internal_libreport.h | 22 ++++++
src/lib/Makefile.am | 3 +-
src/lib/uriparser.c | 166 +++++++++++++++++++++++++++++++++++++++
tests/Makefile.am | 3 +-
tests/testsuite.at | 1 +
tests/uriparser.at | 144 +++++++++++++++++++++++++++++++++
6 files changed, 337 insertions(+), 2 deletions(-)
create mode 100644 src/lib/uriparser.c
create mode 100644 tests/uriparser.at
diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h
index 78a17ae..651e339 100644
--- a/src/include/internal_libreport.h
+++ b/src/include/internal_libreport.h
@@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN;
*/
struct abrt_post_state;
+/* Decomposes uri to its base elements, removes userinfo out of the hostname and
+ * composes a new uri without userinfo. The function does not validate the url.
+ *
+ * @param uri The uri that might contain userinfo
+ * @param result The userinfo free uri will be stored here. Cannot be NULL. Must
+ * be de-allocated by free.
+ * @param scheme Scheme of the uri, including the trailing ':'. Can be NULL.
+ * Result can be NULL. Result must be de-allocated by free.
+ * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param username Username of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param password Password of the uri. Can be NULL. Result can be NULL. Result
+ * must be de-allocated by free.
+ * @param location Location of the uri. Can be NULL. Result is never NULL. Result
+ * must be de-allocated by free.
+ * @return 0 on success. -EINVAL if the uri does not match the RFC 3986 regular
+ * expression; no output argument is modified in that case.
+ */
+#define uri_userinfo_remove libreport_uri_userinfo_remove
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location);
+
#ifdef __cplusplus
}
#endif
diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am
index 50142f7..b7e4781 100644
--- a/src/lib/Makefile.am
+++ b/src/lib/Makefile.am
@@ -56,7 +56,8 @@ libreport_la_SOURCES = \
config_item_info.c \
xml_parser.c \
libreport_init.c \
- global_configuration.c
+ global_configuration.c \
+ uriparser.c
libreport_la_CPPFLAGS = \
-I$(srcdir)/../include \
diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c
new file mode 100644
index 0000000..01e9782
--- /dev/null
+++ b/src/lib/uriparser.c
@@ -0,0 +1,166 @@
+/*
+ Copyright (C) 2015 ABRT team
+ Copyright (C) 2015 RedHat Inc
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation, Inc.,
+ 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+*/
+
+#include "internal_libreport.h"
+
+#include <regex.h>
+
+/* Copies len bytes of src to dst and, if copy is not NULL, stores a newly
+ * allocated duplicate of those bytes in *copy.
+ *
+ * Returns the position in dst right behind the copied bytes.
+ */
+static char *append_part(char *dst, const char *src, size_t len, char **copy)
+{
+    if (copy != NULL)
+        *copy = xstrndup(src, len);
+
+    memcpy(dst, src, len);
+    return dst + len;
+}
+
+/* Composes a copy of uri without the userinfo part of its authority and
+ * optionally hands out the individual components; the arguments are described
+ * at the declaration in internal_libreport.h.
+ *
+ * Returns 0 on success and -EINVAL if uri does not match the regular
+ * expression below, in which case no output argument is written.
+ */
+int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location)
+{
+    /* https://www.ietf.org/rfc/rfc3986.txt
+     * Appendix B.  Parsing a URI Reference with a Regular Expression
+     *
+     * scheme    = $2
+     * authority = $4
+     * location  = $5 <- introduced by jfilak
+     * path      = $6
+     * query     = $8
+     * fragment  = $10
+     *                         12            3  4          56       7   8        9 10 */
+    const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$";
+    regex_t re;
+    int r = regcomp(&re, rfc3986_rx, REG_EXTENDED);
+    assert(r == 0 || !"BUG: invalid regular expression");
+
+    /* Only the groups $1, $3, $4 and $5 are read below. */
+    regmatch_t matchptr[10];
+    r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0);
+
+    /* matchptr holds plain offsets into uri, so the compiled expression is not
+     * needed any more. It is released before the result is checked because
+     * otherwise both the error path and the success path would leak it. */
+    regfree(&re);
+
+    if (r != 0)
+    {
+        log_debug("URI does not match RFC3986 regular expression.");
+        return -EINVAL;
+    }
+
+    /* The result is never longer than uri. The code relies on xzalloc()
+     * returning zeroed memory to keep the result NUL terminated. */
+    char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char));
+    *result = ptr;
+    if (scheme != NULL)
+        *scheme = NULL;
+    if (hostname != NULL)
+        *hostname = NULL;
+    if (username != NULL)
+        *username = NULL;
+    if (password != NULL)
+        *password = NULL;
+    if (location != NULL)
+        *location = NULL;
+
+    /* https://www.ietf.org/rfc/rfc3986.txt
+     * 5.3.  Component Recomposition
+     *
+     *   result = ""
+     *   if defined(scheme) then
+     *       append scheme to result;
+     *       append ":" to result;
+     *   endif;
+     *   if defined(authority) then
+     *       append "//" to result;
+     *       append authority to result;
+     *   endif;
+     *   append path to result;
+     *   if defined(query) then
+     *       append "?" to result;
+     *       append query to result;
+     *   endif;
+     *   if defined(fragment) then
+     *       append "#" to result;
+     *       append fragment to result;
+     *   endif;
+     *   return result;
+     */
+
+    /* Append "scheme:" if defined */
+    const regmatch_t *match = &matchptr[1];
+    if (match->rm_so != -1)
+        ptr = append_part(ptr, uri + match->rm_so, match->rm_eo - match->rm_so, scheme);
+
+    /* If authority is defined, append "//" */
+    match = &matchptr[3];
+    if (match->rm_so != -1)
+        ptr = append_part(ptr, "//", 2, NULL);
+
+    /* If authority has address part, remove userinfo and add the address */
+    match = &matchptr[4];
+    if (match->rm_so != -1)
+    {
+        const char *authority = uri + match->rm_so;
+        const size_t len = match->rm_eo - match->rm_so;
+
+        /* Find the last '@'. Just for the case someone used @ in username or
+         * password. Afterwards 'at' is the offset of the address, that is 0 if
+         * there is no userinfo. */
+        size_t at = len;
+        while (at != 0 && authority[at - 1] != '@')
+            --at;
+
+        if (at != 0)
+        {
+            const size_t userinfo_len = at - 1;
+
+            /* Split at the first ':'. There should not be more ':' but this
+             * is the most secure way -> avoid leaking an excerpt of a password
+             * containing ':'. */
+            const char *colon = memchr(authority, ':', userinfo_len);
+            const size_t username_len = colon != NULL ? (size_t)(colon - authority) : userinfo_len;
+
+            if (colon != NULL && password != NULL)
+                *password = xstrndup(colon + 1, userinfo_len - username_len - 1);
+
+            if (username != NULL)
+                *username = xstrndup(authority, username_len);
+        }
+
+        ptr = append_part(ptr, authority + at, len - at, hostname);
+    }
+
+    /* Append path, query and fragment or "" */
+    match = &matchptr[5];
+    if (match->rm_so != -1)
+        append_part(ptr, uri + match->rm_so, match->rm_eo - match->rm_so, location);
+
+    return 0;
+}
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f36ab57..c22958b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -45,7 +45,8 @@ TESTSUITE_AT = \
ureport.at \
dump_dir.at \
global_config.at \
- iso_date.at
+ iso_date.at \
+ uriparser.at
EXTRA_DIST += $(TESTSUITE_AT)
TESTSUITE = $(srcdir)/testsuite
diff --git a/tests/testsuite.at b/tests/testsuite.at
index e5e2f72..72e0715 100644
--- a/tests/testsuite.at
+++ b/tests/testsuite.at
@@ -20,3 +20,4 @@ m4_include([ureport.at])
m4_include([dump_dir.at])
m4_include([global_config.at])
m4_include([iso_date.at])
+m4_include([uriparser.at])
diff --git a/tests/uriparser.at b/tests/uriparser.at
new file mode 100644
index 0000000..def021f
--- /dev/null
+++ b/tests/uriparser.at
@@ -0,0 +1,144 @@
+# -*- Autotest -*-
+
+AT_BANNER([uriparser])
+
+## ------------------- ##
+## uri_userinfo_remove ##
+## ------------------- ##
+
+AT_TESTFUN([uri_userinfo_remove],
+[[#include "internal_libreport.h"
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+
+/* Reports whether the expected string orig equals other, treating NULL as a
+ * value of its own; a mismatch is described on stdout, prefixed by message. */
+bool string_cmp(const char *message, const char *orig, const char *other)
+{
+    if (orig == NULL || other == NULL)
+    {
+        if (orig == other)
+            return true;
+
+        if (orig == NULL)
+            printf("%s: expected NULL got '%s'\n", message, other);
+        else
+            printf("%s: expected '%s' got NULL\n", message, orig);
+
+        return false;
+    }
+
+    const bool equal = strcmp(orig, other) == 0;
+    if (!equal)
+        printf("%s: '%s' != '%s'\n", message, orig, other);
+
+    return equal;
+}
+
+/* Poison value used to detect output arguments which were not written */
+#define UNTOUCHED ((char *)0xDEADBEEF)
+
+/* Verifies one output argument of a uri_userinfo_remove call which returned r.
+ * A failed call must leave the argument untouched; a successful one must set
+ * it to a value equal to expected. Values set by a successful call are freed.
+ * Returns the number of detected errors.
+ */
+static int check_output(int r, const char *name, const char *expected, char *output)
+{
+    if (r == -EINVAL)
+    {
+        if (output == UNTOUCHED)
+            return 0;
+
+        printf("Touched argument '%s'\n", name);
+        return 1;
+    }
+
+    /* The poison value must neither be read as a string nor passed to free */
+    if (output == UNTOUCHED)
+    {
+        printf("Not initialized argument '%s'\n", name);
+        return 1;
+    }
+
+    const int mismatch = !string_cmp(name, expected, output);
+    free(output);
+    return mismatch;
+}
+
+/* Runs uri_userinfo_remove on uri twice - with all output arguments and with
+ * result only - and checks the return value and the outputs of both calls.
+ * Returns the number of detected errors.
+ */
+int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location)
+{
+    int e = 0;
+    const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"} ;
+    const char *expected[] = {result, scheme, hostname, username, password, location};
+    char *outputs[6];
+
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
+        outputs[i] = UNTOUCHED;
+
+    fprintf(stderr, "==== Testing: '%s'\n", uri);
+    fprintf(stdout, "==== Testing: '%s'\n", uri);
+
+    int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]);
+    if (r != retval)
+    {
+        printf("Invalid retval %d != %d\n", retval, r);
+        ++e;
+    }
+
+    for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i)
+        e += check_output(r, names[i], expected[i], outputs[i]);
+
+    fprintf(stderr, "== Test without arguments\n");
+    fprintf(stdout, "== Test without arguments\n");
+
+    /* Re-arm the poison value so that a failed second call is recognized */
+    outputs[0] = UNTOUCHED;
+    r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL);
+    if (r != retval)
+    {
+        printf("Invalid retval without arguments: %d != %d\n", retval, r);
+        ++e;
+    }
+
+    return e + check_output(r, names[0], result, outputs[0]);
+}
+
+int main(void)
+{
+    int failures = 0;
+    g_verbose = 3;
+    /* test() arguments: retval, uri, result, scheme, hostname, username, password, location */
+    failures += test(0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", "");
+    failures += test(0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/");
+    failures += test(0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo");
+    failures += test(0, "ftp://@", "ftp://", "ftp:", "", "", NULL, "");
+    failures += test(0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/");
+    failures += test(0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo");
+    failures += test(0, "ftp://:@", "ftp://", "ftp:", "", "", "", "");
+    failures += test(0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/");
+    failures += test(0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo");
+    failures += test(0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password");
+    failures += test(0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@");
+    failures += test(0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, "");
+    failures += test(0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost");
+    failures += test(0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost");
+    failures += test(0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/");
+    failures += test(0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head");
+    failures += test(0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head");
+    failures += test(0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head");
+    failures += test(0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head");
+    failures += test(0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head");
+    failures += test(0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head");
+    failures += test(0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root");
+    failures += test(0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root");
+    failures += test(0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost");
+
+    return failures;
+}
+]])
--
1.8.3.1