From 3e5fb3b7d678786dfd98b412e37f4757c7584aba Mon Sep 17 00:00:00 2001 From: Jakub Filak Date: Wed, 21 Oct 2015 14:20:04 +0200 Subject: [PATCH] lib: add function for removing userinfo from URIs The function expects a valid URL. Signed-off-by: Jakub Filak Conflicts: src/lib/Makefile.am --- src/include/internal_libreport.h | 22 ++++++ src/lib/Makefile.am | 3 +- src/lib/uriparser.c | 166 +++++++++++++++++++++++++++++++++++++++ tests/Makefile.am | 3 +- tests/testsuite.at | 1 + tests/uriparser.at | 144 +++++++++++++++++++++++++++++++++ 6 files changed, 337 insertions(+), 2 deletions(-) create mode 100644 src/lib/uriparser.c create mode 100644 tests/uriparser.at diff --git a/src/include/internal_libreport.h b/src/include/internal_libreport.h index 78a17ae..651e339 100644 --- a/src/include/internal_libreport.h +++ b/src/include/internal_libreport.h @@ -1043,6 +1043,28 @@ void show_usage_and_die(const char *usage, const struct options *opt) NORETURN; */ struct abrt_post_state; +/* Decomposes uri to its base elements, removes userinfo out of the hostname and + * composes a new uri without userinfo. + * + * The function does not validate the url. + * + * @param uri The uri that might contain userinfo + * @param result The userinfo free uri will be store here. Cannot be null. Must + * be de-allocated by free. + * @param scheme Scheme of the uri. Can be NULL. Result can be NULL. Result + * must be de-allocated by free. + * @param hostname Hostname of the uri. Can be NULL. Result can be NULL. Result + * must be de-allocated by free. + * @param username Username of the uri. Can be NULL. Result can be NULL. Result + * must be de-allocated by free. + * @param password Password of the uri. Can be NULL. Result can be NULL. Result + * must be de-allocated by free. + * @param location Location of the uri. Can be NULL. Result is never NULL. Result + * must be de-allocated by free. + */ +#define uri_userinfo_remove libreport_uri_userinfo_remove +int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location); + #ifdef __cplusplus } #endif diff --git a/src/lib/Makefile.am b/src/lib/Makefile.am index 50142f7..b7e4781 100644 --- a/src/lib/Makefile.am +++ b/src/lib/Makefile.am @@ -56,7 +56,8 @@ libreport_la_SOURCES = \ config_item_info.c \ xml_parser.c \ libreport_init.c \ - global_configuration.c + global_configuration.c \ + uriparser.c libreport_la_CPPFLAGS = \ -I$(srcdir)/../include \ diff --git a/src/lib/uriparser.c b/src/lib/uriparser.c new file mode 100644 index 0000000..01e9782 --- /dev/null +++ b/src/lib/uriparser.c @@ -0,0 +1,166 @@ +/* + Copyright (C) 2015 ABRT team + Copyright (C) 2015 RedHat Inc + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +*/ + +#include "internal_libreport.h" + +#include + +int uri_userinfo_remove(const char *uri, char **result, char **scheme, char **hostname, char **username, char **password, char **location) +{ + /* https://www.ietf.org/rfc/rfc3986.txt + * Appendix B. Parsing a URI Reference with a Regular Expression + * + * scheme = $2 + * authority = $4 + * location = $5 <- introduced by jfilak + * path = $6 + * query = $8 + * fragment = $10 + * 12 3 4 56 7 8 9 10 */ + const char *rfc3986_rx = "^(([^:/?#]+):)?(//([^/?#]*))?(([^?#]*)(\\?([^#]*))?(#(.*))?)$"; + regex_t re; + int r = regcomp(&re, rfc3986_rx, REG_EXTENDED); + assert(r == 0 || !"BUG: invalid regular expression"); + + regmatch_t matchptr[10]; + r = regexec(&re, uri, ARRAY_SIZE(matchptr), matchptr, 0); + if (r != 0) + { + log_debug("URI does not match RFC3986 regular expression."); + return -EINVAL; + } + + char *ptr = xzalloc((strlen(uri) + 1) * sizeof(char)); + *result = ptr; + if (scheme != NULL) + *scheme = NULL; + if (hostname != NULL) + *hostname = NULL; + if (username != NULL) + *username = NULL; + if (password != NULL) + *password = NULL; + if (location != NULL) + *location= NULL; + + /* https://www.ietf.org/rfc/rfc3986.txt + * 5.3. Component Recomposition + * + result = "" + + if defined(scheme) then + append scheme to result; + append ":" to result; + endif; + + if defined(authority) then + append "//" to result; + append authority to result; + endif; + + append path to result; + + if defined(query) then + append "?" to result; + append query to result; + endif; + + if defined(fragment) then + append "#" to result; + append fragment to result; + endif; + + return result; + */ + +#define APPEND_MATCH(i, output) \ + if (matchptr[(i)].rm_so != -1) \ + { \ + size_t len = 0; \ + len = matchptr[(i)].rm_eo - matchptr[(i)].rm_so; \ + if (output) *output = xstrndup(uri + matchptr[(i)].rm_so, len); \ + strncpy(ptr, uri + matchptr[(i)].rm_so, len); \ + ptr += len; \ + } + + /* Append "scheme:" if defined */ + APPEND_MATCH(1, scheme); + + /* If authority is defined, append "//" */ + regmatch_t *match_authority = matchptr + 3; + if (match_authority->rm_so != -1) + { + strcat(ptr, "//"); + ptr += 2; + } + + ++match_authority; + /* If authority has address part, remove userinfo and add the address */ + if (match_authority->rm_so != -1) + { + size_t len = match_authority->rm_eo - match_authority->rm_so; + const char *authority = uri + match_authority->rm_so; + + /* Find the last '@'. Just for the case some used @ in username or + * password */ + size_t at = len; + while (at != 0) + { + if (authority[--at] != '@') + continue; + + /* Find the first ':' before @. There should not be more ':' but this + * is the most secure way -> avoid leaking an excerpt of a password + * containing ':'.*/ + size_t colon = 0; + while (colon < at) + { + if (authority[colon] != ':') + { + ++colon; + continue; + } + + if (password != NULL) + *password = xstrndup(authority + colon + 1, at - colon - 1); + + break; + } + + if (username != NULL) + *username = xstrndup(authority, colon); + + ++at; + break; + } + + len -= at; + + if (hostname != NULL) + *hostname = xstrndup(authority + at, len); + + strncpy(ptr, authority + at, len); + ptr += len; + } + + /* Append path, query and fragment or "" */ + APPEND_MATCH(5, location); + + return 0; +} diff --git a/tests/Makefile.am b/tests/Makefile.am index f36ab57..c22958b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -45,7 +45,8 @@ TESTSUITE_AT = \ ureport.at \ dump_dir.at \ global_config.at \ - iso_date.at + iso_date.at \ + uriparser.at EXTRA_DIST += $(TESTSUITE_AT) TESTSUITE = $(srcdir)/testsuite diff --git a/tests/testsuite.at b/tests/testsuite.at index e5e2f72..72e0715 100644 --- a/tests/testsuite.at +++ b/tests/testsuite.at @@ -20,3 +20,4 @@ m4_include([ureport.at]) m4_include([dump_dir.at]) m4_include([global_config.at]) m4_include([iso_date.at]) +m4_include([uriparser.at]) diff --git a/tests/uriparser.at b/tests/uriparser.at new file mode 100644 index 0000000..def021f --- /dev/null +++ b/tests/uriparser.at @@ -0,0 +1,144 @@ +# -*- Autotest -*- + +AT_BANNER([uriparser]) + +## ------------------- ## +## uri_userinfo_remove ## +## ------------------- ## + +AT_TESTFUN([uri_userinfo_remove], +[[#include "internal_libreport.h" +#include +#include +#include + +bool string_cmp(const char *message, const char *orig, const char *other) +{ + if (orig == NULL && other != NULL) + { + printf("%s: expected NULL got '%s'\n", message, other); + return false; + } + + if (orig != NULL && other == NULL) + { + printf("%s: expected '%s' got NULL\n", message, orig); + return false; + } + + if (orig == NULL && other == NULL) + return true; + + if (strcmp(orig, other) == 0) + return true; + + printf("%s: '%s' != '%s'\n", message, orig, other); + return false; +} + +int test(int retval, const char *uri, const char *result, const char *scheme, const char *hostname, const char *username, const char *password, const char *location) +{ + int e = 0; + const char *names[] = {"result", "scheme", "hostname", "username", "password", "location"} ; + char *outputs[6]; + const char *expected[6]; + + for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i) + outputs[i] = (char *)0xDEADBEEF; + + expected[0] = result; + expected[1] = scheme; + expected[2] = hostname; + expected[3] = username; + expected[4] = password; + expected[5] = location; + + fprintf(stderr, "==== Testing: '%s'\n", uri); + fprintf(stdout, "==== Testing: '%s'\n", uri); + + int r = uri_userinfo_remove(uri, &outputs[0], &outputs[1], &outputs[2], &outputs[3], &outputs[4], &outputs[5]); + if (r != retval) + { + printf("Invalid retval %d != %d\n", retval, r); + ++e; + } + + if (r != -EINVAL) + { + for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i) + { + if (outputs[i] == (char *)0xDEADBEEF) + { + printf("Not initialized argument '%s'\n", names[i]); + ++e; + } + else + { + e += !string_cmp(names[i], expected[i], outputs[i]); + free(outputs[i]); + outputs[i] = (char *)0xDEADBEEF; + } + } + } + else + { + for (size_t i = 0; i < ARRAY_SIZE(outputs); ++i) + { + if (outputs[i] != (char *)0xDEADBEEF) + { + printf("Touched argument '%s'\n", names[i]); + ++e; + } + } + } + + fprintf(stderr, "== Test without arguments\n"); + fprintf(stdout, "== Test without arguments\n"); + + + r = uri_userinfo_remove(uri, &outputs[0], NULL, NULL, NULL, NULL, NULL); + if (r != retval) + { + printf("Invalid retval without arguments: %d != %d\n", retval, r); + ++e; + } + + e += !string_cmp(names[0], result, outputs[0]); + free(outputs[0]); + + return e; +} + +int main(void) +{ + g_verbose=3; + + int e = 0; + e += test( 0, "ftp://root:password@", "ftp://", "ftp:", "", "root", "password", ""); + e += test( 0, "ftp://root:password@/", "ftp:///", "ftp:", "", "root", "password", "/"); + e += test( 0, "ftp://root:password@/foo", "ftp:///foo", "ftp:", "", "root", "password", "/foo"); + e += test( 0, "ftp://@", "ftp://", "ftp:", "", "", NULL, ""); + e += test( 0, "ftp://@/", "ftp:///", "ftp:", "", "", NULL, "/"); + e += test( 0, "ftp://@/foo", "ftp:///foo", "ftp:", "", "", NULL, "/foo"); + e += test( 0, "ftp://:@", "ftp://", "ftp:", "", "", "", ""); + e += test( 0, "ftp://:@/", "ftp:///", "ftp:", "", "", "", "/"); + e += test( 0, "ftp://:@/foo", "ftp:///foo", "ftp:", "", "", "", "/foo"); + e += test( 0, "root:password", "root:password", "root:", NULL, NULL, NULL, "password"); + e += test( 0, "root:password@", "root:password@", "root:", NULL, NULL, NULL, "password@"); + e += test( 0, "ftp://root:password", "ftp://root:password", "ftp:", "root:password", NULL, NULL, ""); + e += test( 0, "scp:://root:password@localhost", "scp:://root:password@localhost", "scp:", NULL, NULL, NULL, "://root:password@localhost"); + e += test( 0, "scp:///root:password@localhost", "scp:///root:password@localhost", "scp:", "", NULL, NULL, "/root:password@localhost"); + e += test( 0, "ftp://root:password/", "ftp://root:password/", "ftp:", "root:password", NULL, NULL, "/"); + e += test( 0, "scp://B@rt:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "P@ssw0rd", "/t@rget1?query=foo#head"); + e += test( 0, "scp://B@rt@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", NULL, "/t@rget1?query=foo#head"); + e += test( 0, "scp://B@rt:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "B@rt", "", "/t@rget1?query=foo#head"); + e += test( 0, "scp://:P@ssw0rd@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "P@ssw0rd", "/t@rget1?query=foo#head"); + e += test( 0, "scp://@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", NULL, "/t@rget1?query=foo#head"); + e += test( 0, "scp://:@localhost/t@rget1?query=foo#head", "scp://localhost/t@rget1?query=foo#head", "scp:", "localhost", "", "", "/t@rget1?query=foo#head"); + e += test( 0, "password/root", "password/root", NULL, NULL, NULL, NULL, "password/root"); + e += test( 0, "/password/root", "/password/root", NULL, NULL, NULL, NULL, "/password/root"); + e += test( 0, "://root:passowrd@localhost", "://root:passowrd@localhost", NULL, NULL, NULL, NULL, "://root:passowrd@localhost"); + + return e; +} +]]) -- 1.8.3.1