From f036d10095892335b07eec02e6a1e80e18164a34 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 05 2019 20:55:25 +0000 Subject: import curl-7.61.1-11.el8 --- diff --git a/SOURCES/0008-curl-7.61.1-CVE-2018-20483.patch b/SOURCES/0008-curl-7.61.1-CVE-2018-20483.patch new file mode 100644 index 0000000..8b20ff6 --- /dev/null +++ b/SOURCES/0008-curl-7.61.1-CVE-2018-20483.patch @@ -0,0 +1,4776 @@ +From 907da069c450ca20442839d9e95e3661a5c06b61 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Sun, 5 Aug 2018 11:51:07 +0200 +Subject: [PATCH 01/14] URL-API + +See header file and man pages for API. All documented API details work +and are tested in the 1560 test case. + +Closes #2842 + +Upstream-commit: fb30ac5a2d63773c529c19259754e2b306ac2e2e +Signed-off-by: Kamil Dudka +--- + docs/libcurl/Makefile.inc | 1 + + docs/libcurl/curl_url.3 | 61 ++ + docs/libcurl/curl_url_cleanup.3 | 44 + + docs/libcurl/curl_url_dup.3 | 52 ++ + docs/libcurl/curl_url_get.3 | 110 +++ + docs/libcurl/curl_url_set.3 | 120 +++ + docs/libcurl/symbols-in-versions | 30 + + include/curl/Makefile.am | 4 +- + include/curl/curl.h | 1 + + include/curl/urlapi.h | 121 +++ + lib/Makefile.inc | 5 +- + lib/escape.c | 20 +- + lib/escape.h | 3 +- + lib/imap.c | 3 +- + lib/transfer.c | 314 +------ + lib/url.c | 44 +- + lib/url.h | 2 + + lib/{escape.h => urlapi-int.h} | 22 +- + lib/urlapi.c | 1315 ++++++++++++++++++++++++++++++ + tests/data/Makefile.inc | 2 + + tests/data/test1560 | 28 + + tests/libtest/Makefile.am | 5 + + tests/libtest/Makefile.inc | 4 + + tests/libtest/lib1560.c | 760 +++++++++++++++++ + 24 files changed, 2716 insertions(+), 355 deletions(-) + create mode 100644 docs/libcurl/curl_url.3 + create mode 100644 docs/libcurl/curl_url_cleanup.3 + create mode 100644 docs/libcurl/curl_url_dup.3 + create mode 100644 docs/libcurl/curl_url_get.3 + create mode 100644 docs/libcurl/curl_url_set.3 + create mode 100644 include/curl/urlapi.h + copy lib/{escape.h => urlapi-int.h} (66%) + create mode 100644 
lib/urlapi.c + create mode 100644 tests/data/test1560 + create mode 100644 tests/libtest/lib1560.c + +diff --git a/docs/libcurl/Makefile.inc b/docs/libcurl/Makefile.inc +index eea48c4..955492c 100644 +--- a/docs/libcurl/Makefile.inc ++++ b/docs/libcurl/Makefile.inc +@@ -22,4 +22,5 @@ man_MANS = curl_easy_cleanup.3 curl_easy_getinfo.3 curl_easy_init.3 \ + curl_mime_data.3 curl_mime_data_cb.3 curl_mime_filedata.3 \ + curl_mime_filename.3 curl_mime_subparts.3 \ + curl_mime_type.3 curl_mime_headers.3 curl_mime_encoder.3 libcurl-env.3 \ ++ curl_url.3 curl_url_cleanup.3 curl_url_dup.3 curl_url_get.3 curl_url_set.3 \ + libcurl-security.3 +diff --git a/docs/libcurl/curl_url.3 b/docs/libcurl/curl_url.3 +new file mode 100644 +index 0000000..0a56264 +--- /dev/null ++++ b/docs/libcurl/curl_url.3 +@@ -0,0 +1,61 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. ++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. 
++.\" * ++.\" ************************************************************************** ++.TH curl_url 3 "6 Aug 2018" "libcurl" "libcurl Manual" ++.SH NAME ++curl_url - returns a new CURLU handle ++.SH SYNOPSIS ++.B #include <curl/curl.h> ++ ++CURLU *curl_url(); ++.SH EXPERIMENTAL ++The URL API is considered \fBEXPERIMENTAL\fP until further notice. Please test ++it, report bugs and help us perfect it. Once proven to be reliable, the ++experimental label will be removed. ++ ++While this API is marked experimental, we reserve the right to modify the API ++slightly if we deem it necessary and it makes it notably better or easier to ++use. ++.SH DESCRIPTION ++This function allocates and returns a pointer to a fresh CURLU handle, to ++be used for further use of the URL API. ++.SH RETURN VALUE ++Returns a \fBCURLU *\fP if successful, or NULL if out of memory. ++.SH EXAMPLE ++.nf ++ CURLUcode rc; ++ CURLU *url = curl_url(); ++ rc = curl_url_set(url, CURLUPART_URL, "https://example.com", 0); ++ if(!rc) { ++ char *scheme; ++ rc = curl_url_get(url, CURLUPART_SCHEME, &scheme, 0); ++ if(!rc) { ++ printf("the scheme is %s\n", scheme); ++ curl_free(scheme); ++ } ++ curl_url_cleanup(url); ++ } ++.fi ++.SH AVAILABILITY ++Added in curl 7.63.0 ++.SH "SEE ALSO" ++.BR curl_url_cleanup "(3), " curl_url_get "(3), " curl_url_set "(3), " ++.BR curl_url_dup "(3), " +diff --git a/docs/libcurl/curl_url_cleanup.3 b/docs/libcurl/curl_url_cleanup.3 +new file mode 100644 +index 0000000..a8158b7 +--- /dev/null ++++ b/docs/libcurl/curl_url_cleanup.3 +@@ -0,0 +1,44 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. 
The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. ++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. ++.\" * ++.\" ************************************************************************** ++.TH curl_url_cleanup 3 "6 Aug 2018" "libcurl" "libcurl Manual" ++.SH NAME ++curl_url_cleanup - free a CURLU handle ++.SH SYNOPSIS ++.B #include ++ ++void curl_url_cleanup(CURLU *handle); ++.fi ++.SH DESCRIPTION ++Frees all the resources associated with the given CURLU handle! ++.SH RETURN VALUE ++none ++.SH EXAMPLE ++.nf ++ CURLU *url = curl_url(); ++ curl_url_set(url, CURLUPART_URL, "https://example.com", 0); ++ curl_url_cleanup(url); ++.fi ++.SH AVAILABILITY ++Added in curl 7.63.0 ++.SH "SEE ALSO" ++.BR curl_url_dup "(3), " curl_url "(3), " curl_url_set "(3), " ++.BR curl_url_get "(3), " +diff --git a/docs/libcurl/curl_url_dup.3 b/docs/libcurl/curl_url_dup.3 +new file mode 100644 +index 0000000..4815dbd +--- /dev/null ++++ b/docs/libcurl/curl_url_dup.3 +@@ -0,0 +1,52 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. 
++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. ++.\" * ++.\" ************************************************************************** ++.TH curl_url_dup 3 "6 Aug 2018" "libcurl" "libcurl Manual" ++.SH NAME ++curl_url_dup - duplicate a CURLU handle ++.SH SYNOPSIS ++.B #include ++ ++CURLU *curl_url_dup(CURLU *inhandle); ++.fi ++.SH DESCRIPTION ++Duplicates a given CURLU \fIinhandle\fP and all its contents and returns a ++pointer to a new CURLU handle. The new handle also needs to be freed with ++\fIcurl_url_cleanup(3)\fP. ++.SH RETURN VALUE ++Returns a new handle or NULL if out of memory. ++.SH EXAMPLE ++.nf ++ CURLUcode rc; ++ CURLU *url = curl_url(); ++ CURLU *url2; ++ rc = curl_url_set(url, CURLUPART_URL, "https://example.com", 0); ++ if(!rc) { ++ url2 = curl_url_dup(url); /* clone it! */ ++ curl_url_cleanup(url2); ++ } ++ curl_url_cleanup(url); ++.fi ++.SH AVAILABILITY ++Added in curl 7.63.0 ++.SH "SEE ALSO" ++.BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_set "(3), " ++.BR curl_url_get "(3), " +diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3 +new file mode 100644 +index 0000000..824d496 +--- /dev/null ++++ b/docs/libcurl/curl_url_get.3 +@@ -0,0 +1,110 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. 
The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. ++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. ++.\" * ++.\" ************************************************************************** ++.TH curl_url_get 3 "6 Aug 2018" "libcurl" "libcurl Manual" ++.SH NAME ++curl_url_get - extract a part from a URL ++.SH SYNOPSIS ++.B #include ++ ++.nf ++CURLUcode curl_url_get(CURLU *url, ++ CURLUPart what, ++ char **part, ++ unsigned int flags) ++.fi ++.SH DESCRIPTION ++Given the \fIurl\fP handle of an already parsed URL, this function lets the ++user extract individual pieces from it. ++ ++The \fIwhat\fP argument should be the particular part to extract (see list ++below) and \fIpart\fP points to a 'char *' to get updated to point to a newly ++allocated string with the contents. ++ ++The \fIflags\fP argument is a bitmask with individual features. ++ ++The returned part pointer must be freed with \fIcurl_free(3)\fP after use. ++.SH FLAGS ++The flags argument is zero, one or more bits set in a bitmask. ++.IP CURLU_DEFAULT_PORT ++If the handle has no port stored, this option will make \fIcurl_url_get(3)\fP ++return the default port for the used scheme. ++.IP CURLU_DEFAULT_SCHEME ++If the handle has no scheme stored, this option will make ++\fIcurl_url_get(3)\fP return the default scheme instead of error. ++.IP CURLU_NO_DEFAULT_PORT ++Instructs \fIcurl_url_get(3)\fP to not return a port number if it matches the ++default port for the scheme. ++.IP CURLU_URLDECODE ++Asks \fIcurl_url_get(3)\fP to URL decode the contents before returning it. It ++will not attempt to decode the scheme, the port number or the full URL. 
++ ++The query component will also get plus-to-space conversion as a bonus when ++this bit is set. ++ ++Note that this URL decoding is charset unaware and you will get a zero ++terminated string back with data that could be intended for a particular ++encoding. ++ ++If there are any byte values lower than 32 in the decoded string, the get ++operation will return an error instead. ++.SH PARTS ++.IP CURLUPART_URL ++When asked to return the full URL, \fIcurl_url_get(3)\fP will return a ++normalized and possibly cleaned up version of what was previously parsed. ++.IP CURLUPART_SCHEME ++Scheme cannot be URL decoded on get. ++.IP CURLUPART_USER ++.IP CURLUPART_PASSWORD ++.IP CURLUPART_OPTIONS ++.IP CURLUPART_HOST ++.IP CURLUPART_PORT ++Port cannot be URL decoded on get. ++.IP CURLUPART_PATH ++.IP CURLUPART_QUERY ++The query part will also get pluses converted to space when asked to URL ++decode on get with the CURLU_URLDECODE bit. ++.IP CURLUPART_FRAGMENT ++.SH RETURN VALUE ++Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went ++fine. ++ ++If this function returns an error, no URL part is returned. 
++.SH EXAMPLE ++.nf ++ CURLUcode rc; ++ CURLU *url = curl_url(); ++ rc = curl_url_set(url, CURLUPART_URL, "https://example.com", 0); ++ if(!rc) { ++ char *scheme; ++ rc = curl_url_get(url, CURLUPART_SCHEME, &scheme, 0); ++ if(!rc) { ++ printf("the scheme is %s\n", scheme); ++ curl_free(scheme); ++ } ++ curl_url_cleanup(url); ++ } ++.fi ++.SH AVAILABILITY ++Added in curl 7.63.0 ++.SH "SEE ALSO" ++.BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_set "(3), " ++.BR curl_url_dup "(3), " +diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 +new file mode 100644 +index 0000000..75fc0d9 +--- /dev/null ++++ b/docs/libcurl/curl_url_set.3 +@@ -0,0 +1,120 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. ++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. 
++.\" * ++.\" ************************************************************************** ++.TH curl_url_set 3 "6 Aug 2018" "libcurl" "libcurl Manual" ++.SH NAME ++curl_url_set - set a part from a URL ++.SH SYNOPSIS ++.B #include ++ ++CURLUcode curl_url_set(CURLU *url, ++ CURLUPart part, ++ const char *content, ++ unsigned int flags) ++.fi ++.SH DESCRIPTION ++Given the \fIurl\fP handle of an already parsed URL, this function lets the ++user set/update individual pieces of it. ++ ++The \fIpart\fP argument should identify the particular URL part (see list ++below) to set or change, with \fIcontent\fP pointing to a zero terminated ++string with the new contents for that URL part. The contents should be in the ++form and encoding they'd use in a URL: URL encoded. ++ ++Setting a part to a NULL pointer will effectively remove that part's contents ++from the CURLU handle. ++ ++The \fIflags\fP argument is a bitmask with independent features. ++.SH PARTS ++.IP CURLUPART_URL ++Allows the full URL of the handle to be replaced. If the handle already is ++populated with a URL, the new URL can be relative to the previous. ++ ++When successfully setting a new URL, relative or absolute, the handle contents ++will be replaced with the information of the newly set URL. ++ ++Pass a pointer to a zero terminated string to the \fIurl\fP parameter. The ++string must point to a correctly formatted "RFC 3986+" URL or be a NULL ++pointer. ++.IP CURLUPART_SCHEME ++Scheme cannot be URL decoded on set. ++.IP CURLUPART_USER ++.IP CURLUPART_PASSWORD ++.IP CURLUPART_OPTIONS ++.IP CURLUPART_HOST ++The host name can use IDNA. The string must then be encoded as your locale ++says or UTF-8 (when winidn is used). ++.IP CURLUPART_PORT ++Port cannot be URL encoded on set. ++.IP CURLUPART_PATH ++If a path is set in the URL without a leading slash, a slash will be inserted ++automatically when this URL is read from the handle. 
++.IP CURLUPART_QUERY ++The query part will also get spaces converted to pluses when asked to URL ++encode on set with the CURLU_URLENCODE bit. ++ ++If used with \fICURLU_APPENDQUERY\fP, the provided part will be appended on ++the end of the existing query - and if the previous part didn't end with an ++ampersand (&), an ampersand will be inserted before the new appended part. ++ ++When \fICURLU_APPENDQUERY\fP is used together with \fICURLU_URLENCODE\fP, ++the '=' symbols will not be URL encoded. ++ ++The question mark in the URL is not part of the actual query contents. ++.IP CURLUPART_FRAGMENT ++The hash sign in the URL is not part of the actual fragment contents. ++.SH FLAGS ++The flags argument is zero, one or more bits set in a bitmask. ++.IP CURLU_NON_SUPPORT_SCHEME ++If set, allows \fIcurl_url_set(3)\fP to set a non-supported scheme. ++.IP CURLU_URLENCODE ++When set, \fIcurl_url_set(3)\fP URL encodes the part on entry, except for ++scheme, port and URL. ++ ++When setting the path component with URL encoding enabled, the slash character ++will be skipped. ++ ++The query part gets space-to-plus conversion before the URL conversion. ++ ++This URL encoding is charset unaware and will convert the input in a ++byte-by-byte manner. ++.SH RETURN VALUE ++Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went ++fine. ++ ++If this function returns an error, no URL part is returned. 
++.SH EXAMPLE ++.nf ++ CURLUcode rc; ++ CURLU *url = curl_url(); ++ rc = curl_url_set(url, CURLUPART_URL, "https://example.com", 0); ++ if(!rc) { ++ char *scheme; ++ /* change it to an FTP URL */ ++ rc = curl_url_set(url, CURLUPART_SCHEME, "ftp", 0); ++ } ++ curl_url_cleanup(url); ++.fi ++.SH AVAILABILITY ++Added in curl 7.63.0 ++.SH "SEE ALSO" ++.BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_get "(3), " ++.BR curl_url_dup "(3), " +diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions +index 7448b4f..c797cb7 100644 +--- a/docs/libcurl/symbols-in-versions ++++ b/docs/libcurl/symbols-in-versions +@@ -718,6 +718,36 @@ CURLSSLSET_NO_BACKENDS 7.56.0 + CURLSSLSET_OK 7.56.0 + CURLSSLSET_TOO_LATE 7.56.0 + CURLSSLSET_UNKNOWN_BACKEND 7.56.0 ++CURLUPART_FRAGMENT 7.62.0 ++CURLUPART_HOST 7.62.0 ++CURLUPART_OPTIONS 7.62.0 ++CURLUPART_PASSWORD 7.62.0 ++CURLUPART_PATH 7.62.0 ++CURLUPART_PORT 7.62.0 ++CURLUPART_QUERY 7.62.0 ++CURLUPART_SCHEME 7.62.0 ++CURLUPART_URL 7.62.0 ++CURLUPART_USER 7.62.0 ++CURLUE_BAD_HANDLE 7.62.0 ++CURLUE_BAD_PARTPOINTER 7.62.0 ++CURLUE_BAD_PORT_NUMBER 7.62.0 ++CURLUE_MALFORMED_INPUT 7.62.0 ++CURLUE_NO_FRAGMENT 7.62.0 ++CURLUE_NO_HOST 7.62.0 ++CURLUE_NO_OPTIONS 7.62.0 ++CURLUE_NO_PASSWORD 7.62.0 ++CURLUE_NO_PATH 7.62.0 ++CURLUE_NO_PORT 7.62.0 ++CURLUE_NO_QUERY 7.62.0 ++CURLUE_NO_SCHEME 7.62.0 ++CURLUE_NO_USER 7.62.0 ++CURLUE_OK 7.62.0 ++CURLUE_OUT_OF_MEMORY 7.62.0 ++CURLUE_RELATIVE 7.62.0 ++CURLUE_UNKNOWN_PART 7.62.0 ++CURLUE_UNSUPPORTED_SCHEME 7.62.0 ++CURLUE_URLDECODE 7.62.0 ++CURLUE_USER_NOT_ALLOWED 7.62.0 + CURLUSESSL_ALL 7.17.0 + CURLUSESSL_CONTROL 7.17.0 + CURLUSESSL_NONE 7.17.0 +diff --git a/include/curl/Makefile.am b/include/curl/Makefile.am +index 989d4a2..bf5f061 100644 +--- a/include/curl/Makefile.am ++++ b/include/curl/Makefile.am +@@ -5,7 +5,7 @@ + # | (__| |_| | _ <| |___ + # \___|\___/|_| \_\_____| + # +-# Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. 
++# Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. + # + # This software is licensed as described in the file COPYING, which + # you should have received as part of this distribution. The terms +@@ -21,7 +21,7 @@ + ########################################################################### + pkginclude_HEADERS = \ + curl.h curlver.h easy.h mprintf.h stdcheaders.h multi.h \ +- typecheck-gcc.h system.h ++ typecheck-gcc.h system.h urlapi.h + + pkgincludedir= $(includedir)/curl + +diff --git a/include/curl/curl.h b/include/curl/curl.h +index 067b34d..8f473e2 100644 +--- a/include/curl/curl.h ++++ b/include/curl/curl.h +@@ -2779,6 +2779,7 @@ CURL_EXTERN CURLcode curl_easy_pause(CURL *handle, int bitmask); + stuff before they can be included! */ + #include "easy.h" /* nothing in curl is fun without the easy stuff */ + #include "multi.h" ++#include "urlapi.h" + + /* the typechecker doesn't work in C++ (yet) */ + #if defined(__GNUC__) && defined(__GNUC_MINOR__) && \ +diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h +new file mode 100644 +index 0000000..b16cfce +--- /dev/null ++++ b/include/curl/urlapi.h +@@ -0,0 +1,121 @@ ++#ifndef __CURL_URLAPI_H ++#define __CURL_URLAPI_H ++/*************************************************************************** ++ * _ _ ____ _ ++ * Project ___| | | | _ \| | ++ * / __| | | | |_) | | ++ * | (__| |_| | _ <| |___ ++ * \___|\___/|_| \_\_____| ++ * ++ * Copyright (C) 2018, Daniel Stenberg, , et al. ++ * ++ * This software is licensed as described in the file COPYING, which ++ * you should have received as part of this distribution. The terms ++ * are also available at https://curl.haxx.se/docs/copyright.html. ++ * ++ * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++ * copies of the Software, and permit persons to whom the Software is ++ * furnished to do so, under the terms of the COPYING file. 
++ * ++ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++ * KIND, either express or implied. ++ * ++ ***************************************************************************/ ++ ++#ifdef __cplusplus ++extern "C" { ++#endif ++ ++/* the error codes for the URL API */ ++typedef enum { ++ CURLUE_OK, ++ CURLUE_BAD_HANDLE, /* 1 */ ++ CURLUE_BAD_PARTPOINTER, /* 2 */ ++ CURLUE_MALFORMED_INPUT, /* 3 */ ++ CURLUE_BAD_PORT_NUMBER, /* 4 */ ++ CURLUE_UNSUPPORTED_SCHEME, /* 5 */ ++ CURLUE_URLDECODE, /* 6 */ ++ CURLUE_RELATIVE, /* 7 */ ++ CURLUE_USER_NOT_ALLOWED, /* 8 */ ++ CURLUE_UNKNOWN_PART, /* 9 */ ++ CURLUE_NO_SCHEME, /* 10 */ ++ CURLUE_NO_USER, /* 11 */ ++ CURLUE_NO_PASSWORD, /* 12 */ ++ CURLUE_NO_OPTIONS, /* 13 */ ++ CURLUE_NO_HOST, /* 14 */ ++ CURLUE_NO_PORT, /* 15 */ ++ CURLUE_NO_PATH, /* 16 */ ++ CURLUE_NO_QUERY, /* 17 */ ++ CURLUE_NO_FRAGMENT, /* 18 */ ++ CURLUE_OUT_OF_MEMORY /* 19 */ ++} CURLUcode; ++ ++typedef enum { ++ CURLUPART_URL, ++ CURLUPART_SCHEME, ++ CURLUPART_USER, ++ CURLUPART_PASSWORD, ++ CURLUPART_OPTIONS, ++ CURLUPART_HOST, ++ CURLUPART_PORT, ++ CURLUPART_PATH, ++ CURLUPART_QUERY, ++ CURLUPART_FRAGMENT ++} CURLUPart; ++ ++#define CURLU_DEFAULT_PORT (1<<0) /* return default port number */ ++#define CURLU_NO_DEFAULT_PORT (1<<1) /* act as if no port number was set, ++ if the port number matches the ++ default for the scheme */ ++#define CURLU_DEFAULT_SCHEME (1<<2) /* return default scheme if ++ missing */ ++#define CURLU_NON_SUPPORT_SCHEME (1<<3) /* allow non-supported scheme */ ++#define CURLU_PATH_AS_IS (1<<4) /* leave dot sequences */ ++#define CURLU_DISALLOW_USER (1<<5) /* no user+password allowed */ ++#define CURLU_URLDECODE (1<<6) /* URL decode on get */ ++#define CURLU_URLENCODE (1<<7) /* URL encode on set */ ++#define CURLU_APPENDQUERY (1<<8) /* append a form style part */ ++ ++typedef struct Curl_URL CURLU; ++ ++/* ++ * curl_url() creates a new CURLU handle and returns a pointer to it. 
++ * Must be freed with curl_url_cleanup(). ++ */ ++CURL_EXTERN CURLU *curl_url(void); ++ ++/* ++ * curl_url_cleanup() frees the CURLU handle and related resources used for ++ * the URL parsing. It will not free strings previously returned with the URL ++ * API. ++ */ ++CURL_EXTERN void curl_url_cleanup(CURLU *handle); ++ ++/* ++ * curl_url_dup() duplicates a CURLU handle and returns a new copy. The new ++ * handle must also be freed with curl_url_cleanup(). ++ */ ++CURL_EXTERN CURLU *curl_url_dup(CURLU *in); ++ ++/* ++ * curl_url_get() extracts a specific part of the URL from a CURLU ++ * handle. Returns error code. The returned pointer MUST be freed with ++ * curl_free() afterwards. ++ */ ++CURL_EXTERN CURLUcode curl_url_get(CURLU *handle, CURLUPart what, ++ char **part, unsigned int flags); ++ ++/* ++ * curl_url_set() sets a specific part of the URL in a CURLU handle. Returns ++ * error code. The passed in string will be copied. Passing a NULL instead of ++ * a part string, clears that part. 
++ */ ++CURL_EXTERN CURLUcode curl_url_set(CURLU *handle, CURLUPart what, ++ const char *part, unsigned int flags); ++ ++ ++#ifdef __cplusplus ++} /* end of extern "C" */ ++#endif ++ ++#endif +diff --git a/lib/Makefile.inc b/lib/Makefile.inc +index 76ca6d0..1ff82e1 100644 +--- a/lib/Makefile.inc ++++ b/lib/Makefile.inc +@@ -54,7 +54,8 @@ LIB_CFILES = file.c timeval.c base64.c hostip.c progress.c formdata.c \ + http_ntlm.c curl_ntlm_wb.c curl_ntlm_core.c curl_sasl.c rand.c \ + curl_multibyte.c hostcheck.c conncache.c pipeline.c dotdot.c \ + x509asn1.c http2.c smb.c curl_endian.c curl_des.c system_win32.c \ +- mime.c sha256.c setopt.c curl_path.c curl_ctype.c curl_range.c psl.c ++ mime.c sha256.c setopt.c curl_path.c curl_ctype.c curl_range.c psl.c \ ++ urlapi.c + + LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \ + formdata.h cookie.h http.h sendf.h ftp.h url.h dict.h if2ip.h \ +@@ -74,7 +75,7 @@ LIB_HFILES = arpa_telnet.h netrc.h file.h timeval.h hostip.h progress.h \ + curl_setup_once.h multihandle.h setup-vms.h pipeline.h dotdot.h \ + x509asn1.h http2.h sigpipe.h smb.h curl_endian.h curl_des.h \ + curl_printf.h system_win32.h rand.h mime.h curl_sha256.h setopt.h \ +- curl_path.h curl_ctype.h curl_range.h psl.h ++ curl_path.h curl_ctype.h curl_range.h psl.h urlapi-int.h + + LIB_RCFILES = libcurl.rc + +diff --git a/lib/escape.c b/lib/escape.c +index 10774f0..afd3899 100644 +--- a/lib/escape.c ++++ b/lib/escape.c +@@ -5,7 +5,7 @@ + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * +- * Copyright (C) 1998 - 2017, Daniel Stenberg, , et al. ++ * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms +@@ -41,7 +41,7 @@ + its behavior is altered by the current locale. 
+ See https://tools.ietf.org/html/rfc3986#section-2.3 + */ +-static bool Curl_isunreserved(unsigned char in) ++bool Curl_isunreserved(unsigned char in) + { + switch(in) { + case '0': case '1': case '2': case '3': case '4': +@@ -141,6 +141,8 @@ char *curl_easy_escape(struct Curl_easy *data, const char *string, + * Returns a pointer to a malloced string in *ostring with length given in + * *olen. If length == 0, the length is assumed to be strlen(string). + * ++ * 'data' can be set to NULL but then this function can't convert network ++ * data to host for non-ascii. + */ + CURLcode Curl_urldecode(struct Curl_easy *data, + const char *string, size_t length, +@@ -151,7 +153,7 @@ CURLcode Curl_urldecode(struct Curl_easy *data, + char *ns = malloc(alloc); + size_t strindex = 0; + unsigned long hex; +- CURLcode result; ++ CURLcode result = CURLE_OK; + + if(!ns) + return CURLE_OUT_OF_MEMORY; +@@ -171,11 +173,13 @@ CURLcode Curl_urldecode(struct Curl_easy *data, + + in = curlx_ultouc(hex); /* this long is never bigger than 255 anyway */ + +- result = Curl_convert_from_network(data, (char *)&in, 1); +- if(result) { +- /* Curl_convert_from_network calls failf if unsuccessful */ +- free(ns); +- return result; ++ if(data) { ++ result = Curl_convert_from_network(data, (char *)&in, 1); ++ if(result) { ++ /* Curl_convert_from_network calls failf if unsuccessful */ ++ free(ns); ++ return result; ++ } + } + + string += 2; +diff --git a/lib/escape.h b/lib/escape.h +index 638666f..666f1ad 100644 +--- a/lib/escape.h ++++ b/lib/escape.h +@@ -7,7 +7,7 @@ + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * +- * Copyright (C) 1998 - 2011, Daniel Stenberg, , et al. ++ * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms +@@ -24,6 +24,7 @@ + /* Escape and unescape URL encoding in strings. 
The functions return a new + * allocated string or NULL if an error occurred. */ + ++bool Curl_isunreserved(unsigned char in); + CURLcode Curl_urldecode(struct Curl_easy *data, + const char *string, size_t length, + char **ostring, size_t *olen, +diff --git a/lib/imap.c b/lib/imap.c +index 942fe7d..28962c1 100644 +--- a/lib/imap.c ++++ b/lib/imap.c +@@ -159,7 +159,8 @@ const struct Curl_handler Curl_handler_imaps = { + ZERO_NULL, /* connection_check */ + PORT_IMAPS, /* defport */ + CURLPROTO_IMAPS, /* protocol */ +- PROTOPT_CLOSEACTION | PROTOPT_SSL /* flags */ ++ PROTOPT_CLOSEACTION | PROTOPT_SSL | /* flags */ ++ PROTOPT_URLOPTIONS + }; + #endif + +diff --git a/lib/transfer.c b/lib/transfer.c +index 7159d5c..ecd1063 100644 +--- a/lib/transfer.c ++++ b/lib/transfer.c +@@ -75,6 +75,7 @@ + #include "http2.h" + #include "mime.h" + #include "strcase.h" ++#include "urlapi-int.h" + + /* The last 3 #include files should be in this order */ + #include "curl_printf.h" +@@ -1453,311 +1454,6 @@ CURLcode Curl_posttransfer(struct Curl_easy *data) + return CURLE_OK; + } + +-#ifndef CURL_DISABLE_HTTP +-/* +- * Find the separator at the end of the host name, or the '?' in cases like +- * http://www.url.com?id=2380 +- */ +-static const char *find_host_sep(const char *url) +-{ +- const char *sep; +- const char *query; +- +- /* Find the start of the hostname */ +- sep = strstr(url, "//"); +- if(!sep) +- sep = url; +- else +- sep += 2; +- +- query = strchr(sep, '?'); +- sep = strchr(sep, '/'); +- +- if(!sep) +- sep = url + strlen(url); +- +- if(!query) +- query = url + strlen(url); +- +- return sep < query ? sep : query; +-} +- +-/* +- * Decide in an encoding-independent manner whether a character in an +- * URL must be escaped. The same criterion must be used in strlen_url() +- * and strcpy_url(). 
+- */ +-static bool urlchar_needs_escaping(int c) +-{ +- return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); +-} +- +-/* +- * strlen_url() returns the length of the given URL if the spaces within the +- * URL were properly URL encoded. +- * URL encoding should be skipped for host names, otherwise IDN resolution +- * will fail. +- */ +-static size_t strlen_url(const char *url, bool relative) +-{ +- const unsigned char *ptr; +- size_t newlen = 0; +- bool left = TRUE; /* left side of the ? */ +- const unsigned char *host_sep = (const unsigned char *) url; +- +- if(!relative) +- host_sep = (const unsigned char *) find_host_sep(url); +- +- for(ptr = (unsigned char *)url; *ptr; ptr++) { +- +- if(ptr < host_sep) { +- ++newlen; +- continue; +- } +- +- switch(*ptr) { +- case '?': +- left = FALSE; +- /* FALLTHROUGH */ +- default: +- if(urlchar_needs_escaping(*ptr)) +- newlen += 2; +- newlen++; +- break; +- case ' ': +- if(left) +- newlen += 3; +- else +- newlen++; +- break; +- } +- } +- return newlen; +-} +- +-/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in +- * the source URL accordingly. +- * URL encoding should be skipped for host names, otherwise IDN resolution +- * will fail. 
+- */ +-static void strcpy_url(char *output, const char *url, bool relative) +-{ +- /* we must add this with whitespace-replacing */ +- bool left = TRUE; +- const unsigned char *iptr; +- char *optr = output; +- const unsigned char *host_sep = (const unsigned char *) url; +- +- if(!relative) +- host_sep = (const unsigned char *) find_host_sep(url); +- +- for(iptr = (unsigned char *)url; /* read from here */ +- *iptr; /* until zero byte */ +- iptr++) { +- +- if(iptr < host_sep) { +- *optr++ = *iptr; +- continue; +- } +- +- switch(*iptr) { +- case '?': +- left = FALSE; +- /* FALLTHROUGH */ +- default: +- if(urlchar_needs_escaping(*iptr)) { +- snprintf(optr, 4, "%%%02x", *iptr); +- optr += 3; +- } +- else +- *optr++=*iptr; +- break; +- case ' ': +- if(left) { +- *optr++='%'; /* add a '%' */ +- *optr++='2'; /* add a '2' */ +- *optr++='0'; /* add a '0' */ +- } +- else +- *optr++='+'; /* add a '+' here */ +- break; +- } +- } +- *optr = 0; /* zero terminate output buffer */ +- +-} +- +-/* +- * Returns true if the given URL is absolute (as opposed to relative) +- */ +-static bool is_absolute_url(const char *url) +-{ +- char prot[16]; /* URL protocol string storage */ +- char letter; /* used for a silly sscanf */ +- +- return (2 == sscanf(url, "%15[^?&/:]://%c", prot, &letter)) ? TRUE : FALSE; +-} +- +-/* +- * Concatenate a relative URL to a base URL making it absolute. +- * URL-encodes any spaces. +- * The returned pointer must be freed by the caller unless NULL +- * (returns NULL on out of memory). +- */ +-static char *concat_url(const char *base, const char *relurl) +-{ +- /*** +- TRY to append this new path to the old URL +- to the right of the host part. Oh crap, this is doomed to cause +- problems in the future... 
+- */ +- char *newest; +- char *protsep; +- char *pathsep; +- size_t newlen; +- bool host_changed = FALSE; +- +- const char *useurl = relurl; +- size_t urllen; +- +- /* we must make our own copy of the URL to play with, as it may +- point to read-only data */ +- char *url_clone = strdup(base); +- +- if(!url_clone) +- return NULL; /* skip out of this NOW */ +- +- /* protsep points to the start of the host name */ +- protsep = strstr(url_clone, "//"); +- if(!protsep) +- protsep = url_clone; +- else +- protsep += 2; /* pass the slashes */ +- +- if('/' != relurl[0]) { +- int level = 0; +- +- /* First we need to find out if there's a ?-letter in the URL, +- and cut it and the right-side of that off */ +- pathsep = strchr(protsep, '?'); +- if(pathsep) +- *pathsep = 0; +- +- /* we have a relative path to append to the last slash if there's one +- available, or if the new URL is just a query string (starts with a +- '?') we append the new one at the end of the entire currently worked +- out URL */ +- if(useurl[0] != '?') { +- pathsep = strrchr(protsep, '/'); +- if(pathsep) +- *pathsep = 0; +- } +- +- /* Check if there's any slash after the host name, and if so, remember +- that position instead */ +- pathsep = strchr(protsep, '/'); +- if(pathsep) +- protsep = pathsep + 1; +- else +- protsep = NULL; +- +- /* now deal with one "./" or any amount of "../" in the newurl +- and act accordingly */ +- +- if((useurl[0] == '.') && (useurl[1] == '/')) +- useurl += 2; /* just skip the "./" */ +- +- while((useurl[0] == '.') && +- (useurl[1] == '.') && +- (useurl[2] == '/')) { +- level++; +- useurl += 3; /* pass the "../" */ +- } +- +- if(protsep) { +- while(level--) { +- /* cut off one more level from the right of the original URL */ +- pathsep = strrchr(protsep, '/'); +- if(pathsep) +- *pathsep = 0; +- else { +- *protsep = 0; +- break; +- } +- } +- } +- } +- else { +- /* We got a new absolute path for this server */ +- +- if((relurl[0] == '/') && (relurl[1] == '/')) { +- /* the new 
URL starts with //, just keep the protocol part from the +- original one */ +- *protsep = 0; +- useurl = &relurl[2]; /* we keep the slashes from the original, so we +- skip the new ones */ +- host_changed = TRUE; +- } +- else { +- /* cut off the original URL from the first slash, or deal with URLs +- without slash */ +- pathsep = strchr(protsep, '/'); +- if(pathsep) { +- /* When people use badly formatted URLs, such as +- "http://www.url.com?dir=/home/daniel" we must not use the first +- slash, if there's a ?-letter before it! */ +- char *sep = strchr(protsep, '?'); +- if(sep && (sep < pathsep)) +- pathsep = sep; +- *pathsep = 0; +- } +- else { +- /* There was no slash. Now, since we might be operating on a badly +- formatted URL, such as "http://www.url.com?id=2380" which doesn't +- use a slash separator as it is supposed to, we need to check for a +- ?-letter as well! */ +- pathsep = strchr(protsep, '?'); +- if(pathsep) +- *pathsep = 0; +- } +- } +- } +- +- /* If the new part contains a space, this is a mighty stupid redirect +- but we still make an effort to do "right". To the left of a '?' +- letter we replace each space with %20 while it is replaced with '+' +- on the right side of the '?' letter. +- */ +- newlen = strlen_url(useurl, !host_changed); +- +- urllen = strlen(url_clone); +- +- newest = malloc(urllen + 1 + /* possible slash */ +- newlen + 1 /* zero byte */); +- +- if(!newest) { +- free(url_clone); /* don't leak this */ +- return NULL; +- } +- +- /* copy over the root url part */ +- memcpy(newest, url_clone, urllen); +- +- /* check if we need to append a slash */ +- if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) +- ; +- else +- newest[urllen++]='/'; +- +- /* then append the new piece on the right side */ +- strcpy_url(&newest[urllen], useurl, !host_changed); +- +- free(url_clone); +- +- return newest; +-} +-#endif /* CURL_DISABLE_HTTP */ +- + /* + * Curl_follow() handles the URL redirect magic. 
Pass in the 'newurl' string + * as given by the remote server and set up the new URL to request. +@@ -1809,12 +1505,12 @@ CURLcode Curl_follow(struct Curl_easy *data, + } + } + +- if(!is_absolute_url(newurl)) { ++ if(!Curl_is_absolute_url(newurl, NULL, 8)) { + /*** + *DANG* this is an RFC 2068 violation. The URL is supposed + to be absolute and this doesn't seem to be that! + */ +- char *absolute = concat_url(data->change.url, newurl); ++ char *absolute = Curl_concat_url(data->change.url, newurl); + if(!absolute) + return CURLE_OUT_OF_MEMORY; + newurl = absolute; +@@ -1823,7 +1519,7 @@ CURLcode Curl_follow(struct Curl_easy *data, + /* The new URL MAY contain space or high byte values, that means a mighty + stupid redirect URL but we still make an effort to do "right". */ + char *newest; +- size_t newlen = strlen_url(newurl, FALSE); ++ size_t newlen = Curl_strlen_url(newurl, FALSE); + + /* This is an absolute URL, don't allow the custom port number */ + disallowport = TRUE; +@@ -1832,7 +1528,7 @@ CURLcode Curl_follow(struct Curl_easy *data, + if(!newest) + return CURLE_OUT_OF_MEMORY; + +- strcpy_url(newest, newurl, FALSE); /* create a space-free URL */ ++ Curl_strcpy_url(newest, newurl, FALSE); /* create a space-free URL */ + newurl = newest; /* use this instead now */ + + } +diff --git a/lib/url.c b/lib/url.c +index dcc1ecc..4f75f11 100644 +--- a/lib/url.c ++++ b/lib/url.c +@@ -1939,30 +1939,37 @@ static struct connectdata *allocate_conn(struct Curl_easy *data) + return NULL; + } + +-static CURLcode findprotocol(struct Curl_easy *data, +- struct connectdata *conn, +- const char *protostr) ++/* returns the handdler if the given scheme is built-in */ ++const struct Curl_handler *Curl_builtin_scheme(const char *scheme) + { + const struct Curl_handler * const *pp; + const struct Curl_handler *p; +- +- /* Scan protocol handler table and match against 'protostr' to set a few +- variables based on the URL. 
Now that the handler may be changed later +- when the protocol specific setup function is called. */ +- for(pp = protocols; (p = *pp) != NULL; pp++) { +- if(strcasecompare(p->scheme, protostr)) { ++ /* Scan protocol handler table and match against 'scheme'. The handler may ++ be changed later when the protocol specific setup function is called. */ ++ for(pp = protocols; (p = *pp) != NULL; pp++) ++ if(strcasecompare(p->scheme, scheme)) + /* Protocol found in table. Check if allowed */ +- if(!(data->set.allowed_protocols & p->protocol)) +- /* nope, get out */ +- break; ++ return p; ++ return NULL; /* not found */ ++} + +- /* it is allowed for "normal" request, now do an extra check if this is +- the result of a redirect */ +- if(data->state.this_is_a_follow && +- !(data->set.redir_protocols & p->protocol)) +- /* nope, get out */ +- break; + ++static CURLcode findprotocol(struct Curl_easy *data, ++ struct connectdata *conn, ++ const char *protostr) ++{ ++ const struct Curl_handler *p = Curl_builtin_scheme(protostr); ++ ++ if(p && /* Protocol found in table. Check if allowed */ ++ (data->set.allowed_protocols & p->protocol)) { ++ ++ /* it is allowed for "normal" request, now do an extra check if this is ++ the result of a redirect */ ++ if(data->state.this_is_a_follow && ++ !(data->set.redir_protocols & p->protocol)) ++ /* nope, get out */ ++ ; ++ else { + /* Perform setup complement if some. */ + conn->handler = conn->given = p; + +@@ -1971,7 +1978,6 @@ static CURLcode findprotocol(struct Curl_easy *data, + } + } + +- + /* The protocol was not found in the table, but we don't have to assign it + to anything since it is already assigned to a dummy-struct in the + create_conn() function when the connectdata struct is allocated. 
*/ +diff --git a/lib/url.h b/lib/url.h +index ef3ebf0..0034f82 100644 +--- a/lib/url.h ++++ b/lib/url.h +@@ -69,6 +69,8 @@ void Curl_getoff_all_pipelines(struct Curl_easy *data, + + void Curl_close_connections(struct Curl_easy *data); + ++const struct Curl_handler *Curl_builtin_scheme(const char *scheme); ++ + #define CURL_DEFAULT_PROXY_PORT 1080 /* default proxy port unless specified */ + #define CURL_DEFAULT_HTTPS_PROXY_PORT 443 /* default https proxy port unless + specified */ +diff --git a/lib/escape.h b/lib/urlapi-int.h +similarity index 66% +copy from lib/escape.h +copy to lib/urlapi-int.h +index 638666f..7ac09fd 100644 +--- a/lib/escape.h ++++ b/lib/urlapi-int.h +@@ -1,5 +1,5 @@ +-#ifndef HEADER_CURL_ESCAPE_H +-#define HEADER_CURL_ESCAPE_H ++#ifndef HEADER_CURL_URLAPI_INT_H ++#define HEADER_CURL_URLAPI_INT_H + /*************************************************************************** + * _ _ ____ _ + * Project ___| | | | _ \| | +@@ -7,7 +7,7 @@ + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * +- * Copyright (C) 1998 - 2011, Daniel Stenberg, , et al. ++ * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms +@@ -21,13 +21,9 @@ + * KIND, either express or implied. + * + ***************************************************************************/ +-/* Escape and unescape URL encoding in strings. The functions return a new +- * allocated string or NULL if an error occurred. 
*/ +- +-CURLcode Curl_urldecode(struct Curl_easy *data, +- const char *string, size_t length, +- char **ostring, size_t *olen, +- bool reject_crlf); +- +-#endif /* HEADER_CURL_ESCAPE_H */ +- ++#include "curl_setup.h" ++bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen); ++char *Curl_concat_url(const char *base, const char *relurl); ++size_t Curl_strlen_url(const char *url, bool relative); ++void Curl_strcpy_url(char *output, const char *url, bool relative); ++#endif +diff --git a/lib/urlapi.c b/lib/urlapi.c +new file mode 100644 +index 0000000..8287861 +--- /dev/null ++++ b/lib/urlapi.c +@@ -0,0 +1,1315 @@ ++/*************************************************************************** ++ * _ _ ____ _ ++ * Project ___| | | | _ \| | ++ * / __| | | | |_) | | ++ * | (__| |_| | _ <| |___ ++ * \___|\___/|_| \_\_____| ++ * ++ * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++ * ++ * This software is licensed as described in the file COPYING, which ++ * you should have received as part of this distribution. The terms ++ * are also available at https://curl.haxx.se/docs/copyright.html. ++ * ++ * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++ * copies of the Software, and permit persons to whom the Software is ++ * furnished to do so, under the terms of the COPYING file. ++ * ++ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++ * KIND, either express or implied. ++ * ++ ***************************************************************************/ ++ ++#include "curl_setup.h" ++ ++#include "urldata.h" ++#include "urlapi-int.h" ++#include "strcase.h" ++#include "dotdot.h" ++#include "url.h" ++#include "escape.h" ++#include "curl_ctype.h" ++ ++/* The last 3 #include files should be in this order */ ++#include "curl_printf.h" ++#include "curl_memory.h" ++#include "memdebug.h" ++ ++/* Internal representation of CURLU. Point to URL-encoded strings. 
*/ ++struct Curl_URL { ++ char *scheme; ++ char *user; ++ char *password; ++ char *options; /* IMAP only? */ ++ char *host; ++ char *port; ++ char *path; ++ char *query; ++ char *fragment; ++ ++ char *scratch; /* temporary scratch area */ ++ long portnum; /* the numerical version */ ++}; ++ ++#define DEFAULT_SCHEME "https" ++ ++/* scheme is not URL encoded, the longest libcurl supported ones are 6 ++ letters */ ++#define MAX_SCHEME_LEN 8 ++ ++static void free_urlhandle(struct Curl_URL *u) ++{ ++ free(u->scheme); ++ free(u->user); ++ free(u->password); ++ free(u->options); ++ free(u->host); ++ free(u->port); ++ free(u->path); ++ free(u->query); ++ free(u->fragment); ++ free(u->scratch); ++} ++ ++/* move the full contents of one handle onto another and ++ free the original */ ++static void mv_urlhandle(struct Curl_URL *from, ++ struct Curl_URL *to) ++{ ++ free_urlhandle(to); ++ *to = *from; ++ free(from); ++} ++ ++/* ++ * Find the separator at the end of the host name, or the '?' in cases like ++ * http://www.url.com?id=2380 ++ */ ++static const char *find_host_sep(const char *url) ++{ ++ const char *sep; ++ const char *query; ++ ++ /* Find the start of the hostname */ ++ sep = strstr(url, "//"); ++ if(!sep) ++ sep = url; ++ else ++ sep += 2; ++ ++ query = strchr(sep, '?'); ++ sep = strchr(sep, '/'); ++ ++ if(!sep) ++ sep = url + strlen(url); ++ ++ if(!query) ++ query = url + strlen(url); ++ ++ return sep < query ? sep : query; ++} ++ ++/* ++ * Decide in an encoding-independent manner whether a character in an ++ * URL must be escaped. The same criterion must be used in strlen_url() ++ * and strcpy_url(). ++ */ ++static bool urlchar_needs_escaping(int c) ++{ ++ return !(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)); ++} ++ ++/* ++ * strlen_url() returns the length of the given URL if the spaces within the ++ * URL were properly URL encoded. ++ * URL encoding should be skipped for host names, otherwise IDN resolution ++ * will fail. 
++ */ ++size_t Curl_strlen_url(const char *url, bool relative) ++{ ++ const unsigned char *ptr; ++ size_t newlen = 0; ++ bool left = TRUE; /* left side of the ? */ ++ const unsigned char *host_sep = (const unsigned char *) url; ++ ++ if(!relative) ++ host_sep = (const unsigned char *) find_host_sep(url); ++ ++ for(ptr = (unsigned char *)url; *ptr; ptr++) { ++ ++ if(ptr < host_sep) { ++ ++newlen; ++ continue; ++ } ++ ++ switch(*ptr) { ++ case '?': ++ left = FALSE; ++ /* FALLTHROUGH */ ++ default: ++ if(urlchar_needs_escaping(*ptr)) ++ newlen += 2; ++ newlen++; ++ break; ++ case ' ': ++ if(left) ++ newlen += 3; ++ else ++ newlen++; ++ break; ++ } ++ } ++ return newlen; ++} ++ ++/* strcpy_url() copies a url to a output buffer and URL-encodes the spaces in ++ * the source URL accordingly. ++ * URL encoding should be skipped for host names, otherwise IDN resolution ++ * will fail. ++ */ ++void Curl_strcpy_url(char *output, const char *url, bool relative) ++{ ++ /* we must add this with whitespace-replacing */ ++ bool left = TRUE; ++ const unsigned char *iptr; ++ char *optr = output; ++ const unsigned char *host_sep = (const unsigned char *) url; ++ ++ if(!relative) ++ host_sep = (const unsigned char *) find_host_sep(url); ++ ++ for(iptr = (unsigned char *)url; /* read from here */ ++ *iptr; /* until zero byte */ ++ iptr++) { ++ ++ if(iptr < host_sep) { ++ *optr++ = *iptr; ++ continue; ++ } ++ ++ switch(*iptr) { ++ case '?': ++ left = FALSE; ++ /* FALLTHROUGH */ ++ default: ++ if(urlchar_needs_escaping(*iptr)) { ++ snprintf(optr, 4, "%%%02x", *iptr); ++ optr += 3; ++ } ++ else ++ *optr++=*iptr; ++ break; ++ case ' ': ++ if(left) { ++ *optr++='%'; /* add a '%' */ ++ *optr++='2'; /* add a '2' */ ++ *optr++='0'; /* add a '0' */ ++ } ++ else ++ *optr++='+'; /* add a '+' here */ ++ break; ++ } ++ } ++ *optr = 0; /* zero terminate output buffer */ ++ ++} ++ ++/* ++ * Returns true if the given URL is absolute (as opposed to relative) within ++ * the buffer size. 
Returns the scheme in the buffer if TRUE and 'buf' is ++ * non-NULL. ++ */ ++bool Curl_is_absolute_url(const char *url, char *buf, size_t buflen) ++{ ++ size_t i; ++ for(i = 0; i < buflen && url[i]; ++i) { ++ char s = url[i]; ++ if(s == ':') { ++ if(buf) ++ buf[i] = 0; ++ return TRUE; ++ } ++ /* RFC 3986 3.1 explains: ++ scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) ++ */ ++ else if(ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') ) { ++ if(buf) ++ buf[i] = (char)TOLOWER(s); ++ } ++ else ++ break; ++ } ++ return FALSE; ++} ++ ++/* ++ * Concatenate a relative URL to a base URL making it absolute. ++ * URL-encodes any spaces. ++ * The returned pointer must be freed by the caller unless NULL ++ * (returns NULL on out of memory). ++ */ ++char *Curl_concat_url(const char *base, const char *relurl) ++{ ++ /*** ++ TRY to append this new path to the old URL ++ to the right of the host part. Oh crap, this is doomed to cause ++ problems in the future... ++ */ ++ char *newest; ++ char *protsep; ++ char *pathsep; ++ size_t newlen; ++ bool host_changed = FALSE; ++ ++ const char *useurl = relurl; ++ size_t urllen; ++ ++ /* we must make our own copy of the URL to play with, as it may ++ point to read-only data */ ++ char *url_clone = strdup(base); ++ ++ if(!url_clone) ++ return NULL; /* skip out of this NOW */ ++ ++ /* protsep points to the start of the host name */ ++ protsep = strstr(url_clone, "//"); ++ if(!protsep) ++ protsep = url_clone; ++ else ++ protsep += 2; /* pass the slashes */ ++ ++ if('/' != relurl[0]) { ++ int level = 0; ++ ++ /* First we need to find out if there's a ?-letter in the URL, ++ and cut it and the right-side of that off */ ++ pathsep = strchr(protsep, '?'); ++ if(pathsep) ++ *pathsep = 0; ++ ++ /* we have a relative path to append to the last slash if there's one ++ available, or if the new URL is just a query string (starts with a ++ '?') we append the new one at the end of the entire currently worked ++ out URL */ ++ if(useurl[0] != '?') { 
++ pathsep = strrchr(protsep, '/'); ++ if(pathsep) ++ *pathsep = 0; ++ } ++ ++ /* Check if there's any slash after the host name, and if so, remember ++ that position instead */ ++ pathsep = strchr(protsep, '/'); ++ if(pathsep) ++ protsep = pathsep + 1; ++ else ++ protsep = NULL; ++ ++ /* now deal with one "./" or any amount of "../" in the newurl ++ and act accordingly */ ++ ++ if((useurl[0] == '.') && (useurl[1] == '/')) ++ useurl += 2; /* just skip the "./" */ ++ ++ while((useurl[0] == '.') && ++ (useurl[1] == '.') && ++ (useurl[2] == '/')) { ++ level++; ++ useurl += 3; /* pass the "../" */ ++ } ++ ++ if(protsep) { ++ while(level--) { ++ /* cut off one more level from the right of the original URL */ ++ pathsep = strrchr(protsep, '/'); ++ if(pathsep) ++ *pathsep = 0; ++ else { ++ *protsep = 0; ++ break; ++ } ++ } ++ } ++ } ++ else { ++ /* We got a new absolute path for this server */ ++ ++ if((relurl[0] == '/') && (relurl[1] == '/')) { ++ /* the new URL starts with //, just keep the protocol part from the ++ original one */ ++ *protsep = 0; ++ useurl = &relurl[2]; /* we keep the slashes from the original, so we ++ skip the new ones */ ++ host_changed = TRUE; ++ } ++ else { ++ /* cut off the original URL from the first slash, or deal with URLs ++ without slash */ ++ pathsep = strchr(protsep, '/'); ++ if(pathsep) { ++ /* When people use badly formatted URLs, such as ++ "http://www.url.com?dir=/home/daniel" we must not use the first ++ slash, if there's a ?-letter before it! */ ++ char *sep = strchr(protsep, '?'); ++ if(sep && (sep < pathsep)) ++ pathsep = sep; ++ *pathsep = 0; ++ } ++ else { ++ /* There was no slash. Now, since we might be operating on a badly ++ formatted URL, such as "http://www.url.com?id=2380" which doesn't ++ use a slash separator as it is supposed to, we need to check for a ++ ?-letter as well! 
*/ ++ pathsep = strchr(protsep, '?'); ++ if(pathsep) ++ *pathsep = 0; ++ } ++ } ++ } ++ ++ /* If the new part contains a space, this is a mighty stupid redirect ++ but we still make an effort to do "right". To the left of a '?' ++ letter we replace each space with %20 while it is replaced with '+' ++ on the right side of the '?' letter. ++ */ ++ newlen = Curl_strlen_url(useurl, !host_changed); ++ ++ urllen = strlen(url_clone); ++ ++ newest = malloc(urllen + 1 + /* possible slash */ ++ newlen + 1 /* zero byte */); ++ ++ if(!newest) { ++ free(url_clone); /* don't leak this */ ++ return NULL; ++ } ++ ++ /* copy over the root url part */ ++ memcpy(newest, url_clone, urllen); ++ ++ /* check if we need to append a slash */ ++ if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) ++ ; ++ else ++ newest[urllen++]='/'; ++ ++ /* then append the new piece on the right side */ ++ Curl_strcpy_url(&newest[urllen], useurl, !host_changed); ++ ++ free(url_clone); ++ ++ return newest; ++} ++ ++/* ++ * parse_hostname_login() ++ * ++ * Parse the login details (user name, password and options) from the URL and ++ * strip them out of the host name ++ * ++ */ ++static CURLUcode parse_hostname_login(struct Curl_URL *u, ++ const struct Curl_handler *h, ++ char **hostname, ++ unsigned int flags) ++{ ++ CURLUcode result = CURLUE_OK; ++ CURLcode ccode; ++ char *userp = NULL; ++ char *passwdp = NULL; ++ char *optionsp = NULL; ++ ++ /* At this point, we're hoping all the other special cases have ++ * been taken care of, so conn->host.name is at most ++ * [user[:password][;options]]@]hostname ++ * ++ * We need somewhere to put the embedded details, so do that first. 
++ */ ++ ++ char *ptr = strchr(*hostname, '@'); ++ char *login = *hostname; ++ ++ if(!ptr) ++ goto out; ++ ++ /* We will now try to extract the ++ * possible login information in a string like: ++ * ftp://user:password@ftp.my.site:8021/README */ ++ *hostname = ++ptr; ++ ++ /* We could use the login information in the URL so extract it. Only parse ++ options if the handler says we should. */ ++ ccode = Curl_parse_login_details(login, ptr - login - 1, ++ &userp, &passwdp, ++ h->flags & PROTOPT_URLOPTIONS ? ++ &optionsp:NULL); ++ if(ccode) { ++ result = CURLUE_MALFORMED_INPUT; ++ goto out; ++ } ++ ++ if(userp) { ++ if(flags & CURLU_DISALLOW_USER) { ++ /* Option DISALLOW_USER is set and url contains username. */ ++ result = CURLUE_USER_NOT_ALLOWED; ++ goto out; ++ } ++ ++ u->user = userp; ++ } ++ ++ if(passwdp) ++ u->password = passwdp; ++ ++ if(optionsp) ++ u->options = optionsp; ++ ++ return CURLUE_OK; ++ out: ++ ++ free(userp); ++ free(passwdp); ++ free(optionsp); ++ ++ return result; ++} ++ ++static CURLUcode parse_port(struct Curl_URL *u, char *hostname) ++{ ++ char *portptr; ++ char endbracket; ++ int len; ++ ++ if((1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.]%c%n", ++ &endbracket, &len)) && ++ (']' == endbracket)) { ++ /* this is a RFC2732-style specified IP-address */ ++ portptr = &hostname[len]; ++ if (*portptr != ':') ++ return CURLUE_MALFORMED_INPUT; ++ } ++ else ++ portptr = strchr(hostname, ':'); ++ ++ if(portptr) { ++ char *rest; ++ long port; ++ char portbuf[7]; ++ ++ if(!ISDIGIT(portptr[1])) ++ return CURLUE_BAD_PORT_NUMBER; ++ ++ port = strtol(portptr + 1, &rest, 10); /* Port number must be decimal */ ++ ++ if((port <= 0) || (port > 0xffff)) ++ /* Single unix standard says port numbers are 16 bits long, but we don't ++ treat port zero as OK. 
*/ ++ return CURLUE_BAD_PORT_NUMBER; ++ ++ if(rest[0]) ++ return CURLUE_BAD_PORT_NUMBER; ++ ++ if(rest != &portptr[1]) { ++ *portptr++ = '\0'; /* cut off the name there */ ++ *rest = 0; ++ /* generate a new to get rid of leading zeroes etc */ ++ snprintf(portbuf, sizeof(portbuf), "%ld", port); ++ u->portnum = port; ++ u->port = strdup(portbuf); ++ if(!u->port) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ else { ++ /* Browser behavior adaptation. If there's a colon with no digits after, ++ just cut off the name there which makes us ignore the colon and just ++ use the default port. Firefox and Chrome both do that. */ ++ *portptr = '\0'; ++ } ++ } ++ ++ return CURLUE_OK; ++} ++ ++/* scan for byte values < 31 or 127 */ ++static CURLUcode junkscan(char *part) ++{ ++ char badbytes[]={ ++ /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, ++ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, ++ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, ++ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, ++ 0x7f, ++ 0x00 /* zero terminate */ ++ }; ++ if(part) { ++ size_t n = strlen(part); ++ size_t nfine = strcspn(part, badbytes); ++ if(nfine != n) ++ /* since we don't know which part is scanned, return a generic error ++ code */ ++ return CURLUE_MALFORMED_INPUT; ++ } ++ return CURLUE_OK; ++} ++ ++static CURLUcode hostname_check(char *hostname, unsigned int flags) ++{ ++ const char *l; /* accepted characters */ ++ size_t len; ++ size_t hlen = strlen(hostname); ++ (void)flags; ++ ++ if(hostname[0] == '[') { ++ hostname++; ++ l = "0123456789abcdefABCDEF::."; ++ hlen -= 2; ++ } ++ else /* % for URL escaped letters */ ++ l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%"; ++ ++ len = strspn(hostname, l); ++ if(hlen != len) ++ /* hostname with bad content */ ++ return CURLUE_MALFORMED_INPUT; ++ ++ return CURLUE_OK; ++} ++ ++#define HOSTNAME_END(x) (((x) == '/') || ((x) == '?') || ((x) == '#')) ++ ++static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) ++{ ++ 
char *path; ++ bool path_alloced = FALSE; ++ char *hostname; ++ char *query = NULL; ++ char *fragment = NULL; ++ CURLUcode result; ++ bool url_has_scheme = FALSE; ++ char schemebuf[MAX_SCHEME_LEN]; ++ char *schemep; ++ size_t schemelen = 0; ++ size_t urllen; ++ const struct Curl_handler *h = NULL; ++ ++ if(!url) ++ return CURLUE_MALFORMED_INPUT; ++ ++ /************************************************************* ++ * Parse the URL. ++ ************************************************************/ ++ /* allocate scratch area */ ++ urllen = strlen(url); ++ path = u->scratch = malloc(urllen * 2 + 2); ++ if(!path) ++ return CURLUE_OUT_OF_MEMORY; ++ ++ hostname = &path[urllen + 1]; ++ hostname[0] = 0; ++ ++ /* MSDOS/Windows style drive prefix, eg c: in c:foo */ ++#define STARTS_WITH_DRIVE_PREFIX(str) \ ++ ((('a' <= str[0] && str[0] <= 'z') || \ ++ ('A' <= str[0] && str[0] <= 'Z')) && \ ++ (str[1] == ':')) ++ ++ /* MSDOS/Windows style drive prefix, optionally with ++ * a '|' instead of ':', followed by a slash or NUL */ ++#define STARTS_WITH_URL_DRIVE_PREFIX(str) \ ++ ((('a' <= (str)[0] && (str)[0] <= 'z') || \ ++ ('A' <= (str)[0] && (str)[0] <= 'Z')) && \ ++ ((str)[1] == ':' || (str)[1] == '|') && \ ++ ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) ++ ++ if(Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf))) { ++ url_has_scheme = TRUE; ++ schemelen = strlen(schemebuf); ++ } ++ ++ /* handle the file: scheme */ ++ if(url_has_scheme && strcasecompare(schemebuf, "file")) { ++ /* path has been allocated large anough to hold this */ ++ strcpy(path, &url[5]); ++ ++ hostname = NULL; /* no host for file: URLs */ ++ u->scheme = strdup("file"); ++ if(!u->scheme) ++ return CURLUE_OUT_OF_MEMORY; ++ ++ /* Extra handling URLs with an authority component (i.e. that start with ++ * "file://") ++ * ++ * We allow omitted hostname (e.g. file:/) -- valid according to ++ * RFC 8089, but not the (current) WHAT-WG URL spec. 
++ */ ++ if(path[0] == '/' && path[1] == '/') { ++ /* swallow the two slashes */ ++ char *ptr = &path[2]; ++ ++ /* ++ * According to RFC 8089, a file: URL can be reliably dereferenced if: ++ * ++ * o it has no/blank hostname, or ++ * ++ * o the hostname matches "localhost" (case-insensitively), or ++ * ++ * o the hostname is a FQDN that resolves to this machine. ++ * ++ * For brevity, we only consider URLs with empty, "localhost", or ++ * "127.0.0.1" hostnames as local. ++ * ++ * Additionally, there is an exception for URLs with a Windows drive ++ * letter in the authority (which was accidentally omitted from RFC 8089 ++ * Appendix E, but believe me, it was meant to be there. --MK) ++ */ ++ if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { ++ /* the URL includes a host name, it must match "localhost" or ++ "127.0.0.1" to be valid */ ++ if(!checkprefix("localhost/", ptr) && ++ !checkprefix("127.0.0.1/", ptr)) { ++ /* Invalid file://hostname/, expected localhost or 127.0.0.1 or ++ none */ ++ return CURLUE_MALFORMED_INPUT; ++ } ++ ptr += 9; /* now points to the slash after the host */ ++ } ++ ++ path = ptr; ++ } ++ ++#if !defined(MSDOS) && !defined(WIN32) && !defined(__CYGWIN__) ++ /* Don't allow Windows drive letters when not in Windows. ++ * This catches both "file:/c:" and "file:c:" */ ++ if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || ++ STARTS_WITH_URL_DRIVE_PREFIX(path)) { ++ /* File drive letters are only accepted in MSDOS/Windows */ ++ return CURLUE_MALFORMED_INPUT; ++ } ++#else ++ /* If the path starts with a slash and a drive letter, ditch the slash */ ++ if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { ++ /* This cannot be done with strcpy, as the memory chunks overlap! 
*/ ++ memmove(path, &path[1], strlen(&path[1]) + 1); ++ } ++#endif ++ ++ } ++ else { ++ /* clear path */ ++ const char *p; ++ const char *hostp; ++ size_t len; ++ path[0] = 0; ++ ++ if(url_has_scheme) { ++ int i = 0; ++ p = &url[schemelen + 1]; ++ while(p && (*p == '/') && (i < 4)) { ++ p++; ++ i++; ++ } ++ if((i < 1) || (i>3)) ++ /* less than one or more than three slashes */ ++ return CURLUE_MALFORMED_INPUT; ++ ++ schemep = schemebuf; ++ if(!Curl_builtin_scheme(schemep) && ++ !(flags & CURLU_NON_SUPPORT_SCHEME)) ++ return CURLUE_UNSUPPORTED_SCHEME; ++ ++ if(junkscan(schemep)) ++ return CURLUE_MALFORMED_INPUT; ++ } ++ else { ++ /* no scheme! */ ++ ++ if(!(flags & CURLU_DEFAULT_SCHEME)) ++ return CURLUE_MALFORMED_INPUT; ++ schemep = (char *) DEFAULT_SCHEME; ++ ++ /* ++ * The URL was badly formatted, let's try without scheme specified. ++ */ ++ p = url; ++ } ++ hostp = p; /* host name starts here */ ++ ++ while(*p && !HOSTNAME_END(*p)) /* find end of host name */ ++ p++; ++ ++ len = p - hostp; ++ if(!len) ++ return CURLUE_MALFORMED_INPUT; ++ ++ memcpy(hostname, hostp, len); ++ hostname[len] = 0; ++ ++ len = strlen(p); ++ memcpy(path, p, len); ++ path[len] = 0; ++ ++ u->scheme = strdup(schemep); ++ if(!u->scheme) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ ++ /* if this is a known scheme, get some details */ ++ h = Curl_builtin_scheme(u->scheme); ++ ++ if(junkscan(path)) ++ return CURLUE_MALFORMED_INPUT; ++ ++ query = strchr(path, '?'); ++ if(query) ++ *query++ = 0; ++ ++ fragment = strchr(query?query:path, '#'); ++ if(fragment) ++ *fragment++ = 0; ++ ++ if(!path[0]) ++ /* if there's no path set, unset */ ++ path = NULL; ++ else if(!(flags & CURLU_PATH_AS_IS)) { ++ /* sanitise paths and remove ../ and ./ sequences according to RFC3986 */ ++ char *newp = Curl_dedotdotify(path); ++ if(!newp) ++ return CURLUE_OUT_OF_MEMORY; ++ ++ if(strcmp(newp, path)) { ++ /* if we got a new version */ ++ path = newp; ++ path_alloced = TRUE; ++ } ++ else ++ free(newp); ++ } ++ if(path) { 
++ u->path = path_alloced?path:strdup(path); ++ if(!u->path) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ ++ if(hostname) { ++ /* ++ * Parse the login details and strip them out of the host name. ++ */ ++ if(junkscan(hostname)) ++ return CURLUE_MALFORMED_INPUT; ++ ++ result = parse_hostname_login(u, h, &hostname, flags); ++ if(result) ++ return result; ++ ++ result = parse_port(u, hostname); ++ if(result) ++ return result; ++ ++ result = hostname_check(hostname, flags); ++ if(result) ++ return result; ++ ++ u->host = strdup(hostname); ++ if(!u->host) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ ++ if(query && query[0]) { ++ u->query = strdup(query); ++ if(!u->query) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ if(fragment && fragment[0]) { ++ u->fragment = strdup(fragment); ++ if(!u->fragment) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ ++ free(u->scratch); ++ u->scratch = NULL; ++ ++ return CURLUE_OK; ++} ++ ++/* ++ * Parse the URL and set the relevant members of the Curl_URL struct. ++ */ ++static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) ++{ ++ CURLUcode result = seturl(url, u, flags); ++ if(result) { ++ free_urlhandle(u); ++ memset(u, 0, sizeof(struct Curl_URL)); ++ } ++ return result; ++} ++ ++/* ++ */ ++CURLU *curl_url(void) ++{ ++ return calloc(sizeof(struct Curl_URL), 1); ++} ++ ++void curl_url_cleanup(CURLU *u) ++{ ++ if(u) { ++ free_urlhandle(u); ++ free(u); ++ } ++} ++ ++#define DUP(dest, src, name) \ ++ if(src->name) { \ ++ dest->name = strdup(src->name); \ ++ if(!dest->name) \ ++ goto fail; \ ++ } ++ ++CURLU *curl_url_dup(CURLU *in) ++{ ++ struct Curl_URL *u = calloc(sizeof(struct Curl_URL), 1); ++ if(u) { ++ DUP(u, in, scheme); ++ DUP(u, in, user); ++ DUP(u, in, password); ++ DUP(u, in, options); ++ DUP(u, in, host); ++ DUP(u, in, port); ++ DUP(u, in, path); ++ DUP(u, in, query); ++ DUP(u, in, fragment); ++ u->portnum = in->portnum; ++ } ++ return u; ++ fail: ++ curl_url_cleanup(u); ++ return NULL; ++} ++ ++CURLUcode curl_url_get(CURLU *u, 
CURLUPart what, ++ char **part, unsigned int flags) ++{ ++ char *ptr; ++ CURLUcode ifmissing = CURLUE_UNKNOWN_PART; ++ char portbuf[7]; ++ bool urldecode = (flags & CURLU_URLDECODE)?1:0; ++ bool plusdecode = FALSE; ++ (void)flags; ++ if(!u) ++ return CURLUE_BAD_HANDLE; ++ if(!part) ++ return CURLUE_BAD_PARTPOINTER; ++ *part = NULL; ++ ++ switch(what) { ++ case CURLUPART_SCHEME: ++ ptr = u->scheme; ++ ifmissing = CURLUE_NO_SCHEME; ++ urldecode = FALSE; /* never for schemes */ ++ break; ++ case CURLUPART_USER: ++ ptr = u->user; ++ ifmissing = CURLUE_NO_USER; ++ break; ++ case CURLUPART_PASSWORD: ++ ptr = u->password; ++ ifmissing = CURLUE_NO_PASSWORD; ++ break; ++ case CURLUPART_OPTIONS: ++ ptr = u->options; ++ ifmissing = CURLUE_NO_OPTIONS; ++ break; ++ case CURLUPART_HOST: ++ ptr = u->host; ++ ifmissing = CURLUE_NO_HOST; ++ break; ++ case CURLUPART_PORT: ++ ptr = u->port; ++ ifmissing = CURLUE_NO_PORT; ++ urldecode = FALSE; /* never for port */ ++ if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { ++ /* there's no stored port number, but asked to deliver ++ a default one for the scheme */ ++ const struct Curl_handler *h = ++ Curl_builtin_scheme(u->scheme); ++ if(h) { ++ snprintf(portbuf, sizeof(portbuf), "%ld", h->defport); ++ ptr = portbuf; ++ } ++ } ++ else if(ptr && u->scheme) { ++ /* there is a stored port number, but ask to inhibit if ++ it matches the default one for the scheme */ ++ const struct Curl_handler *h = ++ Curl_builtin_scheme(u->scheme); ++ if(h && (h->defport == u->portnum) && ++ (flags & CURLU_NO_DEFAULT_PORT)) ++ ptr = NULL; ++ } ++ break; ++ case CURLUPART_PATH: ++ ptr = u->path; ++ if(!ptr) { ++ ptr = u->path = strdup("/"); ++ if(!u->path) ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ break; ++ case CURLUPART_QUERY: ++ ptr = u->query; ++ ifmissing = CURLUE_NO_QUERY; ++ plusdecode = urldecode; ++ break; ++ case CURLUPART_FRAGMENT: ++ ptr = u->fragment; ++ ifmissing = CURLUE_NO_FRAGMENT; ++ break; ++ case CURLUPART_URL: { ++ char *url; ++ char 
*scheme; ++ char *options = u->options; ++ char *port = u->port; ++ urldecode = FALSE; /* not for the whole thing */ ++ if(u->scheme && strcasecompare("file", u->scheme)) { ++ url = aprintf("file://%s%s%s", ++ u->path, ++ u->fragment? "#": "", ++ u->fragment? u->fragment : ""); ++ } ++ else if(!u->host) ++ return CURLUE_NO_HOST; ++ else { ++ const struct Curl_handler *h = NULL; ++ if(u->scheme) ++ scheme = u->scheme; ++ else if(flags & CURLU_DEFAULT_SCHEME) ++ scheme = (char *) DEFAULT_SCHEME; ++ else ++ return CURLUE_NO_SCHEME; ++ ++ if(scheme) { ++ h = Curl_builtin_scheme(scheme); ++ if(!port && (flags & CURLU_DEFAULT_PORT)) { ++ /* there's no stored port number, but asked to deliver ++ a default one for the scheme */ ++ if(h) { ++ snprintf(portbuf, sizeof(portbuf), "%ld", h->defport); ++ port = portbuf; ++ } ++ } ++ else if(port) { ++ /* there is a stored port number, but asked to inhibit if it matches ++ the default one for the scheme */ ++ if(h && (h->defport == u->portnum) && ++ (flags & CURLU_NO_DEFAULT_PORT)) ++ port = NULL; ++ } ++ } ++ if(h && !(h->flags & PROTOPT_URLOPTIONS)) ++ options = NULL; ++ ++ url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s", ++ scheme, ++ u->user ? u->user : "", ++ u->password ? ":": "", ++ u->password ? u->password : "", ++ options ? ";" : "", ++ options ? options : "", ++ (u->user || u->password || options) ? "@": "", ++ u->host, ++ port ? ":": "", ++ port ? port : "", ++ (u->path && (u->path[0] != '/')) ? "/": "", ++ u->path ? u->path : "/", ++ u->query? "?": "", ++ u->query? u->query : "", ++ u->fragment? "#": "", ++ u->fragment? 
u->fragment : ""); ++ } ++ if(!url) ++ return CURLUE_OUT_OF_MEMORY; ++ *part = url; ++ return CURLUE_OK; ++ break; ++ } ++ default: ++ ptr = NULL; ++ } ++ if(ptr) { ++ *part = strdup(ptr); ++ if(!*part) ++ return CURLUE_OUT_OF_MEMORY; ++ if(plusdecode) { ++ /* convert + to space */ ++ char *plus; ++ for(plus = *part; *plus; ++plus) { ++ if(*plus == '+') ++ *plus = ' '; ++ } ++ } ++ if(urldecode) { ++ char *decoded; ++ size_t dlen; ++ CURLcode res = Curl_urldecode(NULL, *part, 0, &decoded, &dlen, TRUE); ++ free(*part); ++ if(res) { ++ *part = NULL; ++ return CURLUE_URLDECODE; ++ } ++ *part = decoded; ++ } ++ return CURLUE_OK; ++ } ++ else ++ return ifmissing; ++} ++ ++CURLUcode curl_url_set(CURLU *u, CURLUPart what, ++ const char *part, unsigned int flags) ++{ ++ char **storep = NULL; ++ long port = 0; ++ bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; ++ bool plusencode = FALSE; ++ bool urlskipslash = FALSE; ++ bool appendquery = FALSE; ++ ++ if(!u) ++ return CURLUE_BAD_HANDLE; ++ if(!part) { ++ /* setting a part to NULL clears it */ ++ switch(what) { ++ case CURLUPART_URL: ++ break; ++ case CURLUPART_SCHEME: ++ storep = &u->scheme; ++ break; ++ case CURLUPART_USER: ++ storep = &u->user; ++ break; ++ case CURLUPART_PASSWORD: ++ storep = &u->password; ++ break; ++ case CURLUPART_OPTIONS: ++ storep = &u->options; ++ break; ++ case CURLUPART_HOST: ++ storep = &u->host; ++ break; ++ case CURLUPART_PORT: ++ storep = &u->port; ++ break; ++ case CURLUPART_PATH: ++ storep = &u->path; ++ break; ++ case CURLUPART_QUERY: ++ storep = &u->query; ++ break; ++ case CURLUPART_FRAGMENT: ++ storep = &u->fragment; ++ break; ++ default: ++ return CURLUE_UNKNOWN_PART; ++ } ++ if(storep && *storep) { ++ free(*storep); ++ *storep = NULL; ++ } ++ return CURLUE_OK; ++ } ++ ++ switch(what) { ++ case CURLUPART_SCHEME: ++ if(!(flags & CURLU_NON_SUPPORT_SCHEME) && ++ /* verify that it is a fine scheme */ ++ !Curl_builtin_scheme(part)) ++ return CURLUE_UNSUPPORTED_SCHEME; ++ storep = 
&u->scheme; ++ urlencode = FALSE; /* never */ ++ break; ++ case CURLUPART_USER: ++ storep = &u->user; ++ break; ++ case CURLUPART_PASSWORD: ++ storep = &u->password; ++ break; ++ case CURLUPART_OPTIONS: ++ storep = &u->options; ++ break; ++ case CURLUPART_HOST: ++ storep = &u->host; ++ break; ++ case CURLUPART_PORT: ++ urlencode = FALSE; /* never */ ++ port = strtol(part, NULL, 10); /* Port number must be decimal */ ++ if((port <= 0) || (port > 0xffff)) ++ return CURLUE_BAD_PORT_NUMBER; ++ storep = &u->port; ++ break; ++ case CURLUPART_PATH: ++ urlskipslash = TRUE; ++ storep = &u->path; ++ break; ++ case CURLUPART_QUERY: ++ plusencode = urlencode; ++ appendquery = (flags & CURLU_APPENDQUERY)?1:0; ++ storep = &u->query; ++ break; ++ case CURLUPART_FRAGMENT: ++ storep = &u->fragment; ++ break; ++ case CURLUPART_URL: { ++ /* ++ * Allow a new URL to replace the existing (if any) contents. ++ * ++ * If the existing contents is enough for a URL, allow a relative URL to ++ * replace it. ++ */ ++ CURLUcode result; ++ char *oldurl; ++ char *redired_url; ++ CURLU *handle2; ++ ++ if(Curl_is_absolute_url(part, NULL, MAX_SCHEME_LEN)) { ++ handle2 = curl_url(); ++ if(!handle2) ++ return CURLUE_OUT_OF_MEMORY; ++ result = parseurl(part, handle2, flags); ++ if(!result) ++ mv_urlhandle(handle2, u); ++ else ++ curl_url_cleanup(handle2); ++ return result; ++ } ++ /* extract the full "old" URL to do the redirect on */ ++ result = curl_url_get(u, CURLUPART_URL, &oldurl, flags); ++ if(result) { ++ /* couldn't get the old URL, just use the new! 
*/ ++ handle2 = curl_url(); ++ if(!handle2) ++ return CURLUE_OUT_OF_MEMORY; ++ result = parseurl(part, handle2, flags); ++ if(!result) ++ mv_urlhandle(handle2, u); ++ else ++ curl_url_cleanup(handle2); ++ return result; ++ } ++ ++ /* apply the relative part to create a new URL */ ++ redired_url = Curl_concat_url(oldurl, part); ++ free(oldurl); ++ if(!redired_url) ++ return CURLUE_OUT_OF_MEMORY; ++ ++ /* now parse the new URL */ ++ handle2 = curl_url(); ++ if(!handle2) { ++ free(redired_url); ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ result = parseurl(redired_url, handle2, flags); ++ free(redired_url); ++ if(!result) ++ mv_urlhandle(handle2, u); ++ else ++ curl_url_cleanup(handle2); ++ return result; ++ } ++ default: ++ return CURLUE_UNKNOWN_PART; ++ } ++ if(storep) { ++ const char *newp = part; ++ size_t nalloc = strlen(part); ++ ++ if(urlencode) { ++ const char *i; ++ char *o; ++ bool free_part = FALSE; ++ char *enc = malloc(nalloc * 3 + 1); /* for worst case! */ ++ if(!enc) ++ return CURLUE_OUT_OF_MEMORY; ++ if(plusencode) { ++ /* space to plus */ ++ i = part; ++ for(o = enc; *i; ++o, ++i) ++ *o = (*i == ' ') ? 
'+' : *i; ++ *o = 0; /* zero terminate */ ++ part = strdup(enc); ++ if(!part) { ++ free(enc); ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ free_part = TRUE; ++ } ++ for(i = part, o = enc; *i; i++) { ++ if(Curl_isunreserved(*i) || ++ ((*i == '/') && urlskipslash) || ++ ((*i == '=') && appendquery) || ++ ((*i == '+') && plusencode)) { ++ *o = *i; ++ o++; ++ } ++ else { ++ snprintf(o, 4, "%%%02x", *i); ++ o += 3; ++ } ++ } ++ *o = 0; /* zero terminate */ ++ newp = enc; ++ if(free_part) ++ free((char *)part); ++ } ++ else { ++ char *p; ++ newp = strdup(part); ++ if(!newp) ++ return CURLUE_OUT_OF_MEMORY; ++ p = (char *)newp; ++ while(*p) { ++ /* make sure percent encoded are lower case */ ++ if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && ++ (ISUPPER(p[1]) || ISUPPER(p[2]))) { ++ p[1] = (char)TOLOWER(p[1]); ++ p[2] = (char)TOLOWER(p[2]); ++ p += 3; ++ } ++ else ++ p++; ++ } ++ } ++ ++ if(appendquery) { ++ /* Append the string onto the old query. Add a '&' separator if none is ++ present at the end of the exsting query already */ ++ size_t querylen = u->query ? 
strlen(u->query) : 0; ++ bool addamperand = querylen && (u->query[querylen -1] != '&'); ++ if(querylen) { ++ size_t newplen = strlen(newp); ++ char *p = malloc(querylen + addamperand + newplen + 1); ++ if(!p) { ++ free((char *)newp); ++ return CURLUE_OUT_OF_MEMORY; ++ } ++ strcpy(p, u->query); /* original query */ ++ if(addamperand) ++ p[querylen] = '&'; /* ampersand */ ++ strcpy(&p[querylen + addamperand], newp); /* new suffix */ ++ free((char *)newp); ++ free(*storep); ++ *storep = p; ++ return CURLUE_OK; ++ } ++ } ++ ++ free(*storep); ++ *storep = (char *)newp; ++ } ++ /* set after the string, to make it not assigned if the allocation above ++ fails */ ++ if(port) ++ u->portnum = port; ++ return CURLUE_OK; ++} +diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc +index aa5fff0..0f6ac44 100644 +--- a/tests/data/Makefile.inc ++++ b/tests/data/Makefile.inc +@@ -178,6 +178,8 @@ test1533 test1534 test1535 test1536 test1537 test1538 \ + test1540 \ + test1550 test1551 test1552 test1553 test1554 test1555 test1556 test1557 \ + \ ++test1560 \ ++\ + test1590 \ + test1600 test1601 test1602 test1603 test1604 test1605 test1606 test1607 \ + test1608 test1609 \ +diff --git a/tests/data/test1560 b/tests/data/test1560 +new file mode 100644 +index 0000000..720df03 +--- /dev/null ++++ b/tests/data/test1560 +@@ -0,0 +1,28 @@ ++ ++ ++ ++unittest ++URL API ++ ++ ++ ++# ++# Client-side ++ ++ ++none ++ ++ ++file ++https ++http ++ ++ ++URL API ++ ++ ++lib1560 ++ ++ ++ ++ +diff --git a/tests/libtest/Makefile.am b/tests/libtest/Makefile.am +index d14f37d..dc97e32 100644 +--- a/tests/libtest/Makefile.am ++++ b/tests/libtest/Makefile.am +@@ -133,3 +133,8 @@ lib1521.c: $(top_srcdir)/tests/libtest/mk-lib1521.pl $(top_srcdir)/include/curl/ + + checksrc: + @PERL@ $(top_srcdir)/lib/checksrc.pl $(srcdir)/*.c ++ ++if CURLDEBUG ++# for debug builds, we scan the sources on all regular make invokes ++all-local: checksrc ++endif +diff --git a/tests/libtest/Makefile.inc 
b/tests/libtest/Makefile.inc +index 238ef97..7a3cd16 100644 +--- a/tests/libtest/Makefile.inc ++++ b/tests/libtest/Makefile.inc +@@ -30,6 +30,7 @@ noinst_PROGRAMS = chkhostname libauthretry libntlmconnect \ + lib1534 lib1535 lib1536 lib1537 lib1538 \ + lib1540 \ + lib1550 lib1551 lib1552 lib1553 lib1554 lib1555 lib1556 lib1557 \ ++ lib1560 \ + lib1900 \ + lib2033 + +@@ -507,6 +508,9 @@ lib1557_SOURCES = lib1557.c $(SUPPORTFILES) $(TESTUTIL) $(WARNLESS) + lib1557_LDADD = $(TESTUTIL_LIBS) + lib1557_CPPFLAGS = $(AM_CPPFLAGS) -DLIB1557 + ++lib1560_SOURCES = lib1560.c $(SUPPORTFILES) $(TESTUTIL) $(WARNLESS) ++lib1560_LDADD = $(TESTUTIL_LIBS) ++ + lib1900_SOURCES = lib1900.c $(SUPPORTFILES) $(TESTUTIL) $(WARNLESS) + lib1900_LDADD = $(TESTUTIL_LIBS) + lib1900_CPPFLAGS = $(AM_CPPFLAGS) +diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c +new file mode 100644 +index 0000000..669ea9a +--- /dev/null ++++ b/tests/libtest/lib1560.c +@@ -0,0 +1,760 @@ ++/*************************************************************************** ++ * _ _ ____ _ ++ * Project ___| | | | _ \| | ++ * / __| | | | |_) | | ++ * | (__| |_| | _ <| |___ ++ * \___|\___/|_| \_\_____| ++ * ++ * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++ * ++ * This software is licensed as described in the file COPYING, which ++ * you should have received as part of this distribution. The terms ++ * are also available at https://curl.haxx.se/docs/copyright.html. ++ * ++ * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++ * copies of the Software, and permit persons to whom the Software is ++ * furnished to do so, under the terms of the COPYING file. ++ * ++ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++ * KIND, either express or implied. 
++ * ++ ***************************************************************************/ ++ ++/* ++ * Note: ++ * ++ * Since the URL parser by default only accepts schemes that *this instance* ++ * of libcurl supports, make sure that the test1560 file lists all the schemes ++ * that this test will assume to be present! ++ */ ++ ++#include "test.h" ++ ++#include "testutil.h" ++#include "warnless.h" ++#include "memdebug.h" /* LAST include file */ ++ ++struct part { ++ CURLUPart part; ++ const char *name; ++}; ++ ++ ++static int checkparts(CURLU *u, const char *in, const char *wanted, ++ unsigned int getflags) ++{ ++ int i; ++ CURLUcode rc; ++ char buf[256]; ++ char *bufp = &buf[0]; ++ size_t len = sizeof(buf); ++ struct part parts[] = { ++ {CURLUPART_SCHEME, "scheme"}, ++ {CURLUPART_USER, "user"}, ++ {CURLUPART_PASSWORD, "password"}, ++ {CURLUPART_OPTIONS, "options"}, ++ {CURLUPART_HOST, "host"}, ++ {CURLUPART_PORT, "port"}, ++ {CURLUPART_PATH, "path"}, ++ {CURLUPART_QUERY, "query"}, ++ {CURLUPART_FRAGMENT, "fragment"}, ++ {0, NULL} ++ }; ++ buf[0] = 0; ++ ++ for(i = 0; parts[i].name; i++) { ++ char *p = NULL; ++ size_t n; ++ rc = curl_url_get(u, parts[i].part, &p, getflags); ++ if(!rc && p) { ++ snprintf(bufp, len, "%s%s", buf[0]?" | ":"", p); ++ } ++ else ++ snprintf(bufp, len, "%s[%d]", buf[0]?" 
| ":"", (int)rc); ++ ++ n = strlen(bufp); ++ bufp += n; ++ len -= n; ++ curl_free(p); ++ } ++ if(strcmp(buf, wanted)) { ++ fprintf(stderr, "in: %s\nwanted: %s\ngot: %s\n", in, wanted, buf); ++ return 1; ++ } ++ return 0; ++} ++ ++struct redircase { ++ const char *in; ++ const char *set; ++ const char *out; ++ unsigned int urlflags; ++ unsigned int setflags; ++ CURLUcode ucode; ++}; ++ ++struct setcase { ++ const char *in; ++ const char *set; ++ const char *out; ++ unsigned int urlflags; ++ unsigned int setflags; ++ CURLUcode ucode; ++}; ++ ++struct testcase { ++ const char *in; ++ const char *out; ++ unsigned int urlflags; ++ unsigned int getflags; ++ CURLUcode ucode; ++}; ++ ++struct urltestcase { ++ const char *in; ++ const char *out; ++ unsigned int urlflags; /* pass to curl_url() */ ++ unsigned int getflags; /* pass to curl_url_get() */ ++ CURLUcode ucode; ++}; ++ ++struct querycase { ++ const char *in; ++ const char *q; ++ const char *out; ++ unsigned int urlflags; /* pass to curl_url() */ ++ unsigned int qflags; /* pass to curl_url_get() */ ++ CURLUcode ucode; ++}; ++ ++static struct testcase get_parts_list[] ={ ++ {"https://127.0.0.1:443", ++ "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [17] | [18]", ++ 0, CURLU_NO_DEFAULT_PORT, CURLUE_OK}, ++ {"http://%3a:%3a@ex%0ample/%3f+?+%3f+%23#+%23%3f%g7", ++ "http | : | : | [13] | [6] | [15] | /?+ | ? 
# | +#?%g7", ++ 0, CURLU_URLDECODE, CURLUE_OK}, ++ {"http://%3a:%3a@ex%0ample/%3f?%3f%35#%35%3f%g7", ++ "http | %3a | %3a | [13] | ex%0ample | [15] | /%3f | %3f%35 | %35%3f%g7", ++ 0, 0, CURLUE_OK}, ++ {"http://HO0_-st%41/", ++ "http | [11] | [12] | [13] | HO0_-st%41 | [15] | / | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"file://hello.html", ++ "", ++ 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"http://HO0_-st/", ++ "http | [11] | [12] | [13] | HO0_-st | [15] | / | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"imap://user:pass;option@server/path", ++ "imap | user | pass | option | server | [15] | /path | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"http://user:pass;option@server/path", ++ "http | user | pass;option | [13] | server | [15] | /path | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"file:/hello.html", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"file://127.0.0.1/hello.html", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"file:////hello.html", ++ "file | [11] | [12] | [13] | [14] | [15] | //hello.html | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"file:///hello.html", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ 0, 0, CURLUE_OK}, ++ {"https://127.0.0.1", ++ "https | [11] | [12] | [13] | 127.0.0.1 | 443 | / | [17] | [18]", ++ 0, CURLU_DEFAULT_PORT, CURLUE_OK}, ++ {"https://127.0.0.1", ++ "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [17] | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https://[::1]:1234", ++ "https | [11] | [12] | [13] | [::1] | 1234 | / | [17] | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https://127abc.com", ++ "https | [11] | [12] | [13] | 127abc.com | [15] | / | [17] | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https:// example.com?check", ++ "", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, ++ {"https://e x a m p l e.com?check", ++ "", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, ++ 
{"https://example.com?check", ++ "https | [11] | [12] | [13] | example.com | [15] | / | check | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https://example.com:65536", ++ "", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_BAD_PORT_NUMBER}, ++ {"https://example.com:0#moo", ++ "", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_BAD_PORT_NUMBER}, ++ {"https://example.com:01#moo", ++ "https | [11] | [12] | [13] | example.com | 1 | / | " ++ "[17] | moo", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https://example.com:1#moo", ++ "https | [11] | [12] | [13] | example.com | 1 | / | " ++ "[17] | moo", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com#moo", ++ "http | [11] | [12] | [13] | example.com | [15] | / | " ++ "[17] | moo", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com", ++ "http | [11] | [12] | [13] | example.com | [15] | / | " ++ "[17] | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com/path/html", ++ "http | [11] | [12] | [13] | example.com | [15] | /path/html | " ++ "[17] | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com/path/html?query=name", ++ "http | [11] | [12] | [13] | example.com | [15] | /path/html | " ++ "query=name | [18]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com/path/html?query=name#anchor", ++ "http | [11] | [12] | [13] | example.com | [15] | /path/html | " ++ "query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://example.com:1234/path/html?query=name#anchor", ++ "http | [11] | [12] | [13] | example.com | 1234 | /path/html | " ++ "query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http:///user:password@example.com:1234/path/html?query=name#anchor", ++ "http | user | password | [13] | example.com | 1234 | /path/html | " ++ "query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"https://user:password@example.com:1234/path/html?query=name#anchor", ++ "https | user | password | [13] | example.com | 1234 | /path/html | " ++ 
"query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://user:password@example.com:1234/path/html?query=name#anchor", ++ "http | user | password | [13] | example.com | 1234 | /path/html | " ++ "query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http:/user:password@example.com:1234/path/html?query=name#anchor", ++ "http | user | password | [13] | example.com | 1234 | /path/html | " ++ "query=name | anchor", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http:////user:password@example.com:1234/path/html?query=name#anchor", ++ "", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, ++ {NULL, NULL, 0, 0, CURLUE_OK}, ++}; ++ ++static struct urltestcase get_url_list[] = { ++ {"HTTP://test/", "http://test/", 0, 0, CURLUE_OK}, ++ {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, ++ {"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, ++ {"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, ++ {"http://host/file\r", "", 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"http://host/file\n\x03", "", 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"htt\x02://host/file", "", ++ CURLU_NON_SUPPORT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, ++ {" http://host/file", "", 0, 0, CURLUE_MALFORMED_INPUT}, ++ /* here the password ends at the semicolon and options is 'word' */ ++ {"imap://user:pass;word@host/file", ++ "imap://user:pass;word@host/file", ++ 0, 0, CURLUE_OK}, ++ /* here the password has the semicolon */ ++ {"http://user:pass;word@host/file", ++ "http://user:pass;word@host/file", ++ 0, 0, CURLUE_OK}, ++ {"file:///file.txt#moo", ++ "file:///file.txt#moo", ++ 0, 0, CURLUE_OK}, ++ {"file:////file.txt", ++ "file:////file.txt", ++ 0, 0, CURLUE_OK}, ++ {"file:///file.txt", ++ "file:///file.txt", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com/hello/../here", ++ "http://example.com/hello/../here", ++ CURLU_PATH_AS_IS, 0, CURLUE_OK}, ++ {"http://example.com/hello/../here", ++ 
"http://example.com/here", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:80", ++ "http://example.com/", ++ 0, CURLU_NO_DEFAULT_PORT, CURLUE_OK}, ++ {"tp://example.com/path/html", ++ "", ++ 0, 0, CURLUE_UNSUPPORTED_SCHEME}, ++ {"http://hello:fool@example.com", ++ "", ++ CURLU_DISALLOW_USER, 0, CURLUE_USER_NOT_ALLOWED}, ++ {"http:/@example.com:123", ++ "http://example.com:123/", ++ 0, 0, CURLUE_OK}, ++ {"http:/:password@example.com", ++ "http://:password@example.com/", ++ 0, 0, CURLUE_OK}, ++ {"http://user@example.com?#", ++ "http://user@example.com/", ++ 0, 0, CURLUE_OK}, ++ {"http://user@example.com?", ++ "http://user@example.com/", ++ 0, 0, CURLUE_OK}, ++ {"http://user@example.com#anchor", ++ "http://user@example.com/#anchor", ++ 0, 0, CURLUE_OK}, ++ {"example.com/path/html", ++ "https://example.com/path/html", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"example.com/path/html", ++ "", ++ 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"http://user:password@example.com:1234/path/html?query=name#anchor", ++ "http://user:password@example.com:1234/path/html?query=name#anchor", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:1234/path/html?query=name#anchor", ++ "http://example.com:1234/path/html?query=name#anchor", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com/path/html?query=name#anchor", ++ "http://example.com/path/html?query=name#anchor", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com/path/html?query=name", ++ "http://example.com/path/html?query=name", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com/path/html", ++ "http://example.com/path/html", ++ 0, 0, CURLUE_OK}, ++ {"tp://example.com/path/html", ++ "tp://example.com/path/html", ++ CURLU_NON_SUPPORT_SCHEME, 0, CURLUE_OK}, ++ {NULL, NULL, 0, 0, 0} ++}; ++ ++static int checkurl(const char *url, const char *out) ++{ ++ if(strcmp(out, url)) { ++ fprintf(stderr, "Wanted: %s\nGot : %s\n", ++ out, url); ++ return 1; ++ } ++ return 0; ++} ++ ++/* !checksrc! 
disable SPACEBEFORECOMMA 1 */ ++static struct setcase set_parts_list[] = { ++ {"https://host/", ++ "path=%4A%4B%4C,", ++ "https://host/%4a%4b%4c", ++ 0, 0, CURLUE_NO_HOST}, ++ {"https://host/mooo?q#f", ++ "path=NULL,query=NULL,fragment=NULL,", ++ "https://host/", ++ 0, 0, CURLUE_NO_HOST}, ++ {"https://user:secret@host/", ++ "user=NULL,password=NULL,", ++ "https://host/", ++ 0, 0, CURLUE_NO_HOST}, ++ {NULL, ++ "scheme=https,user= @:,host=foobar,", ++ "https://%20%20%20%40%3a@foobar/", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {NULL, ++ "scheme=https,host= ,path= ,user= ,password= ,query= ,fragment= ,", ++ "https://%20:%20@%20%20/%20?+#%20", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {NULL, ++ "scheme=https,host=foobar,path=/this /path /is /here,", ++ "https://foobar/this%20/path%20/is%20/here", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {"imap://user:secret;opt@host/", ++ "options=updated,scheme=imaps,password=p4ssw0rd,", ++ "imaps://user:p4ssw0rd;updated@host/", ++ 0, 0, CURLUE_NO_HOST}, ++ {"imap://user:secret;optit@host/", ++ "scheme=https,", ++ "https://user:secret@host/", ++ 0, 0, CURLUE_NO_HOST}, ++ {"file:///file#anchor", ++ "scheme=https,host=example,", ++ "https://example/file#anchor", ++ 0, 0, CURLUE_NO_HOST}, ++ {NULL, /* start fresh! */ ++ "scheme=file,host=127.0.0.1,path=/no,user=anonymous,", ++ "file:///no", ++ 0, 0, CURLUE_OK}, ++ {NULL, /* start fresh! */ ++ "scheme=ftp,host=127.0.0.1,path=/no,user=anonymous,", ++ "ftp://anonymous@127.0.0.1/no", ++ 0, 0, CURLUE_OK}, ++ {NULL, /* start fresh! 
*/ ++ "scheme=https,host=example.com,", ++ "https://example.com/", ++ 0, CURLU_NON_SUPPORT_SCHEME, CURLUE_OK}, ++ {"http://user:foo@example.com/path?query#frag", ++ "fragment=changed,", ++ "http://user:foo@example.com/path?query#changed", ++ 0, CURLU_NON_SUPPORT_SCHEME, CURLUE_OK}, ++ {"http://example.com/", ++ "scheme=foo,", /* not accepted */ ++ "http://example.com/", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com/", ++ "scheme=https,path=/hello,fragment=snippet,", ++ "https://example.com/hello#snippet", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:80", ++ "user=foo,port=1922,", ++ "http://foo@example.com:1922/", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:80", ++ "user=foo,password=bar,", ++ "http://foo:bar@example.com:80/", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:80", ++ "user=foo,", ++ "http://foo@example.com:80/", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com", ++ "host=www.example.com,", ++ "http://www.example.com/", ++ 0, 0, CURLUE_OK}, ++ {"http://example.com:80", ++ "scheme=ftp,", ++ "ftp://example.com:80/", ++ 0, 0, CURLUE_OK}, ++ {NULL, NULL, NULL, 0, 0, 0} ++}; ++ ++static CURLUPart part2id(char *part) ++{ ++ if(!strcmp("url", part)) ++ return CURLUPART_URL; ++ if(!strcmp("scheme", part)) ++ return CURLUPART_SCHEME; ++ if(!strcmp("user", part)) ++ return CURLUPART_USER; ++ if(!strcmp("password", part)) ++ return CURLUPART_PASSWORD; ++ if(!strcmp("options", part)) ++ return CURLUPART_OPTIONS; ++ if(!strcmp("host", part)) ++ return CURLUPART_HOST; ++ if(!strcmp("port", part)) ++ return CURLUPART_PORT; ++ if(!strcmp("path", part)) ++ return CURLUPART_PATH; ++ if(!strcmp("query", part)) ++ return CURLUPART_QUERY; ++ if(!strcmp("fragment", part)) ++ return CURLUPART_FRAGMENT; ++ return 9999; /* bad input => bad output */ ++} ++ ++static void updateurl(CURLU *u, const char *cmd, unsigned int setflags) ++{ ++ const char *p = cmd; ++ ++ /* make sure the last command ends with a comma too! 
*/ ++ while(p) { ++ char *e = strchr(p, ','); ++ if(e) { ++ size_t n = e-p; ++ char buf[80]; ++ char part[80]; ++ char value[80]; ++ memcpy(buf, p, n); ++ buf[n] = 0; ++ if(2 == sscanf(buf, "%79[^=]=%79[^,]", part, value)) { ++ CURLUPart what = part2id(part); ++#if 0 ++ /* for debugging this */ ++ fprintf(stderr, "%s = %s [%d]\n", part, value, (int)what); ++#endif ++ if(!strcmp("NULL", value)) ++ curl_url_set(u, what, NULL, setflags); ++ else ++ curl_url_set(u, what, value, setflags); ++ } ++ p = e + 1; ++ continue; ++ } ++ break; ++ } ++ ++} ++ ++static struct redircase set_url_list[] = { ++ {"file://localhost/path?query#frag", ++ "foo#another", ++ "file:///foo#another", ++ 0, 0, 0}, ++ {"http://example.com/path?query#frag", ++ "https://two.example.com/bradnew", ++ "https://two.example.com/bradnew", ++ 0, 0, 0}, ++ {"http://example.com/path?query#frag", ++ "../../newpage#foo", ++ "http://example.com/newpage#foo", ++ 0, 0, 0}, ++ {"http://user:foo@example.com/path?query#frag", ++ "../../newpage", ++ "http://user:foo@example.com/newpage", ++ 0, 0, 0}, ++ {"http://user:foo@example.com/path?query#frag", ++ "../newpage", ++ "http://user:foo@example.com/newpage", ++ 0, 0, 0}, ++ {NULL, NULL, NULL, 0, 0, 0} ++}; ++ ++static int set_url(void) ++{ ++ int i; ++ CURLUcode rc; ++ CURLU *urlp; ++ int error = 0; ++ ++ for(i = 0; set_url_list[i].in && !error; i++) { ++ char *url = NULL; ++ urlp = curl_url(); ++ if(!urlp) ++ break; ++ rc = curl_url_set(urlp, CURLUPART_URL, set_url_list[i].in, ++ set_url_list[i].urlflags); ++ if(!rc) { ++ rc = curl_url_set(urlp, CURLUPART_URL, set_url_list[i].set, ++ set_url_list[i].setflags); ++ if(rc) { ++ fprintf(stderr, "%s:%d Set URL %s returned %d\n", ++ __FILE__, __LINE__, set_url_list[i].set, ++ (int)rc); ++ error++; ++ } ++ else { ++ rc = curl_url_get(urlp, CURLUPART_URL, &url, 0); ++ if(rc) { ++ fprintf(stderr, "%s:%d Get URL returned %d\n", ++ __FILE__, __LINE__, (int)rc); ++ error++; ++ } ++ else { ++ if(checkurl(url, 
set_url_list[i].out)) { ++ error++; ++ } ++ } ++ } ++ curl_free(url); ++ } ++ else if(rc != set_url_list[i].ucode) { ++ fprintf(stderr, "Set URL\nin: %s\nreturned %d (expected %d)\n", ++ set_url_list[i].in, (int)rc, set_url_list[i].ucode); ++ error++; ++ } ++ curl_url_cleanup(urlp); ++ } ++ return error; ++} ++ ++static int set_parts(void) ++{ ++ int i; ++ CURLUcode rc; ++ int error = 0; ++ ++ for(i = 0; set_parts_list[i].set && !error; i++) { ++ char *url = NULL; ++ CURLU *urlp = curl_url(); ++ if(!urlp) { ++ error++; ++ break; ++ } ++ if(set_parts_list[i].in) ++ rc = curl_url_set(urlp, CURLUPART_URL, set_parts_list[i].in, ++ set_parts_list[i].urlflags); ++ else ++ rc = CURLUE_OK; ++ if(!rc) { ++ updateurl(urlp, set_parts_list[i].set, set_parts_list[i].setflags); ++ rc = curl_url_get(urlp, CURLUPART_URL, &url, 0); ++ ++ if(rc) { ++ fprintf(stderr, "%s:%d Get URL returned %d\n", ++ __FILE__, __LINE__, (int)rc); ++ error++; ++ } ++ else if(checkurl(url, set_parts_list[i].out)) { ++ error++; ++ } ++ } ++ else if(rc != set_parts_list[i].ucode) { ++ fprintf(stderr, "Set parts\nin: %s\nreturned %d (expected %d)\n", ++ set_parts_list[i].in, (int)rc, set_parts_list[i].ucode); ++ error++; ++ } ++ curl_free(url); ++ curl_url_cleanup(urlp); ++ } ++ return error; ++} ++ ++static int get_url(void) ++{ ++ int i; ++ CURLUcode rc; ++ int error = 0; ++ for(i = 0; get_url_list[i].in && !error; i++) { ++ char *url = NULL; ++ CURLU *urlp = curl_url(); ++ if(!urlp) { ++ error++; ++ break; ++ } ++ rc = curl_url_set(urlp, CURLUPART_URL, get_url_list[i].in, ++ get_url_list[i].urlflags); ++ if(!rc) { ++ rc = curl_url_get(urlp, CURLUPART_URL, &url, get_url_list[i].getflags); ++ ++ if(rc) { ++ fprintf(stderr, "%s:%d returned %d\n", ++ __FILE__, __LINE__, (int)rc); ++ error++; ++ } ++ else { ++ if(checkurl(url, get_url_list[i].out)) { ++ error++; ++ } ++ } ++ } ++ else if(rc != get_url_list[i].ucode) { ++ fprintf(stderr, "Get URL\nin: %s\nreturned %d (expected %d)\n", ++ get_url_list[i].in, 
(int)rc, get_url_list[i].ucode); ++ error++; ++ } ++ curl_free(url); ++ curl_url_cleanup(urlp); ++ } ++ return error; ++} ++ ++static int get_parts(void) ++{ ++ int i; ++ CURLUcode rc; ++ CURLU *urlp; ++ int error = 0; ++ for(i = 0; get_parts_list[i].in && !error; i++) { ++ urlp = curl_url(); ++ if(!urlp) { ++ error++; ++ break; ++ } ++ rc = curl_url_set(urlp, CURLUPART_URL, ++ get_parts_list[i].in, ++ get_parts_list[i].urlflags); ++ if(rc != get_parts_list[i].ucode) { ++ fprintf(stderr, "Get parts\nin: %s\nreturned %d (expected %d)\n", ++ get_parts_list[i].in, (int)rc, get_parts_list[i].ucode); ++ error++; ++ } ++ else if(get_parts_list[i].ucode) { ++ /* the expected error happened */ ++ } ++ else if(checkparts(urlp, get_parts_list[i].in, get_parts_list[i].out, ++ get_parts_list[i].getflags)) ++ error++; ++ curl_url_cleanup(urlp); ++ } ++ return error; ++} ++ ++static struct querycase append_list[] = { ++ {"HTTP://test/?s", "name=joe\x02", "http://test/?s&name=joe%02", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {"HTTP://test/?size=2#f", "name=joe=", "http://test/?size=2&name=joe=#f", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {"HTTP://test/?size=2#f", "name=joe doe", ++ "http://test/?size=2&name=joe+doe#f", ++ 0, CURLU_URLENCODE, CURLUE_OK}, ++ {"HTTP://test/", "name=joe", "http://test/?name=joe", 0, 0, CURLUE_OK}, ++ {"HTTP://test/?size=2", "name=joe", "http://test/?size=2&name=joe", ++ 0, 0, CURLUE_OK}, ++ {"HTTP://test/?size=2&", "name=joe", "http://test/?size=2&name=joe", ++ 0, 0, CURLUE_OK}, ++ {"HTTP://test/?size=2#f", "name=joe", "http://test/?size=2&name=joe#f", ++ 0, 0, CURLUE_OK}, ++ {NULL, NULL, NULL, 0, 0, 0} ++}; ++ ++static int append(void) ++{ ++ int i; ++ CURLUcode rc; ++ CURLU *urlp; ++ int error = 0; ++ for(i = 0; append_list[i].in && !error; i++) { ++ urlp = curl_url(); ++ if(!urlp) { ++ error++; ++ break; ++ } ++ rc = curl_url_set(urlp, CURLUPART_URL, ++ append_list[i].in, ++ append_list[i].urlflags); ++ if(rc) ++ error++; ++ else ++ rc = 
curl_url_set(urlp, CURLUPART_QUERY, ++ append_list[i].q, ++ append_list[i].qflags | CURLU_APPENDQUERY); ++ if(error) ++ ; ++ else if(rc != append_list[i].ucode) { ++ fprintf(stderr, "Append\nin: %s\nreturned %d (expected %d)\n", ++ append_list[i].in, (int)rc, append_list[i].ucode); ++ error++; ++ } ++ else if(append_list[i].ucode) { ++ /* the expected error happened */ ++ } ++ else { ++ char *url; ++ rc = curl_url_get(urlp, CURLUPART_URL, &url, 0); ++ if(rc) { ++ fprintf(stderr, "%s:%d Get URL returned %d\n", ++ __FILE__, __LINE__, (int)rc); ++ error++; ++ } ++ else { ++ if(checkurl(url, append_list[i].out)) { ++ error++; ++ } ++ curl_free(url); ++ } ++ } ++ curl_url_cleanup(urlp); ++ } ++ return error; ++} ++ ++int test(char *URL) ++{ ++ (void)URL; /* not used */ ++ ++ if(append()) ++ return 5; ++ ++ if(set_url()) ++ return 1; ++ ++ if(set_parts()) ++ return 2; ++ ++ if(get_url()) ++ return 3; ++ ++ if(get_parts()) ++ return 4; ++ ++ printf("success\n"); ++ return 0; ++} +-- +2.17.2 + + +From 581a3b902b949f090776c5295a8aa0786edba773 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Sat, 8 Sep 2018 16:02:25 +0200 +Subject: [PATCH 02/14] curl_url-docs: fix AVAILABILITY as Added in curl 7.62.0 + +Upstream-commit: 890eea5aade0fc4ee167e83948d53351c11dd1ae +Signed-off-by: Kamil Dudka +--- + docs/libcurl/curl_url.3 | 2 +- + docs/libcurl/curl_url_cleanup.3 | 2 +- + docs/libcurl/curl_url_dup.3 | 2 +- + docs/libcurl/curl_url_get.3 | 2 +- + docs/libcurl/curl_url_set.3 | 2 +- + 5 files changed, 5 insertions(+), 5 deletions(-) + +diff --git a/docs/libcurl/curl_url.3 b/docs/libcurl/curl_url.3 +index 0a56264..a14c45b 100644 +--- a/docs/libcurl/curl_url.3 ++++ b/docs/libcurl/curl_url.3 +@@ -55,7 +55,7 @@ Returns a \fBCURLU *\fP if successful, or NULL if out of memory. 
+ } + .fi + .SH AVAILABILITY +-Added in curl 7.63.0 ++Added in curl 7.62.0 + .SH "SEE ALSO" + .BR curl_url_cleanup "(3), " curl_url_get "(3), " curl_url_set "(3), " + .BR curl_url_dup "(3), " +diff --git a/docs/libcurl/curl_url_cleanup.3 b/docs/libcurl/curl_url_cleanup.3 +index a8158b7..4d095a9 100644 +--- a/docs/libcurl/curl_url_cleanup.3 ++++ b/docs/libcurl/curl_url_cleanup.3 +@@ -38,7 +38,7 @@ none + curl_url_cleanup(url); + .fi + .SH AVAILABILITY +-Added in curl 7.63.0 ++Added in curl 7.62.0 + .SH "SEE ALSO" + .BR curl_url_dup "(3), " curl_url "(3), " curl_url_set "(3), " + .BR curl_url_get "(3), " +diff --git a/docs/libcurl/curl_url_dup.3 b/docs/libcurl/curl_url_dup.3 +index 4815dbd..c0259e0 100644 +--- a/docs/libcurl/curl_url_dup.3 ++++ b/docs/libcurl/curl_url_dup.3 +@@ -46,7 +46,7 @@ Returns a new handle or NULL if out of memory. + curl_url_cleanup(url); + .fi + .SH AVAILABILITY +-Added in curl 7.63.0 ++Added in curl 7.62.0 + .SH "SEE ALSO" + .BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_set "(3), " + .BR curl_url_get "(3), " +diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3 +index 824d496..b1313ea 100644 +--- a/docs/libcurl/curl_url_get.3 ++++ b/docs/libcurl/curl_url_get.3 +@@ -104,7 +104,7 @@ If this function returns an error, no URL part is returned. + } + .fi + .SH AVAILABILITY +-Added in curl 7.63.0 ++Added in curl 7.62.0 + .SH "SEE ALSO" + .BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_set "(3), " + .BR curl_url_dup "(3), " +diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 +index 75fc0d9..79272e8 100644 +--- a/docs/libcurl/curl_url_set.3 ++++ b/docs/libcurl/curl_url_set.3 +@@ -114,7 +114,7 @@ If this function returns an error, no URL part is returned. 
+ curl_url_cleanup(url); + .fi + .SH AVAILABILITY +-Added in curl 7.63.0 ++Added in curl 7.62.0 + .SH "SEE ALSO" + .BR curl_url_cleanup "(3), " curl_url "(3), " curl_url_get "(3), " + .BR curl_url_dup "(3), " +-- +2.17.2 + + +From 9c33cac88a9d94557ba48df7c290afc950895bc4 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Sat, 8 Sep 2018 19:39:57 +0200 +Subject: [PATCH 03/14] curl_url_set.3: correct description + +Upstream-commit: 8b85a3cac516a302a8ce3911cf8b9a229b62a59d +Signed-off-by: Kamil Dudka +--- + docs/libcurl/curl_url_set.3 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 +index 79272e8..0d6e9aa 100644 +--- a/docs/libcurl/curl_url_set.3 ++++ b/docs/libcurl/curl_url_set.3 +@@ -21,7 +21,7 @@ + .\" ************************************************************************** + .TH curl_url_set 3 "6 Aug 2018" "libcurl" "libcurl Manual" + .SH NAME +-curl_url_set - set a part from a URL ++curl_url_set - set a URL part + .SH SYNOPSIS + .B #include + +-- +2.17.2 + + +From dc2c1d978ec78a5f278d194e1b258015e8bfd664 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Sat, 8 Sep 2018 22:57:36 +0200 +Subject: [PATCH 04/14] urlapi: avoid derefencing a possible NULL pointer + +Coverity CID 1439134 + +Upstream-commit: 01dedc99fc8d386fe955421ab05a1c4094c9190b +Signed-off-by: Kamil Dudka +--- + lib/urlapi.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/urlapi.c b/lib/urlapi.c +index 8287861..3183598 100644 +--- a/lib/urlapi.c ++++ b/lib/urlapi.c +@@ -438,10 +438,10 @@ static CURLUcode parse_hostname_login(struct Curl_URL *u, + *hostname = ++ptr; + + /* We could use the login information in the URL so extract it. Only parse +- options if the handler says we should. */ ++ options if the handler says we should. Note that 'h' might be NULL! */ + ccode = Curl_parse_login_details(login, ptr - login - 1, + &userp, &passwdp, +- h->flags & PROTOPT_URLOPTIONS ? 
++ (h && (h->flags & PROTOPT_URLOPTIONS)) ? + &optionsp:NULL); + if(ccode) { + result = CURLUE_MALFORMED_INPUT; +-- +2.17.2 + + +From 6684d372c20609afd21f21399deda6deedea911e Mon Sep 17 00:00:00 2001 +From: Daniel Gustafsson +Date: Sat, 8 Sep 2018 23:05:21 +0200 +Subject: [PATCH 05/14] url.c: fix comment typo and indentation + +Closes #2960 + +Upstream-commit: 6e4b8c5073c3985cef98656c3b375981d25a8898 +Signed-off-by: Kamil Dudka +--- + lib/url.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/lib/url.c b/lib/url.c +index 4f75f11..dcc6cc8 100644 +--- a/lib/url.c ++++ b/lib/url.c +@@ -1939,7 +1939,7 @@ static struct connectdata *allocate_conn(struct Curl_easy *data) + return NULL; + } + +-/* returns the handdler if the given scheme is built-in */ ++/* returns the handler if the given scheme is built-in */ + const struct Curl_handler *Curl_builtin_scheme(const char *scheme) + { + const struct Curl_handler * const *pp; +@@ -2245,7 +2245,7 @@ static CURLcode parseurlandfillconn(struct Curl_easy *data, + the host-name part */ + memmove(path + hostlen + 1, path, pathlen + 1); + +- /* now copy the trailing host part in front of the existing path */ ++ /* now copy the trailing host part in front of the existing path */ + memcpy(path + 1, query, hostlen); + + path[0]='/'; /* prepend the missing slash */ +-- +2.17.2 + + +From 0f8d6ab26abd00459d1364a69d7771a6b3a58ce3 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Mon, 10 Sep 2018 10:09:18 +0200 +Subject: [PATCH 06/14] libcurl-url.3: overview man page for the URL API + +Closes #2967 + +Upstream-commit: 11e8a43f853b9bf050db58f073e6f2411821ce60 +Signed-off-by: Kamil Dudka +--- + docs/libcurl/Makefile.inc | 1 + + docs/libcurl/libcurl-url.3 | 137 +++++++++++++++++++++++++++++++++++++ + 2 files changed, 138 insertions(+) + create mode 100644 docs/libcurl/libcurl-url.3 + +diff --git a/docs/libcurl/Makefile.inc b/docs/libcurl/Makefile.inc +index 955492c..97cb50c 100644 +--- a/docs/libcurl/Makefile.inc 
++++ b/docs/libcurl/Makefile.inc +@@ -23,4 +23,5 @@ man_MANS = curl_easy_cleanup.3 curl_easy_getinfo.3 curl_easy_init.3 \ + curl_mime_filename.3 curl_mime_subparts.3 \ + curl_mime_type.3 curl_mime_headers.3 curl_mime_encoder.3 libcurl-env.3 \ + curl_url.3 curl_url_cleanup.3 curl_url_dup.3 curl_url_get.3 curl_url_set.3 \ ++ libcurl-url.3 \ + libcurl-security.3 +diff --git a/docs/libcurl/libcurl-url.3 b/docs/libcurl/libcurl-url.3 +new file mode 100644 +index 0000000..4ad0a15 +--- /dev/null ++++ b/docs/libcurl/libcurl-url.3 +@@ -0,0 +1,137 @@ ++.\" ************************************************************************** ++.\" * _ _ ____ _ ++.\" * Project ___| | | | _ \| | ++.\" * / __| | | | |_) | | ++.\" * | (__| |_| | _ <| |___ ++.\" * \___|\___/|_| \_\_____| ++.\" * ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++.\" * ++.\" * This software is licensed as described in the file COPYING, which ++.\" * you should have received as part of this distribution. The terms ++.\" * are also available at https://curl.haxx.se/docs/copyright.html. ++.\" * ++.\" * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++.\" * copies of the Software, and permit persons to whom the Software is ++.\" * furnished to do so, under the terms of the COPYING file. ++.\" * ++.\" * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++.\" * KIND, either express or implied. ++.\" * ++.\" ************************************************************************** ++.TH libcurl 3 "10 Sep 2018" "libcurl" "libcurl url interface" ++.SH NAME ++libcurl-url \- URL interface overview ++.SH DESCRIPTION ++The URL interface provides a set of functions for parsing and generating URLs. ++.SH INCLUDE ++You still only include in your code. Note that the URL API was ++introduced in 7.62.0. 
++.SH CREATE ++Create a handle that holds URL info and resources with \fIcurl_url(3)\fP: ++ ++ CURLU *h = curl_url(); ++.SH CLEANUP ++When done with it, clean it up with \fIcurl_url_cleanup(3)\fP: ++ ++ curl_url_cleanup(h); ++.SH DUPLICATE ++When you need a copy of a handle, just duplicate it with \fIcurl_url_dup(3)\fP: ++ ++ CURLU *nh = curl_url_dup(h); ++.SH PARSING ++By "setting" a URL to the handle with \fIcurl_url_set(3)\fP, the URL is parsed ++and stored in the handle. If the URL is not syntactically correct it will ++return an error instead. ++ ++.nf ++ rc = curl_url_set(h, CURLUPART_URL, ++ "https://example.com:449/foo/bar?name=moo", 0); ++.fi ++ ++The zero in the fourth argument is a bitmask for changing specific features. ++ ++If successful, this stores the URL in its individual parts within the handle. ++.SH REDIRECT ++When a handle already contains info about a URL, setting a relative URL will ++make it "redirect" to adapt to it. ++ ++ rc = curl_url_set(h, CURLUPART_URL, "../test?another", 0); ++.SH "GET URL" ++The `CURLU` handle represents a URL and you can easily extract that with ++\fIcurl_url_get(3)\fP: ++ ++ char *url; ++ rc = curl_url_get(h, CURLUPART_URL, &url, 0); ++ curl_free(url); ++ ++The zero in the fourth argument is a bitmask for changing specific features. ++.SH "GET PARTS" ++When a URL has been parsed or parts have been set, you can extract those ++pieces from the handle at any time. 
++ ++.nf ++ rc = curl_url_get(h, CURLUPART_HOST, &host, 0); ++ rc = curl_url_get(h, CURLUPART_SCHEME, &scheme, 0); ++ rc = curl_url_get(h, CURLUPART_USER, &user, 0); ++ rc = curl_url_get(h, CURLUPART_PASSWORD, &password, 0); ++ rc = curl_url_get(h, CURLUPART_PORT, &port, 0); ++ rc = curl_url_get(h, CURLUPART_PATH, &path, 0); ++ rc = curl_url_get(h, CURLUPART_QUERY, &query, 0); ++ rc = curl_url_get(h, CURLUPART_FRAGMENT, &fragment, 0); ++.fi ++ ++Extracted parts are not URL decoded unless the user also asks for it with the ++CURLU_URLDECODE flag set in the fourth bitmask argument. ++ ++Remember to free the returned string with \fIcurl_free(3)\fP when you're done ++with it! ++.SH "SET PARTS" ++A user set individual URL parts, either after having parsed a full URL or ++instead of parsing such. ++ ++.nf ++ rc = curl_url_set(urlp, CURLUPART_HOST, "www.example.com", 0); ++ rc = curl_url_set(urlp, CURLUPART_SCHEME, "https", 0); ++ rc = curl_url_set(urlp, CURLUPART_USER, "john", 0); ++ rc = curl_url_set(urlp, CURLUPART_PASSWORD, "doe", 0); ++ rc = curl_url_set(urlp, CURLUPART_PORT, "443", 0); ++ rc = curl_url_set(urlp, CURLUPART_PATH, "/index.html", 0); ++ rc = curl_url_set(urlp, CURLUPART_QUERY, "name=john", 0); ++ rc = curl_url_set(urlp, CURLUPART_FRAGMENT, "anchor", 0); ++.fi ++ ++Set parts are not URL encoded unless the user asks for it with the ++`CURLU_URLENCODE` flag. ++.SH "APPENDQUERY" ++An application can append a string to the right end of the query part with the ++`CURLU_APPENDQUERY` flag to \fIcurl_url_set(3)\fP. ++ ++Imagine a handle that holds the URL `https://example.com/?shoes=2`. An ++application can then add the string `hat=1` to the query part like this: ++ ++.nf ++ rc = curl_url_set(urlp, CURLUPART_QUERY, "hat=1", CURLU_APPENDQUERY); ++.fi ++ ++It will even notice the lack of an ampersand (`&`) separator so it will inject ++one too, and the handle's full URL will then equal ++`https://example.com/?shoes=2&hat=1`. 
++ ++The appended string can of course also get URL encoded on add, and if asked to ++URL encode, the encoding process will skip the '=' character. For example, ++append `candy=N&N` to what we already have, and URL encode it to deal with the ++ampersand in the data: ++ ++.nf ++ rc = curl_url_set(urlp, CURLUPART_QUERY, "candy=N&N", ++ CURLU_APPENDQUERY | CURLU_URLENCODE); ++.fi ++ ++Now the URL looks like ++.nf ++ https://example.com/?shoes=2&hat=1&candy=N%26N` ++.fi ++.SH "SEE ALSO" ++.BR curl_url "(3), " curl_url_cleanup "(3), " curl_url_get "(3), " ++.BR curl_url_dup "(3), " curl_url_set "(3), " CURLOPT_URL "(3), " +-- +2.17.2 + + +From 4c235b460cf40f8ce0c6ad06b44ecb4dddc128e4 Mon Sep 17 00:00:00 2001 +From: Dave Reisner +Date: Mon, 10 Sep 2018 09:38:46 -0400 +Subject: [PATCH 07/14] curl_url_set.3: fix typo in reference to + CURLU_APPENDQUERY + +Upstream-commit: 04110573801feb2f278e2f774087a0525d5e8d0a +Signed-off-by: Kamil Dudka +--- + docs/libcurl/curl_url_set.3 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 +index 0d6e9aa..b2b273f 100644 +--- a/docs/libcurl/curl_url_set.3 ++++ b/docs/libcurl/curl_url_set.3 +@@ -75,7 +75,7 @@ If used in with \fICURLU_APPENDQUERY\fP, the provided part will be appended on + the end of the existing query - and if the previous part didn't end with an + ampersand (&), an ampersand will be inserted before the new appended part. + +-When \fCURLU_APPENDQUERY\fP is used together with \fICURLU_URLENCODE\fP, ++When \fICURLU_APPENDQUERY\fP is used together with \fICURLU_URLENCODE\fP, + the '=' symbols will not be URL encoded. + + The question mark in the URL is not part of the actual query contents. 
+-- +2.17.2 + + +From fb07ea0cf9c612b2fad6a113b1d40aa7896fe43a Mon Sep 17 00:00:00 2001 +From: Dave Reisner +Date: Mon, 10 Sep 2018 09:39:33 -0400 +Subject: [PATCH 08/14] curl_url_set.3: properly escape \n in example code + +This yields + + "the scheme is %s\n" + +instead of + + "the scheme is %s0 + +Closes #2970 + +Upstream-commit: c1e5980f6672a2bd2d26894f093b435f2deb04e0 +Signed-off-by: Kamil Dudka +--- + docs/libcurl/curl_url_get.3 | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/docs/libcurl/curl_url_get.3 b/docs/libcurl/curl_url_get.3 +index b1313ea..53f7954 100644 +--- a/docs/libcurl/curl_url_get.3 ++++ b/docs/libcurl/curl_url_get.3 +@@ -97,7 +97,7 @@ If this function returns an error, no URL part is returned. + char *scheme; + rc = curl_url_get(url, CURLUPART_SCHEME, &scheme, 0); + if(!rc) { +- printf("the scheme is %s\n", scheme); ++ printf("the scheme is %s\\n", scheme); + curl_free(scheme); + } + curl_url_cleanup(url); +-- +2.17.2 + + +From 376ae7de5a5a5f5b5513e6055700d010f21d4da3 Mon Sep 17 00:00:00 2001 +From: Daniel Gustafsson +Date: Wed, 19 Sep 2018 13:44:10 +0200 +Subject: [PATCH 09/14] urlapi: don't set value which is never read + +In the CURLUPART_URL case, there is no codepath which invokes url +decoding so remove the assignment of the urldecode variable. This +fixes the deadstore bug-report from clang static analysis. 
+ +Closes #3015 +Reviewed-by: Daniel Stenberg + +Upstream-commit: 522e647cc52c45ebdb58d57f242204f9a72c45dd +Signed-off-by: Kamil Dudka +--- + lib/urlapi.c | 1 - + 1 file changed, 1 deletion(-) + +diff --git a/lib/urlapi.c b/lib/urlapi.c +index 3183598..127f390 100644 +--- a/lib/urlapi.c ++++ b/lib/urlapi.c +@@ -970,7 +970,6 @@ CURLUcode curl_url_get(CURLU *u, CURLUPart what, + char *scheme; + char *options = u->options; + char *port = u->port; +- urldecode = FALSE; /* not for the whole thing */ + if(u->scheme && strcasecompare("file", u->scheme)) { + url = aprintf("file://%s%s%s", + u->path, +-- +2.17.2 + + +From 26dd137f3ca894e6402a98889d3b182f608d3c7f Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Wed, 19 Sep 2018 10:17:03 +0200 +Subject: [PATCH 10/14] urlapi: add CURLU_GUESS_SCHEME and fix hostname + acceptance + +In order for this API to fully work for libcurl itself, it now offers a +CURLU_GUESS_SCHEME flag that makes it "guess" scheme based on the host +name prefix just like libcurl always did. If there's no known prefix, it +will guess "http://". + +Separately, it relaxes the check of the host name so that IDN host names +can be passed in as well. + +Both these changes are necessary for libcurl itself to use this API. + +Assisted-by: Daniel Gustafsson +Closes #3018 + +Upstream-commit: 9307c219ad4741db860b864c860ac2f8bf9fad9d +Signed-off-by: Kamil Dudka +--- + docs/libcurl/curl_url_set.3 | 10 ++++++++ + include/curl/urlapi.h | 1 + + lib/urlapi.c | 48 ++++++++++++++++++++++++++++--------- + tests/data/test1560 | 6 +++++ + tests/libtest/lib1560.c | 26 +++++++++++++++++++- + 5 files changed, 79 insertions(+), 12 deletions(-) + +diff --git a/docs/libcurl/curl_url_set.3 b/docs/libcurl/curl_url_set.3 +index b2b273f..95b76bd 100644 +--- a/docs/libcurl/curl_url_set.3 ++++ b/docs/libcurl/curl_url_set.3 +@@ -96,6 +96,16 @@ The query part gets space-to-plus conversion before the URL conversion. 
+ + This URL encoding is charset unaware and will convert the input on a + byte-by-byte manner. ++.IP CURLU_DEFAULT_SCHEME ++If set, will make libcurl allow the URL to be set without a scheme and then ++sets that to the default scheme: HTTPS. Overrides the \fICURLU_GUESS_SCHEME\fP ++option if both are set. ++.IP CURLU_GUESS_SCHEME ++If set, will make libcurl allow the URL to be set without a scheme and it ++instead "guesses" which scheme that was intended based on the host name. If ++the outermost sub-domain name matches DICT, FTP, IMAP, LDAP, POP3 or SMTP then ++that scheme will be used, otherwise it picks HTTP. Conflicts with the ++\fICURLU_DEFAULT_SCHEME\fP option which takes precendence if both are set. + .SH RETURN VALUE + Returns a CURLUcode error value, which is CURLUE_OK (0) if everything went + fine. +diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h +index b16cfce..319de35 100644 +--- a/include/curl/urlapi.h ++++ b/include/curl/urlapi.h +@@ -75,6 +75,7 @@ typedef enum { + #define CURLU_URLDECODE (1<<6) /* URL decode on get */ + #define CURLU_URLENCODE (1<<7) /* URL encode on set */ + #define CURLU_APPENDQUERY (1<<8) /* append a form style part */ ++#define CURLU_GUESS_SCHEME (1<<9) /* legacy curl-style guessing */ + + typedef struct Curl_URL CURLU; + +diff --git a/lib/urlapi.c b/lib/urlapi.c +index 127f390..45f1e14 100644 +--- a/lib/urlapi.c ++++ b/lib/urlapi.c +@@ -554,7 +554,7 @@ static CURLUcode junkscan(char *part) + + static CURLUcode hostname_check(char *hostname, unsigned int flags) + { +- const char *l; /* accepted characters */ ++ const char *l = NULL; /* accepted characters */ + size_t len; + size_t hlen = strlen(hostname); + (void)flags; +@@ -564,14 +564,21 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags) + l = "0123456789abcdefABCDEF::."; + hlen -= 2; + } +- else /* % for URL escaped letters */ +- l = "0123456789abcdefghijklimnopqrstuvwxyz-_.ABCDEFGHIJKLIMNOPQRSTUVWXYZ%"; +- +- len = strspn(hostname, l); +- 
if(hlen != len) +- /* hostname with bad content */ +- return CURLUE_MALFORMED_INPUT; + ++ if(l) { ++ /* only valid letters are ok */ ++ len = strspn(hostname, l); ++ if(hlen != len) ++ /* hostname with bad content */ ++ return CURLUE_MALFORMED_INPUT; ++ } ++ else { ++ /* letters from the second string is not ok */ ++ len = strcspn(hostname, " "); ++ if(hlen != len) ++ /* hostname with bad content */ ++ return CURLUE_MALFORMED_INPUT; ++ } + return CURLUE_OK; + } + +@@ -587,7 +594,7 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) + CURLUcode result; + bool url_has_scheme = FALSE; + char schemebuf[MAX_SCHEME_LEN]; +- char *schemep; ++ char *schemep = NULL; + size_t schemelen = 0; + size_t urllen; + const struct Curl_handler *h = NULL; +@@ -723,9 +730,10 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) + else { + /* no scheme! */ + +- if(!(flags & CURLU_DEFAULT_SCHEME)) ++ if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) + return CURLUE_MALFORMED_INPUT; +- schemep = (char *) DEFAULT_SCHEME; ++ if(flags & CURLU_DEFAULT_SCHEME) ++ schemep = (char *) DEFAULT_SCHEME; + + /* + * The URL was badly formatted, let's try without scheme specified. 
+@@ -744,6 +752,24 @@ static CURLUcode seturl(const char *url, CURLU *u, unsigned int flags) + memcpy(hostname, hostp, len); + hostname[len] = 0; + ++ if((flags & CURLU_GUESS_SCHEME) && !schemep) { ++ /* legacy curl-style guess based on host name */ ++ if(checkprefix("ftp.", hostname)) ++ schemep = (char *)"ftp"; ++ else if(checkprefix("dict.", hostname)) ++ schemep = (char *)"dict"; ++ else if(checkprefix("ldap.", hostname)) ++ schemep = (char *)"ldap"; ++ else if(checkprefix("imap.", hostname)) ++ schemep = (char *)"imap"; ++ else if(checkprefix("smtp.", hostname)) ++ schemep = (char *)"smtp"; ++ else if(checkprefix("pop3.", hostname)) ++ schemep = (char *)"pop3"; ++ else ++ schemep = (char *)"http"; ++ } ++ + len = strlen(p); + memcpy(path, p, len); + path[len] = 0; +diff --git a/tests/data/test1560 b/tests/data/test1560 +index 720df03..4b6c97a 100644 +--- a/tests/data/test1560 ++++ b/tests/data/test1560 +@@ -16,6 +16,12 @@ none + file + https + http ++pop3 ++smtp ++imap ++ldap ++dict ++ftp + + + URL API +diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c +index 669ea9a..30fb582 100644 +--- a/tests/libtest/lib1560.c ++++ b/tests/libtest/lib1560.c +@@ -246,8 +246,32 @@ static struct testcase get_parts_list[] ={ + }; + + static struct urltestcase get_url_list[] = { ++ {"smtp.example.com/path/html", ++ "smtp://smtp.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"https.example.com/path/html", ++ "http://https.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"dict.example.com/path/html", ++ "dict://dict.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"pop3.example.com/path/html", ++ "pop3://pop3.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"ldap.example.com/path/html", ++ "ldap://ldap.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"imap.example.com/path/html", ++ "imap://imap.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ 
{"ftp.example.com/path/html", ++ "ftp://ftp.example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, ++ {"example.com/path/html", ++ "http://example.com/path/html", ++ CURLU_GUESS_SCHEME, 0, CURLUE_OK}, + {"HTTP://test/", "http://test/", 0, 0, CURLUE_OK}, +- {"http://HO0_-st..~./", "", 0, 0, CURLUE_MALFORMED_INPUT}, ++ {"http://HO0_-st..~./", "http://HO0_-st..~./", 0, 0, CURLUE_OK}, + {"http:/@example.com: 123/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, + {"http:/@example.com:123 /", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, + {"http:/@example.com:123a/", "", 0, 0, CURLUE_BAD_PORT_NUMBER}, +-- +2.17.2 + + +From 4e335817d4ac0ee5596363004bfcaaad15bc6127 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Wed, 19 Sep 2018 11:28:40 +0200 +Subject: [PATCH 11/14] urlapi: document the error codes, remove two unused + ones + +Assisted-by: Daniel Gustafsson +Closes #3019 + +Upstream-commit: 5c73093edb3bd527db9c8abdee53d0f18e6a4cc1 +Signed-off-by: Kamil Dudka +--- + docs/libcurl/libcurl-errors.3 | 37 ++++++++++++++++++++++++++++- + docs/libcurl/symbols-in-versions | 2 -- + include/curl/urlapi.h | 8 +++---- + tests/libtest/lib1560.c | 40 ++++++++++++++++---------------- + 4 files changed, 59 insertions(+), 28 deletions(-) + +diff --git a/docs/libcurl/libcurl-errors.3 b/docs/libcurl/libcurl-errors.3 +index 30c57b3..411a272 100644 +--- a/docs/libcurl/libcurl-errors.3 ++++ b/docs/libcurl/libcurl-errors.3 +@@ -5,7 +5,7 @@ + .\" * | (__| |_| | _ <| |___ + .\" * \___|\___/|_| \_\_____| + .\" * +-.\" * Copyright (C) 1998 - 2015, Daniel Stenberg, , et al. ++.\" * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. + .\" * + .\" * This software is licensed as described in the file COPYING, which + .\" * you should have received as part of this distribution. The terms +@@ -307,6 +307,41 @@ Not enough memory was available. + .IP "CURLSHE_NOT_BUILT_IN (5)" + The requested sharing could not be done because the library you use don't have + that particular feature enabled. 
(Added in 7.23.0) ++.SH "CURLUcode" ++.IP "CURLUE_BAD_HANDLE (1)" ++An argument that should be a CURLU pointer was passed in as a NULL. ++.IP "CURLUE_BAD_PARTPOINTER (2)" ++A NULL pointer was passed to the 'part' argument of \fIcurl_url_get(3)\fP. ++.IP "CURLUE_MALFORMED_INPUT (3)" ++A malformed input was passed to a URL API function. ++.IP "CURLUE_BAD_PORT_NUMBER (4)" ++The port number was not a decimal number between 0 and 65535. ++.IP "CURLUE_UNSUPPORTED_SCHEME (5)" ++This libcurl build doesn't support the given URL scheme. ++.IP "CURLUE_URLDECODE (6)" ++URL decode error, most likely because of rubbish in the input. ++.IP "CURLUE_OUT_OF_MEMORY (7)" ++A memory function failed. ++.IP "CURLUE_USER_NOT_ALLOWED (8)" ++Credentials was passed in the URL when prohibited. ++.IP "CURLUE_UNKNOWN_PART (9)" ++An unknown part ID was passed to a URL API function. ++.IP "CURLUE_NO_SCHEME (10)" ++There is no scheme part in the URL. ++.IP "CURLUE_NO_USER (11)" ++There is no user part in the URL. ++.IP "CURLUE_NO_PASSWORD (12)" ++There is no password part in the URL. ++.IP "CURLUE_NO_OPTIONS (13)" ++There is no options part in the URL. ++.IP "CURLUE_NO_HOST (14)" ++There is no host part in the URL. ++.IP "CURLUE_NO_PORT (15)" ++There is no port part in the URL. ++.IP "CURLUE_NO_QUERY (16)" ++There is no query part in the URL. ++.IP "CURLUE_NO_FRAGMENT (17)" ++There is no fragment part in the URL. 
+ .SH "SEE ALSO" + .BR curl_easy_strerror "(3), " curl_multi_strerror "(3), " + .BR curl_share_strerror "(3), " CURLOPT_ERRORBUFFER "(3), " +diff --git a/docs/libcurl/symbols-in-versions b/docs/libcurl/symbols-in-versions +index c797cb7..3b3861f 100644 +--- a/docs/libcurl/symbols-in-versions ++++ b/docs/libcurl/symbols-in-versions +@@ -736,14 +736,12 @@ CURLUE_NO_FRAGMENT 7.62.0 + CURLUE_NO_HOST 7.62.0 + CURLUE_NO_OPTIONS 7.62.0 + CURLUE_NO_PASSWORD 7.62.0 +-CURLUE_NO_PATH 7.62.0 + CURLUE_NO_PORT 7.62.0 + CURLUE_NO_QUERY 7.62.0 + CURLUE_NO_SCHEME 7.62.0 + CURLUE_NO_USER 7.62.0 + CURLUE_OK 7.62.0 + CURLUE_OUT_OF_MEMORY 7.62.0 +-CURLUE_RELATIVE 7.62.0 + CURLUE_UNKNOWN_PART 7.62.0 + CURLUE_UNSUPPORTED_SCHEME 7.62.0 + CURLUE_URLDECODE 7.62.0 +diff --git a/include/curl/urlapi.h b/include/curl/urlapi.h +index 319de35..90dd56c 100644 +--- a/include/curl/urlapi.h ++++ b/include/curl/urlapi.h +@@ -35,7 +35,7 @@ typedef enum { + CURLUE_BAD_PORT_NUMBER, /* 4 */ + CURLUE_UNSUPPORTED_SCHEME, /* 5 */ + CURLUE_URLDECODE, /* 6 */ +- CURLUE_RELATIVE, /* 7 */ ++ CURLUE_OUT_OF_MEMORY, /* 7 */ + CURLUE_USER_NOT_ALLOWED, /* 8 */ + CURLUE_UNKNOWN_PART, /* 9 */ + CURLUE_NO_SCHEME, /* 10 */ +@@ -44,10 +44,8 @@ typedef enum { + CURLUE_NO_OPTIONS, /* 13 */ + CURLUE_NO_HOST, /* 14 */ + CURLUE_NO_PORT, /* 15 */ +- CURLUE_NO_PATH, /* 16 */ +- CURLUE_NO_QUERY, /* 17 */ +- CURLUE_NO_FRAGMENT, /* 18 */ +- CURLUE_OUT_OF_MEMORY /* 19 */ ++ CURLUE_NO_QUERY, /* 16 */ ++ CURLUE_NO_FRAGMENT /* 17 */ + } CURLUcode; + + typedef enum { +diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c +index 30fb582..224cb88 100644 +--- a/tests/libtest/lib1560.c ++++ b/tests/libtest/lib1560.c +@@ -129,7 +129,7 @@ struct querycase { + + static struct testcase get_parts_list[] ={ + {"https://127.0.0.1:443", +- "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [17] | [18]", ++ "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]", + 0, CURLU_NO_DEFAULT_PORT, CURLUE_OK}, + 
{"http://%3a:%3a@ex%0ample/%3f+?+%3f+%23#+%23%3f%g7", + "http | : | : | [13] | [6] | [15] | /?+ | ? # | +#?%g7", +@@ -138,43 +138,43 @@ static struct testcase get_parts_list[] ={ + "http | %3a | %3a | [13] | ex%0ample | [15] | /%3f | %3f%35 | %35%3f%g7", + 0, 0, CURLUE_OK}, + {"http://HO0_-st%41/", +- "http | [11] | [12] | [13] | HO0_-st%41 | [15] | / | [17] | [18]", ++ "http | [11] | [12] | [13] | HO0_-st%41 | [15] | / | [16] | [17]", + 0, 0, CURLUE_OK}, + {"file://hello.html", + "", + 0, 0, CURLUE_MALFORMED_INPUT}, + {"http://HO0_-st/", +- "http | [11] | [12] | [13] | HO0_-st | [15] | / | [17] | [18]", ++ "http | [11] | [12] | [13] | HO0_-st | [15] | / | [16] | [17]", + 0, 0, CURLUE_OK}, + {"imap://user:pass;option@server/path", +- "imap | user | pass | option | server | [15] | /path | [17] | [18]", ++ "imap | user | pass | option | server | [15] | /path | [16] | [17]", + 0, 0, CURLUE_OK}, + {"http://user:pass;option@server/path", +- "http | user | pass;option | [13] | server | [15] | /path | [17] | [18]", ++ "http | user | pass;option | [13] | server | [15] | /path | [16] | [17]", + 0, 0, CURLUE_OK}, + {"file:/hello.html", +- "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [16] | [17]", + 0, 0, CURLUE_OK}, + {"file://127.0.0.1/hello.html", +- "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [16] | [17]", + 0, 0, CURLUE_OK}, + {"file:////hello.html", +- "file | [11] | [12] | [13] | [14] | [15] | //hello.html | [17] | [18]", ++ "file | [11] | [12] | [13] | [14] | [15] | //hello.html | [16] | [17]", + 0, 0, CURLUE_OK}, + {"file:///hello.html", +- "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [17] | [18]", ++ "file | [11] | [12] | [13] | [14] | [15] | /hello.html | [16] | [17]", + 0, 0, CURLUE_OK}, + {"https://127.0.0.1", +- "https | [11] | [12] | [13] | 127.0.0.1 | 443 | / | [17] | [18]", ++ 
"https | [11] | [12] | [13] | 127.0.0.1 | 443 | / | [16] | [17]", + 0, CURLU_DEFAULT_PORT, CURLUE_OK}, + {"https://127.0.0.1", +- "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [17] | [18]", ++ "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://[::1]:1234", +- "https | [11] | [12] | [13] | [::1] | 1234 | / | [17] | [18]", ++ "https | [11] | [12] | [13] | [::1] | 1234 | / | [16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://127abc.com", +- "https | [11] | [12] | [13] | 127abc.com | [15] | / | [17] | [18]", ++ "https | [11] | [12] | [13] | 127abc.com | [15] | / | [16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https:// example.com?check", + "", +@@ -183,7 +183,7 @@ static struct testcase get_parts_list[] ={ + "", + CURLU_DEFAULT_SCHEME, 0, CURLUE_MALFORMED_INPUT}, + {"https://example.com?check", +- "https | [11] | [12] | [13] | example.com | [15] | / | check | [18]", ++ "https | [11] | [12] | [13] | example.com | [15] | / | check | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://example.com:65536", + "", +@@ -193,27 +193,27 @@ static struct testcase get_parts_list[] ={ + CURLU_DEFAULT_SCHEME, 0, CURLUE_BAD_PORT_NUMBER}, + {"https://example.com:01#moo", + "https | [11] | [12] | [13] | example.com | 1 | / | " +- "[17] | moo", ++ "[16] | moo", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://example.com:1#moo", + "https | [11] | [12] | [13] | example.com | 1 | / | " +- "[17] | moo", ++ "[16] | moo", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"http://example.com#moo", + "http | [11] | [12] | [13] | example.com | [15] | / | " +- "[17] | moo", ++ "[16] | moo", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"http://example.com", + "http | [11] | [12] | [13] | example.com | [15] | / | " +- "[17] | [18]", ++ "[16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"http://example.com/path/html", + "http | [11] | [12] | [13] | example.com | [15] | /path/html | " +- "[17] | 
[18]", ++ "[16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"http://example.com/path/html?query=name", + "http | [11] | [12] | [13] | example.com | [15] | /path/html | " +- "query=name | [18]", ++ "query=name | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"http://example.com/path/html?query=name#anchor", + "http | [11] | [12] | [13] | example.com | [15] | /path/html | " +-- +2.17.2 + + +From 88dfdac2fc1b34a321a323868ea06116c72fe6d2 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Fri, 21 Sep 2018 08:17:39 +0200 +Subject: [PATCH 12/14] urlapi: fix support for address scope in IPv6 numerical + addresses + +Closes #3024 + +Upstream-commit: 2097cd515289581df5dfb6eeb5942d083a871fa4 +Signed-off-by: Kamil Dudka +--- + lib/urlapi-int.h | 4 ++++ + lib/urlapi.c | 8 ++------ + tests/libtest/lib1560.c | 3 +++ + 3 files changed, 9 insertions(+), 6 deletions(-) + +diff --git a/lib/urlapi-int.h b/lib/urlapi-int.h +index 7ac09fd..a5bb8ea 100644 +--- a/lib/urlapi-int.h ++++ b/lib/urlapi-int.h +@@ -22,6 +22,10 @@ + * + ***************************************************************************/ + #include "curl_setup.h" ++/* scheme is not URL encoded, the longest libcurl supported ones are 6 ++ letters */ ++#define MAX_SCHEME_LEN 8 ++ + bool Curl_is_absolute_url(const char *url, char *scheme, size_t buflen); + char *Curl_concat_url(const char *base, const char *relurl); + size_t Curl_strlen_url(const char *url, bool relative); +diff --git a/lib/urlapi.c b/lib/urlapi.c +index 45f1e14..a12112e 100644 +--- a/lib/urlapi.c ++++ b/lib/urlapi.c +@@ -53,10 +53,6 @@ struct Curl_URL { + + #define DEFAULT_SCHEME "https" + +-/* scheme is not URL encoded, the longest libcurl supported ones are 6 +- letters */ +-#define MAX_SCHEME_LEN 8 +- + static void free_urlhandle(struct Curl_URL *u) + { + free(u->scheme); +@@ -480,7 +476,7 @@ static CURLUcode parse_port(struct Curl_URL *u, char *hostname) + char endbracket; + int len; + +- if((1 == sscanf(hostname, 
"[%*45[0123456789abcdefABCDEF:.]%c%n", ++ if((1 == sscanf(hostname, "[%*45[0123456789abcdefABCDEF:.%%]%c%n", + &endbracket, &len)) && + (']' == endbracket)) { + /* this is a RFC2732-style specified IP-address */ +@@ -561,7 +557,7 @@ static CURLUcode hostname_check(char *hostname, unsigned int flags) + + if(hostname[0] == '[') { + hostname++; +- l = "0123456789abcdefABCDEF::."; ++ l = "0123456789abcdefABCDEF::.%"; + hlen -= 2; + } + +diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c +index 224cb88..7a5be81 100644 +--- a/tests/libtest/lib1560.c ++++ b/tests/libtest/lib1560.c +@@ -128,6 +128,9 @@ struct querycase { + }; + + static struct testcase get_parts_list[] ={ ++ {"https://[::1%252]:1234", ++ "https | [11] | [12] | [13] | [::1%252] | 1234 | / | [16] | [17]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://127.0.0.1:443", + "https | [11] | [12] | [13] | 127.0.0.1 | [15] | / | [16] | [17]", + 0, CURLU_NO_DEFAULT_PORT, CURLUE_OK}, +-- +2.17.2 + + +From 6c9f3f4bc604ba06a4f43807ace9189503a5e9fc Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Fri, 2 Nov 2018 15:11:16 +0100 +Subject: [PATCH 13/14] URL: fix IPv6 numeral address parser + +Regression from 46e164069d1a52. Extended test 1560 to verify. 
+ +Reported-by: tpaukrt on github +Fixes #3218 +Closes #3219 + +Upstream-commit: b28094833a971870fd8c07960b3b12bf6fbbaad3 +Signed-off-by: Kamil Dudka +--- + lib/urlapi.c | 8 ++++++-- + tests/libtest/lib1560.c | 9 +++++++++ + 2 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/lib/urlapi.c b/lib/urlapi.c +index a12112e..8626052 100644 +--- a/lib/urlapi.c ++++ b/lib/urlapi.c +@@ -481,8 +481,12 @@ static CURLUcode parse_port(struct Curl_URL *u, char *hostname) + (']' == endbracket)) { + /* this is a RFC2732-style specified IP-address */ + portptr = &hostname[len]; +- if (*portptr != ':') +- return CURLUE_MALFORMED_INPUT; ++ if(*portptr) { ++ if(*portptr != ':') ++ return CURLUE_MALFORMED_INPUT; ++ } ++ else ++ portptr = NULL; + } + else + portptr = strchr(hostname, ':'); +diff --git a/tests/libtest/lib1560.c b/tests/libtest/lib1560.c +index 7a5be81..483035c 100644 +--- a/tests/libtest/lib1560.c ++++ b/tests/libtest/lib1560.c +@@ -128,6 +128,15 @@ struct querycase { + }; + + static struct testcase get_parts_list[] ={ ++ {"http://[fd00:a41::50]:8080", ++ "http | [11] | [12] | [13] | [fd00:a41::50] | 8080 | / | [16] | [17]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://[fd00:a41::50]/", ++ "http | [11] | [12] | [13] | [fd00:a41::50] | [15] | / | [16] | [17]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, ++ {"http://[fd00:a41::50]", ++ "http | [11] | [12] | [13] | [fd00:a41::50] | [15] | / | [16] | [17]", ++ CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, + {"https://[::1%252]:1234", + "https | [11] | [12] | [13] | [::1%252] | 1234 | / | [16] | [17]", + CURLU_DEFAULT_SCHEME, 0, CURLUE_OK}, +-- +2.17.2 + + +From 9fa7298750c1d66331dc55a202277b131868c048 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Wed, 2 Jan 2019 20:18:27 +0100 +Subject: [PATCH 14/14] xattr: strip credentials from any URL that is stored + +Both user and password are cleared unconditionally. + +Added unit test 1621 to verify. 
+ +Fixes #3423 +Closes #3433 + +Upstream-commit: 98e6629154044e4ab1ee7cff8351c7ebcb131e88 +Signed-off-by: Kamil Dudka +--- + src/tool_xattr.c | 63 +++++++++++++++++++++++++---- + tests/data/Makefile.inc | 2 +- + tests/data/test1621 | 27 +++++++++++++ + tests/unit/Makefile.inc | 6 ++- + tests/unit/unit1621.c | 89 +++++++++++++++++++++++++++++++++++++++++ + 5 files changed, 177 insertions(+), 10 deletions(-) + create mode 100644 tests/data/test1621 + create mode 100644 tests/unit/unit1621.c + +diff --git a/src/tool_xattr.c b/src/tool_xattr.c +index 92b99db..730381b 100644 +--- a/src/tool_xattr.c ++++ b/src/tool_xattr.c +@@ -5,7 +5,7 @@ + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * +- * Copyright (C) 1998 - 2014, Daniel Stenberg, , et al. ++ * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms +@@ -49,6 +49,46 @@ static const struct xattr_mapping { + { NULL, CURLINFO_NONE } /* last element, abort loop here */ + }; + ++/* returns TRUE if a new URL is returned, that then needs to be freed */ ++/* @unittest: 1621 */ ++#ifdef UNITTESTS ++bool stripcredentials(char **url); ++#else ++static ++#endif ++bool stripcredentials(char **url) ++{ ++ CURLU *u; ++ CURLUcode uc; ++ char *nurl; ++ u = curl_url(); ++ if(u) { ++ uc = curl_url_set(u, CURLUPART_URL, *url, 0); ++ if(uc) ++ goto error; ++ ++ uc = curl_url_set(u, CURLUPART_USER, NULL, 0); ++ if(uc) ++ goto error; ++ ++ uc = curl_url_set(u, CURLUPART_PASSWORD, NULL, 0); ++ if(uc) ++ goto error; ++ ++ uc = curl_url_get(u, CURLUPART_URL, &nurl, 0); ++ if(uc) ++ goto error; ++ ++ curl_url_cleanup(u); ++ ++ *url = nurl; ++ return TRUE; ++ } ++ error: ++ curl_url_cleanup(u); ++ return FALSE; ++} ++ + /* store metadata from the curl request alongside the downloaded + * file using extended attributes + */ +@@ -62,17 +102,24 @@ int fwrite_xattr(CURL *curl, int fd) + char *value = 
NULL; + CURLcode result = curl_easy_getinfo(curl, mappings[i].info, &value); + if(!result && value) { ++ bool freeptr = FALSE; ++ if(CURLINFO_EFFECTIVE_URL == mappings[i].info) ++ freeptr = stripcredentials(&value); ++ if(value) { + #ifdef HAVE_FSETXATTR_6 +- err = fsetxattr(fd, mappings[i].attr, value, strlen(value), 0, 0); ++ err = fsetxattr(fd, mappings[i].attr, value, strlen(value), 0, 0); + #elif defined(HAVE_FSETXATTR_5) +- err = fsetxattr(fd, mappings[i].attr, value, strlen(value), 0); ++ err = fsetxattr(fd, mappings[i].attr, value, strlen(value), 0); + #elif defined(__FreeBSD_version) +- err = extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, mappings[i].attr, value, +- strlen(value)); +- /* FreeBSD's extattr_set_fd returns the length of the extended attribute +- */ +- err = err < 0 ? err : 0; ++ err = extattr_set_fd(fd, EXTATTR_NAMESPACE_USER, mappings[i].attr, ++ value, strlen(value)); ++ /* FreeBSD's extattr_set_fd returns the length of the extended ++ attribute */ ++ err = err < 0 ? 
err : 0; + #endif ++ if(freeptr) ++ curl_free(value); ++ } + } + i++; + } +diff --git a/tests/data/Makefile.inc b/tests/data/Makefile.inc +index dd38f89..6172b77 100644 +--- a/tests/data/Makefile.inc ++++ b/tests/data/Makefile.inc +@@ -182,7 +182,7 @@ test1560 \ + \ + test1590 \ + test1600 test1601 test1602 test1603 test1604 test1605 test1606 test1607 \ +-test1608 test1609 \ ++test1608 test1609 test1621 \ + \ + test1700 test1701 test1702 \ + \ +diff --git a/tests/data/test1621 b/tests/data/test1621 +new file mode 100644 +index 0000000..1117d1b +--- /dev/null ++++ b/tests/data/test1621 +@@ -0,0 +1,27 @@ ++ ++ ++ ++unittest ++stripcredentials ++ ++ ++ ++# ++# Client-side ++ ++ ++none ++ ++ ++unittest ++https ++ ++ ++unit tests for stripcredentials from URL ++ ++ ++unit1621 ++ ++ ++ ++ +diff --git a/tests/unit/Makefile.inc b/tests/unit/Makefile.inc +index 8b1a607..82eaec7 100644 +--- a/tests/unit/Makefile.inc ++++ b/tests/unit/Makefile.inc +@@ -10,7 +10,7 @@ UNITPROGS = unit1300 unit1301 unit1302 unit1303 unit1304 unit1305 unit1307 \ + unit1330 unit1394 unit1395 unit1396 unit1397 unit1398 \ + unit1399 \ + unit1600 unit1601 unit1602 unit1603 unit1604 unit1605 unit1606 unit1607 \ +- unit1608 unit1609 ++ unit1608 unit1609 unit1621 + + unit1300_SOURCES = unit1300.c $(UNITFILES) + unit1300_CPPFLAGS = $(AM_CPPFLAGS) +@@ -95,3 +95,7 @@ unit1608_CPPFLAGS = $(AM_CPPFLAGS) + + unit1609_SOURCES = unit1609.c $(UNITFILES) + unit1609_CPPFLAGS = $(AM_CPPFLAGS) ++ ++unit1621_SOURCES = unit1621.c $(UNITFILES) ++unit1621_CPPFLAGS = $(AM_CPPFLAGS) ++unit1621_LDADD = $(top_builddir)/src/libcurltool.la $(top_builddir)/lib/libcurl.la +diff --git a/tests/unit/unit1621.c b/tests/unit/unit1621.c +new file mode 100644 +index 0000000..6e07b6e +--- /dev/null ++++ b/tests/unit/unit1621.c +@@ -0,0 +1,89 @@ ++/*************************************************************************** ++ * _ _ ____ _ ++ * Project ___| | | | _ \| | ++ * / __| | | | |_) | | ++ * | (__| |_| | _ <| |___ ++ * 
\___|\___/|_| \_\_____| ++ * ++ * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. ++ * ++ * This software is licensed as described in the file COPYING, which ++ * you should have received as part of this distribution. The terms ++ * are also available at https://curl.haxx.se/docs/copyright.html. ++ * ++ * You may opt to use, copy, modify, merge, publish, distribute and/or sell ++ * copies of the Software, and permit persons to whom the Software is ++ * furnished to do so, under the terms of the COPYING file. ++ * ++ * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY ++ * KIND, either express or implied. ++ * ++ ***************************************************************************/ ++#include "curlcheck.h" ++ ++#include "urldata.h" ++#include "url.h" ++ ++#include "memdebug.h" /* LAST include file */ ++ ++static CURLcode unit_setup(void) ++{ ++ return CURLE_OK; ++} ++ ++static void unit_stop(void) ++{ ++} ++ ++#ifdef __MINGW32__ ++UNITTEST_START ++{ ++ return 0; ++} ++UNITTEST_STOP ++#else ++ ++bool stripcredentials(char **url); ++ ++struct checkthis { ++ const char *input; ++ const char *output; ++}; ++ ++static struct checkthis tests[] = { ++ { "ninja://foo@example.com", "ninja://foo@example.com" }, ++ { "https://foo@example.com", "https://example.com/" }, ++ { "https://localhost:45", "https://localhost:45/" }, ++ { "https://foo@localhost:45", "https://localhost:45/" }, ++ { "http://daniel:password@localhost", "http://localhost/" }, ++ { "http://daniel@localhost", "http://localhost/" }, ++ { "http://localhost/", "http://localhost/" }, ++ { NULL, NULL } /* end marker */ ++}; ++ ++UNITTEST_START ++{ ++ bool cleanup; ++ char *url; ++ int i; ++ int rc = 0; ++ ++ for(i = 0; tests[i].input; i++) { ++ url = (char *)tests[i].input; ++ cleanup = stripcredentials(&url); ++ printf("Test %u got input \"%s\", output: \"%s\"\n", ++ i, tests[i].input, url); ++ ++ if(strcmp(tests[i].output, url)) { ++ fprintf(stderr, "Test %u got input \"%s\", 
expected output \"%s\"\n" ++ " Actual output: \"%s\"\n", i, tests[i].input, tests[i].output, ++ url); ++ rc++; ++ } ++ if(cleanup) ++ curl_free(url); ++ } ++ return rc; ++} ++UNITTEST_STOP ++#endif +-- +2.17.2 + diff --git a/SOURCES/0009-curl-7.61.1-CVE-2018-16890.patch b/SOURCES/0009-curl-7.61.1-CVE-2018-16890.patch new file mode 100644 index 0000000..0a15ade --- /dev/null +++ b/SOURCES/0009-curl-7.61.1-CVE-2018-16890.patch @@ -0,0 +1,36 @@ +From 81c0e81531623251a0e78f7779c049f530abe733 Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Wed, 2 Jan 2019 20:33:08 +0100 +Subject: [PATCH] NTLM: fix size check condition for type2 received data + +Bug: https://curl.haxx.se/docs/CVE-2018-16890.html +Reported-by: Wenxiang Qian +CVE-2018-16890 + +Upstream-commit: b780b30d1377adb10bbe774835f49e9b237fb9bb +Signed-off-by: Kamil Dudka +--- + lib/vauth/ntlm.c | 5 +++-- + 1 file changed, 3 insertions(+), 2 deletions(-) + +diff --git a/lib/vauth/ntlm.c b/lib/vauth/ntlm.c +index cdb8d8f..b614cda 100644 +--- a/lib/vauth/ntlm.c ++++ b/lib/vauth/ntlm.c +@@ -182,10 +182,11 @@ static CURLcode ntlm_decode_type2_target(struct Curl_easy *data, + target_info_len = Curl_read16_le(&buffer[40]); + target_info_offset = Curl_read32_le(&buffer[44]); + if(target_info_len > 0) { +- if(((target_info_offset + target_info_len) > size) || ++ if((target_info_offset >= size) || ++ ((target_info_offset + target_info_len) > size) || + (target_info_offset < 48)) { + infof(data, "NTLM handshake failure (bad type-2 message). 
" +- "Target Info Offset Len is set incorrect by the peer\n"); ++ "Target Info Offset Len is set incorrect by the peer\n"); + return CURLE_BAD_CONTENT_ENCODING; + } + +-- +2.17.2 + diff --git a/SOURCES/0010-curl-7.61.1-CVE-2019-3822.patch b/SOURCES/0010-curl-7.61.1-CVE-2019-3822.patch new file mode 100644 index 0000000..c860817 --- /dev/null +++ b/SOURCES/0010-curl-7.61.1-CVE-2019-3822.patch @@ -0,0 +1,41 @@ +From ab22e3a00f04b458039c21111cfa448051e5777d Mon Sep 17 00:00:00 2001 +From: Daniel Stenberg +Date: Thu, 3 Jan 2019 12:59:28 +0100 +Subject: [PATCH] ntlm: fix *_type3_message size check to avoid buffer overflow + +Bug: https://curl.haxx.se/docs/CVE-2019-3822.html +Reported-by: Wenxiang Qian +CVE-2019-3822 + +Upstream-commit: 50c9484278c63b958655a717844f0721263939cc +Signed-off-by: Kamil Dudka +--- + lib/vauth/ntlm.c | 11 +++++++---- + 1 file changed, 7 insertions(+), 4 deletions(-) + +diff --git a/lib/vauth/ntlm.c b/lib/vauth/ntlm.c +index b614cda..a3a55d9 100644 +--- a/lib/vauth/ntlm.c ++++ b/lib/vauth/ntlm.c +@@ -777,11 +777,14 @@ CURLcode Curl_auth_create_ntlm_type3_message(struct Curl_easy *data, + }); + + #ifdef USE_NTRESPONSES +- if(size < (NTLM_BUFSIZE - ntresplen)) { +- DEBUGASSERT(size == (size_t)ntrespoff); +- memcpy(&ntlmbuf[size], ptr_ntresp, ntresplen); +- size += ntresplen; ++ /* ntresplen + size should not be risking an integer overflow here */ ++ if(ntresplen + size > sizeof(ntlmbuf)) { ++ failf(data, "incoming NTLM message too big"); ++ return CURLE_OUT_OF_MEMORY; + } ++ DEBUGASSERT(size == (size_t)ntrespoff); ++ memcpy(&ntlmbuf[size], ptr_ntresp, ntresplen); ++ size += ntresplen; + + DEBUG_OUT({ + fprintf(stderr, "\n ntresp="); +-- +2.17.2 + diff --git a/SOURCES/0011-curl-7.61.1-CVE-2019-3823.patch b/SOURCES/0011-curl-7.61.1-CVE-2019-3823.patch new file mode 100644 index 0000000..d1d259f --- /dev/null +++ b/SOURCES/0011-curl-7.61.1-CVE-2019-3823.patch @@ -0,0 +1,50 @@ +From d26f1025d0a0a6c602d758a2e0917759492473e9 Mon Sep 17 00:00:00 2001 
+From: Daniel Gustafsson +Date: Sat, 19 Jan 2019 00:42:47 +0100 +Subject: [PATCH] smtp: avoid risk of buffer overflow in strtol + +If the incoming len is 5, but the buffer does not have a termination +after 5 bytes, the strtol() call may keep reading through the line +buffer until it exceeds its boundary. Fix by ensuring that we are +using a bounded read with a temporary buffer on the stack. + +Bug: https://curl.haxx.se/docs/CVE-2019-3823.html +Reported-by: Brian Carpenter (Geeknik Labs) +CVE-2019-3823 + +Upstream-commit: 39df4073e5413fcdbb5a38da0c1ce6f1c0ceb484 +Signed-off-by: Kamil Dudka +--- + lib/smtp.c | 8 ++++++-- + 1 file changed, 6 insertions(+), 2 deletions(-) + +diff --git a/lib/smtp.c b/lib/smtp.c +index ecf10a4..1b9f92d 100644 +--- a/lib/smtp.c ++++ b/lib/smtp.c +@@ -5,7 +5,7 @@ + * | (__| |_| | _ <| |___ + * \___|\___/|_| \_\_____| + * +- * Copyright (C) 1998 - 2018, Daniel Stenberg, , et al. ++ * Copyright (C) 1998 - 2019, Daniel Stenberg, , et al. + * + * This software is licensed as described in the file COPYING, which + * you should have received as part of this distribution. The terms +@@ -207,8 +207,12 @@ static bool smtp_endofresp(struct connectdata *conn, char *line, size_t len, + Section 4. Examples of RFC-4954 but some e-mail servers ignore this and + only send the response code instead as per Section 4.2. */ + if(line[3] == ' ' || len == 5) { ++ char tmpline[6]; ++ + result = TRUE; +- *resp = curlx_sltosi(strtol(line, NULL, 10)); ++ memset(tmpline, '\0', sizeof(tmpline)); ++ memcpy(tmpline, line, (len == 5 ? 
5 : 3)); ++ *resp = curlx_sltosi(strtol(tmpline, NULL, 10)); + + /* Make sure real server never sends internal value */ + if(*resp == 1) +-- +2.17.2 + diff --git a/SOURCES/0014-curl-7.61.1-libssh-socket.patch b/SOURCES/0014-curl-7.61.1-libssh-socket.patch new file mode 100644 index 0000000..83c9cc7 --- /dev/null +++ b/SOURCES/0014-curl-7.61.1-libssh-socket.patch @@ -0,0 +1,66 @@ +From 095d4cf3b1c388b2871e3783f8c41b1e01200a25 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Felix=20H=C3=A4dicke?= +Date: Wed, 23 Jan 2019 23:47:55 +0100 +Subject: [PATCH] libssh: do not let libssh create socket + +By default, libssh creates a new socket, instead of using the socket +created by curl for SSH connections. + +Pass the socket created by curl to libssh using ssh_options_set() with +SSH_OPTIONS_FD directly after ssh_new(). So libssh uses our socket +instead of creating a new one. + +This approach is very similar to what is done in the libssh2 code, where +the socket created by curl is passed to libssh2 when +libssh2_session_startup() is called. 
+ +Fixes #3491 +Closes #3495 + +Upstream-commit: 15c94b310bf9e0c92d71fca5a88eb67a1e2548a6 +Signed-off-by: Kamil Dudka +--- + lib/ssh-libssh.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/lib/ssh-libssh.c b/lib/ssh-libssh.c +index 7d59089..4110be2 100644 +--- a/lib/ssh-libssh.c ++++ b/lib/ssh-libssh.c +@@ -549,6 +549,7 @@ static CURLcode myssh_statemach_act(struct connectdata *conn, bool *block) + struct Curl_easy *data = conn->data; + struct SSHPROTO *protop = data->req.protop; + struct ssh_conn *sshc = &conn->proto.sshc; ++ curl_socket_t sock = conn->sock[FIRSTSOCKET]; + int rc = SSH_NO_ERROR, err; + char *new_readdir_line; + int seekerr = CURL_SEEKFUNC_OK; +@@ -792,7 +793,7 @@ static CURLcode myssh_statemach_act(struct connectdata *conn, bool *block) + + Curl_pgrsTime(conn->data, TIMER_APPCONNECT); /* SSH is connected */ + +- conn->sockfd = ssh_get_fd(sshc->ssh_session); ++ conn->sockfd = sock; + conn->writesockfd = CURL_SOCKET_BAD; + + if(conn->handler->protocol == CURLPROTO_SFTP) { +@@ -2048,6 +2049,7 @@ static CURLcode myssh_connect(struct connectdata *conn, bool *done) + { + struct ssh_conn *ssh; + CURLcode result; ++ curl_socket_t sock = conn->sock[FIRSTSOCKET]; + struct Curl_easy *data = conn->data; + int rc; + +@@ -2076,6 +2078,8 @@ static CURLcode myssh_connect(struct connectdata *conn, bool *done) + return CURLE_FAILED_INIT; + } + ++ ssh_options_set(ssh->ssh_session, SSH_OPTIONS_FD, &sock); ++ + if(conn->user) { + infof(data, "User: %s\n", conn->user); + ssh_options_set(ssh->ssh_session, SSH_OPTIONS_USER, conn->user); +-- +2.17.2 + diff --git a/SPECS/curl.spec b/SPECS/curl.spec index 1ec256f..41725c9 100644 --- a/SPECS/curl.spec +++ b/SPECS/curl.spec @@ -1,7 +1,7 @@ Summary: A utility for getting files from remote servers (FTP, HTTP, and others) Name: curl Version: 7.61.1 -Release: 8%{?dist} +Release: 11%{?dist} License: MIT Source: https://curl.haxx.se/download/%{name}-%{version}.tar.xz @@ -28,6 +28,21 @@ Patch6: 
0006-curl-7.61.1-CVE-2018-16839.patch # curl -J: do not append to the destination file (#1660827) Patch7: 0007-curl-7.63.0-JO-preserve-local-file.patch +# xattr: strip credentials from any URL that is stored (CVE-2018-20483) +Patch8: 0008-curl-7.61.1-CVE-2018-20483.patch + +# fix NTLM type-2 out-of-bounds buffer read (CVE-2018-16890) +Patch9: 0009-curl-7.61.1-CVE-2018-16890.patch + +# fix NTLMv2 type-3 header stack buffer overflow (CVE-2019-3822) +Patch10: 0010-curl-7.61.1-CVE-2019-3822.patch + +# fix SMTP end-of-response out-of-bounds read (CVE-2019-3823) +Patch11: 0011-curl-7.61.1-CVE-2019-3823.patch + +# do not let libssh create a new socket for SCP/SFTP (#1669156) +Patch14: 0014-curl-7.61.1-libssh-socket.patch + # patch making libcurl multilib ready Patch101: 0101-curl-7.32.0-multilib.patch @@ -194,6 +209,11 @@ git apply %{PATCH4} %patch5 -p1 %patch6 -p1 %patch7 -p1 +%patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch14 -p1 # Fedora patches %patch101 -p1 @@ -360,6 +380,18 @@ rm -f ${RPM_BUILD_ROOT}%{_libdir}/libcurl.la %{_libdir}/libcurl.so.4.[0-9].[0-9].minimal %changelog +* Mon May 13 2019 Kamil Dudka - 7.61.1-11 +- rebuild with updated annobin to prevent Execshield RPMDiff check from failing + +* Fri May 10 2019 Kamil Dudka - 7.61.1-10 +- fix SMTP end-of-response out-of-bounds read (CVE-2019-3823) +- fix NTLMv2 type-3 header stack buffer overflow (CVE-2019-3822) +- fix NTLM type-2 out-of-bounds buffer read (CVE-2018-16890) +- xattr: strip credentials from any URL that is stored (CVE-2018-20483) + +* Mon Feb 18 2019 Kamil Dudka - 7.61.1-9 +- do not let libssh create a new socket for SCP/SFTP (#1669156) + * Fri Jan 11 2019 Kamil Dudka - 7.61.1-8 - curl -J: do not append to the destination file (#1660827)