|
|
bc22e6 |
From 0a33fa22c597234ab133f63127b4a5e00cf048b9 Mon Sep 17 00:00:00 2001
|
|
|
bc22e6 |
From: Tomas Hozza <thozza@redhat.com>
|
|
|
bc22e6 |
Date: Mon, 20 Jun 2016 12:10:38 +0200
|
|
|
bc22e6 |
Subject: [PATCH] Support non-ASCII characters
|
|
|
bc22e6 |
|
|
|
bc22e6 |
Upstream commit 59b920874daa565a1323ffa1e756e80493190686
|
|
|
bc22e6 |
|
|
|
bc22e6 |
Signed-off-by: Tomas Hozza <thozza@redhat.com>
|
|
|
bc22e6 |
---
|
|
|
bc22e6 |
src/url.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++--
|
|
|
bc22e6 |
tests/Test-ftp-iri.px | 4 +--
|
|
|
bc22e6 |
2 files changed, 87 insertions(+), 4 deletions(-)
|
|
|
bc22e6 |
|
|
|
bc22e6 |
diff --git a/src/url.c b/src/url.c
|
|
|
bc22e6 |
index 6bca719..d0d9e27 100644
|
|
|
bc22e6 |
--- a/src/url.c
|
|
|
bc22e6 |
+++ b/src/url.c
|
|
|
bc22e6 |
@@ -42,6 +42,11 @@ as that of the covered work. */
|
|
|
bc22e6 |
#include "url.h"
|
|
|
bc22e6 |
#include "host.h" /* for is_valid_ipv6_address */
|
|
|
bc22e6 |
|
|
|
bc22e6 |
+#if HAVE_ICONV
|
|
|
bc22e6 |
+#include <iconv.h>
|
|
|
bc22e6 |
+#include <langinfo.h>
|
|
|
bc22e6 |
+#endif
|
|
|
bc22e6 |
+
|
|
|
bc22e6 |
#ifdef __VMS
|
|
|
bc22e6 |
#include "vms.h"
|
|
|
bc22e6 |
#endif /* def __VMS */
|
|
|
bc22e6 |
@@ -1335,8 +1340,8 @@ UWC, C, C, C, C, C, C, C, /* NUL SOH STX ETX EOT ENQ ACK BEL */
|
|
|
bc22e6 |
0, 0, 0, 0, 0, 0, 0, 0, /* p q r s t u v w */
|
|
|
bc22e6 |
0, 0, 0, 0, W, 0, 0, C, /* x y z { | } ~ DEL */
|
|
|
bc22e6 |
|
|
|
bc22e6 |
- C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 128-143 */
|
|
|
bc22e6 |
- C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, C, /* 144-159 */
|
|
|
bc22e6 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 128-143 */
|
|
|
bc22e6 |
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 144-159 */
|
|
|
bc22e6 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
bc22e6 |
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
|
|
bc22e6 |
|
|
|
bc22e6 |
@@ -1456,6 +1461,82 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
|
|
|
bc22e6 |
TAIL_INCR (dest, outlen);
|
|
|
bc22e6 |
}
|
|
|
bc22e6 |
|
|
|
bc22e6 |
+/* Convert FNAME from the remote encoding (default UTF-8) to the local
+   encoding (default: the locale's codeset).  On success FNAME is freed
+   and a newly allocated string is returned; if the conversion fails, or
+   iconv is not available, FNAME itself is returned untouched.  */
+static char *
+convert_fname (const char *fname)
+{
+  char *converted_fname = (char *)fname;
+#if HAVE_ICONV
+  const char *from_encoding = opt.encoding_remote;
+  const char *to_encoding = opt.locale;
+  iconv_t cd;
+  size_t len, done, inlen, outlen;
+  char *s;
+  const char *orig_fname = fname;
+
+  /* Defaults for remote and local encodings.  */
+  if (!from_encoding)
+    from_encoding = "UTF-8";
+  if (!to_encoding)
+    to_encoding = nl_langinfo (CODESET);
+
+  cd = iconv_open (to_encoding, from_encoding);
+  if (cd == (iconv_t)(-1))
+    {
+      /* Nothing was opened, so return without calling iconv_close.  */
+      logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
+                 quote (from_encoding), quote (to_encoding));
+      return converted_fname;
+    }
+
+  inlen = strlen (fname);
+  len = outlen = inlen * 2;
+  converted_fname = s = xmalloc (outlen + 1);
+
+  for (;;)
+    {
+      if (iconv (cd, &fname, &inlen, &s, &outlen) != (size_t)(-1)
+          && iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1))
+        {
+          /* iconv leaves S just past the last byte it wrote.  */
+          *s = '\0';
+          iconv_close(cd);
+          DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
+                   orig_fname, from_encoding, converted_fname, to_encoding));
+          xfree (orig_fname);
+          return converted_fname;
+        }
+
+      if (errno != E2BIG)
+        break;
+
+      /* Output buffer full: grow it, keeping the bytes already written.
+         The constant slack ensures progress even when INLEN is 0.  */
+      done = s - converted_fname;
+      len = done + inlen * 2 + 16;
+      converted_fname = xrealloc (converted_fname, len + 1);
+      s = converted_fname + done;
+      outlen = len - done;
+    }
+
+  if (errno == EINVAL || errno == EILSEQ) /* Bad multibyte sequence */
+    logprintf (LOG_VERBOSE,
+               _("Incomplete or invalid multibyte sequence encountered\n"));
+  else /* Weird, we got an unspecified error */
+    logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno);
+  xfree (converted_fname);
+  converted_fname = (char *)orig_fname;
+  DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n",
+           orig_fname, from_encoding, to_encoding));
+  iconv_close(cd);
+#endif
+
+  return converted_fname;
+}
|
|
|
bc22e6 |
+
|
|
|
bc22e6 |
/* Append to DEST the directory structure that corresponds the
|
|
|
bc22e6 |
directory part of URL's path. For example, if the URL is
|
|
|
bc22e6 |
http://server/dir1/dir2/file, this appends "/dir1/dir2".
|
|
|
bc22e6 |
@@ -1582,6 +1663,8 @@ url_file_name (const struct url *u, char *replaced_filename)
|
|
|
bc22e6 |
|
|
|
bc22e6 |
fname = fnres.base;
|
|
|
bc22e6 |
|
|
|
bc22e6 |
+ fname = convert_fname (fname);
|
|
|
bc22e6 |
+
|
|
|
bc22e6 |
/* Check the cases in which the unique extensions are not used:
|
|
|
bc22e6 |
1) Clobbering is turned off (-nc).
|
|
|
bc22e6 |
2) Retrieval with regetting.
|
|
|
bc22e6 |
diff --git a/tests/Test-ftp-iri.px b/tests/Test-ftp-iri.px
|
|
|
bc22e6 |
index a4b7fe1..24ac467 100755
|
|
|
bc22e6 |
--- a/tests/Test-ftp-iri.px
|
|
|
bc22e6 |
+++ b/tests/Test-ftp-iri.px
|
|
|
bc22e6 |
@@ -26,12 +26,12 @@ my %urls = (
|
|
|
bc22e6 |
},
|
|
|
bc22e6 |
);
|
|
|
bc22e6 |
|
|
|
bc22e6 |
-my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
|
|
|
bc22e6 |
+my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 --remote-encoding=utf-8 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
|
|
|
bc22e6 |
|
|
|
bc22e6 |
my $expected_error_code = 0;
|
|
|
bc22e6 |
|
|
|
bc22e6 |
my %expected_downloaded_files = (
|
|
|
bc22e6 |
- "fran${ccedilla_u8}ais.txt" => {
|
|
|
bc22e6 |
+ "fran${ccedilla_l1}ais.txt" => {
|
|
|
bc22e6 |
content => $francais,
|
|
|
bc22e6 |
},
|
|
|
bc22e6 |
);
|
|
|
bc22e6 |
--
|
|
|
bc22e6 |
2.5.5
|
|
|
bc22e6 |
|