Blame SOURCES/wget-1.14-support-non-ASCII-characters.patch

87e294
From 0a33fa22c597234ab133f63127b4a5e00cf048b9 Mon Sep 17 00:00:00 2001
87e294
From: Tomas Hozza <thozza@redhat.com>
87e294
Date: Mon, 20 Jun 2016 12:10:38 +0200
87e294
Subject: [PATCH] Support non-ASCII characters
87e294
87e294
Upstream commit 59b920874daa565a1323ffa1e756e80493190686
87e294
87e294
Signed-off-by: Tomas Hozza <thozza@redhat.com>
87e294
---
87e294
 src/url.c             | 87 +++++++++++++++++++++++++++++++++++++++++++++++++--
87e294
 tests/Test-ftp-iri.px |  4 +--
87e294
 2 files changed, 87 insertions(+), 4 deletions(-)
87e294
87e294
diff --git a/src/url.c b/src/url.c
87e294
index 6bca719..d0d9e27 100644
87e294
--- a/src/url.c
87e294
+++ b/src/url.c
87e294
@@ -42,6 +42,11 @@ as that of the covered work.  */
87e294
 #include "url.h"
87e294
 #include "host.h"  /* for is_valid_ipv6_address */
87e294
 
87e294
+#if HAVE_ICONV
87e294
+#include <iconv.h>
87e294
+#include <langinfo.h>
87e294
+#endif
87e294
+
87e294
 #ifdef __VMS
87e294
 #include "vms.h"
87e294
 #endif /* def __VMS */
87e294
@@ -1335,8 +1340,8 @@ UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
87e294
   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
87e294
   0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */
87e294
 
87e294
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
87e294
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
87e294
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 128-143 */
87e294
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 144-159 */
87e294
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
87e294
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
87e294
 
87e294
@@ -1456,6 +1461,82 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
87e294
   TAIL_INCR (dest, outlen);
87e294
 }
87e294
 
87e294
+/* Convert FNAME from the remote encoding (opt.encoding_remote, default
87e294
+   "UTF-8") to the local one (opt.locale, default nl_langinfo (CODESET)).
87e294
+   On success FNAME is freed and a newly allocated string is returned; on
87e294
+   any failure, or when built without iconv, FNAME itself is returned.  */
87e294
+static char *
87e294
+convert_fname (const char *fname)
87e294
+{
87e294
+  char *converted_fname = (char *)fname;
87e294
+#if HAVE_ICONV
87e294
+  const char *from_encoding = opt.encoding_remote;
87e294
+  const char *to_encoding = opt.locale;
87e294
+  iconv_t cd;
87e294
+  size_t done, inlen, outlen;
87e294
+  char *s;
87e294
+  const char *orig_fname = fname;
87e294
+
87e294
+  /* Defaults for remote and local encodings.  */
87e294
+  if (!from_encoding)
87e294
+    from_encoding = "UTF-8";
87e294
+  if (!to_encoding)
87e294
+    to_encoding = nl_langinfo (CODESET);
87e294
+
87e294
+  cd = iconv_open (to_encoding, from_encoding);
87e294
+  if (cd == (iconv_t)(-1))
87e294
+    {
87e294
+      /* Nothing was opened, so CD must not be passed to iconv_close.  */
87e294
+      logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
87e294
+		 quote (from_encoding), quote (to_encoding));
87e294
+      return converted_fname;
87e294
+    }
87e294
+
87e294
+  inlen = strlen (fname);
87e294
+  outlen = inlen * 2;
87e294
+  converted_fname = s = xmalloc (outlen + 1);
87e294
+
87e294
+  for (;;)
87e294
+    {
87e294
+      if (iconv (cd, &fname, &inlen, &s, &outlen) != (size_t)(-1)
87e294
+	  && iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1))
87e294
+	{
87e294
+	  *s = '\0';	/* S points just past the converted bytes.  */
87e294
+	  iconv_close (cd);
87e294
+	  DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
87e294
+		   orig_fname, from_encoding, converted_fname, to_encoding));
87e294
+	  xfree (orig_fname);
87e294
+	  return converted_fname;
87e294
+	}
87e294
+
87e294
+      if (errno != E2BIG)
87e294
+	break;
87e294
+
87e294
+      /* Output buffer full.  Keep the bytes really written (iconv may stop
87e294
+	 short of the buffer's end) and enlarge the free space; the "+ 8"
87e294
+	 makes it grow even when the final flush failed with INLEN == 0.  */
87e294
+      done = s - converted_fname;
87e294
+      outlen += inlen * 2 + 8;
87e294
+      converted_fname = xrealloc (converted_fname, done + outlen + 1);
87e294
+      s = converted_fname + done;
87e294
+    }
87e294
+
87e294
+  if (errno == EINVAL || errno == EILSEQ)
87e294
+    logprintf (LOG_VERBOSE,
87e294
+	       _("Incomplete or invalid multibyte sequence encountered\n"));
87e294
+  else /* Weird, we got an unspecified error */
87e294
+    logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno);
87e294
+  /* Conversion failed: drop the partial output and return the original.  */
87e294
+  xfree (converted_fname);
87e294
+  converted_fname = (char *)orig_fname;
87e294
+  DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n",
87e294
+	   orig_fname, from_encoding, to_encoding));
87e294
+  iconv_close (cd);
87e294
+#endif
87e294
+
87e294
+  return converted_fname;
87e294
+}
87e294
+
87e294
 /* Append to DEST the directory structure that corresponds the
87e294
    directory part of URL's path.  For example, if the URL is
87e294
    http://server/dir1/dir2/file, this appends "/dir1/dir2".
87e294
@@ -1582,6 +1663,8 @@ url_file_name (const struct url *u, char *replaced_filename)
87e294
 
87e294
   fname = fnres.base;
87e294
 
87e294
+  fname = convert_fname (fname);
87e294
+
87e294
   /* Check the cases in which the unique extensions are not used:
87e294
      1) Clobbering is turned off (-nc).
87e294
      2) Retrieval with regetting.
87e294
diff --git a/tests/Test-ftp-iri.px b/tests/Test-ftp-iri.px
87e294
index a4b7fe1..24ac467 100755
87e294
--- a/tests/Test-ftp-iri.px
87e294
+++ b/tests/Test-ftp-iri.px
87e294
@@ -26,12 +26,12 @@ my %urls = (
87e294
     },
87e294
 );
87e294
 
87e294
-my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
87e294
+my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 --remote-encoding=utf-8 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
87e294
 
87e294
 my $expected_error_code = 0;
87e294
 
87e294
 my %expected_downloaded_files = (
87e294
-    "fran${ccedilla_u8}ais.txt" => {
87e294
+    "fran${ccedilla_l1}ais.txt" => {
87e294
         content => $francais,
87e294
     },
87e294
 );
87e294
-- 
87e294
2.5.5
87e294