Blame SOURCES/wget-1.14-support-non-ASCII-characters.patch

dba8a8
From 0a33fa22c597234ab133f63127b4a5e00cf048b9 Mon Sep 17 00:00:00 2001
dba8a8
From: Tomas Hozza <thozza@redhat.com>
dba8a8
Date: Mon, 20 Jun 2016 12:10:38 +0200
dba8a8
Subject: [PATCH] Support non-ASCII characters
dba8a8
dba8a8
Upstream commit 59b920874daa565a1323ffa1e756e80493190686
dba8a8
dba8a8
Signed-off-by: Tomas Hozza <thozza@redhat.com>
dba8a8
---
dba8a8
 src/url.c             | 87 +++++++++++++++++++++++++++++++++++++++++++++++++--
dba8a8
 tests/Test-ftp-iri.px |  4 +--
dba8a8
 2 files changed, 87 insertions(+), 4 deletions(-)
dba8a8
dba8a8
diff --git a/src/url.c b/src/url.c
dba8a8
index 6bca719..d0d9e27 100644
dba8a8
--- a/src/url.c
dba8a8
+++ b/src/url.c
dba8a8
@@ -42,6 +42,11 @@ as that of the covered work.  */
dba8a8
 #include "url.h"
dba8a8
 #include "host.h"  /* for is_valid_ipv6_address */
dba8a8
 
dba8a8
+#if HAVE_ICONV
dba8a8
+#include <iconv.h>
dba8a8
+#include <langinfo.h>
dba8a8
+#endif
dba8a8
+
dba8a8
 #ifdef __VMS
dba8a8
 #include "vms.h"
dba8a8
 #endif /* def __VMS */
dba8a8
@@ -1335,8 +1340,8 @@ UWC,  C,  C,  C,   C,  C,  C,  C,   /* NUL SOH STX ETX  EOT ENQ ACK BEL */
dba8a8
   0,  0,  0,  0,   0,  0,  0,  0,   /* p   q   r   s    t   u   v   w   */
dba8a8
   0,  0,  0,  0,   W,  0,  0,  C,   /* x   y   z   {    |   }   ~   DEL */
dba8a8
 
dba8a8
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 128-143 */
dba8a8
-  C, C, C, C,  C, C, C, C,  C, C, C, C,  C, C, C, C, /* 144-159 */
dba8a8
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 128-143 */
dba8a8
+  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0, /* 144-159 */
dba8a8
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
dba8a8
   0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,  0, 0, 0, 0,
dba8a8
 
dba8a8
@@ -1456,6 +1461,82 @@ append_uri_pathel (const char *b, const char *e, bool escaped,
dba8a8
   TAIL_INCR (dest, outlen);
dba8a8
 }
dba8a8
 
dba8a8
+/* Convert FNAME from opt.encoding_remote (default "UTF-8") to opt.locale
dba8a8
+   (default nl_langinfo (CODESET)).  On success FNAME is freed and a newly
dba8a8
+   allocated string is returned; otherwise FNAME itself is returned.  */
dba8a8
+static char *
dba8a8
+convert_fname (const char *fname)
dba8a8
+{
dba8a8
+  char *converted_fname = (char *)fname;
dba8a8
+#if HAVE_ICONV
dba8a8
+  const char *from_encoding = opt.encoding_remote;
dba8a8
+  const char *to_encoding = opt.locale;
dba8a8
+  iconv_t cd;
dba8a8
+  size_t done, inlen, outlen;
dba8a8
+  char *s;
dba8a8
+  const char *orig_fname = fname;
dba8a8
+
dba8a8
+  /* Defaults for remote and local encodings.  */
dba8a8
+  if (!from_encoding)
dba8a8
+    from_encoding = "UTF-8";
dba8a8
+  if (!to_encoding)
dba8a8
+    to_encoding = nl_langinfo (CODESET);
dba8a8
+
dba8a8
+  cd = iconv_open (to_encoding, from_encoding);
dba8a8
+  if (cd == (iconv_t)(-1))
dba8a8
+    logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"),
dba8a8
+	       quote (from_encoding), quote (to_encoding));
dba8a8
+  else
dba8a8
+    {
dba8a8
+      inlen = strlen (fname);
dba8a8
+      outlen = inlen * 2;
dba8a8
+      converted_fname = s = xmalloc (outlen + 1);
dba8a8
+      for (;;)
dba8a8
+	{
dba8a8
+	  if (iconv (cd, &fname, &inlen, &s, &outlen) != (size_t)(-1)
dba8a8
+	      && iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1))
dba8a8
+	    {
dba8a8
+	      *s = '\0'; /* iconv advanced S just past the output */
dba8a8
+	      iconv_close(cd);
dba8a8
+	      DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
dba8a8
+		       orig_fname, from_encoding, converted_fname, to_encoding));
dba8a8
+	      xfree (orig_fname);
dba8a8
+	      return converted_fname;
dba8a8
+	    }
dba8a8
+
dba8a8
+	  /* Incomplete or invalid multibyte sequence */
dba8a8
+	  if (errno == EINVAL || errno == EILSEQ)
dba8a8
+	    {
dba8a8
+	      logprintf (LOG_VERBOSE,
dba8a8
+			 _("Incomplete or invalid multibyte sequence encountered\n"));
dba8a8
+	      xfree (converted_fname);
dba8a8
+	      converted_fname = (char *)orig_fname;
dba8a8
+	      break;
dba8a8
+	    }
dba8a8
+	  else if (errno == E2BIG) /* Output buffer full */
dba8a8
+	    {
dba8a8
+	      done = s - converted_fname; /* bytes actually produced */
dba8a8
+	      outlen = inlen * 2 + 16; /* grows even when inlen == 0 */
dba8a8
+	      converted_fname = xrealloc (converted_fname, done + outlen + 1);
dba8a8
+	      s = converted_fname + done; /* resume right after the output */
dba8a8
+	    }
dba8a8
+	  else /* Weird, we got an unspecified error */
dba8a8
+	    {
dba8a8
+	      logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno);
dba8a8
+	      xfree (converted_fname);
dba8a8
+	      converted_fname = (char *)orig_fname;
dba8a8
+	      break;
dba8a8
+	    }
dba8a8
+	}
dba8a8
+      DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n",
dba8a8
+	       orig_fname, from_encoding, to_encoding));
dba8a8
+      iconv_close(cd); /* only close a descriptor iconv_open returned */
dba8a8
+    }
dba8a8
+#endif
dba8a8
+
dba8a8
+  return converted_fname;
dba8a8
+}
dba8a8
+
dba8a8
 /* Append to DEST the directory structure that corresponds the
dba8a8
    directory part of URL's path.  For example, if the URL is
dba8a8
    http://server/dir1/dir2/file, this appends "/dir1/dir2".
dba8a8
@@ -1582,6 +1663,8 @@ url_file_name (const struct url *u, char *replaced_filename)
dba8a8
 
dba8a8
   fname = fnres.base;
dba8a8
 
dba8a8
+  fname = convert_fname (fname);
dba8a8
+
dba8a8
   /* Check the cases in which the unique extensions are not used:
dba8a8
      1) Clobbering is turned off (-nc).
dba8a8
      2) Retrieval with regetting.
dba8a8
diff --git a/tests/Test-ftp-iri.px b/tests/Test-ftp-iri.px
dba8a8
index a4b7fe1..24ac467 100755
dba8a8
--- a/tests/Test-ftp-iri.px
dba8a8
+++ b/tests/Test-ftp-iri.px
dba8a8
@@ -26,12 +26,12 @@ my %urls = (
dba8a8
     },
dba8a8
 );
dba8a8
 
dba8a8
-my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
dba8a8
+my $cmdline = $WgetTest::WGETPATH . " --local-encoding=iso-8859-1 --remote-encoding=utf-8 -S ftp://localhost:{{port}}/fran${ccedilla_l1}ais.txt";
dba8a8
 
dba8a8
 my $expected_error_code = 0;
dba8a8
 
dba8a8
 my %expected_downloaded_files = (
dba8a8
-    "fran${ccedilla_u8}ais.txt" => {
dba8a8
+    "fran${ccedilla_l1}ais.txt" => {
dba8a8
         content => $francais,
dba8a8
     },
dba8a8
 );
dba8a8
-- 
dba8a8
2.5.5
dba8a8