Blob Blame History Raw
From 3af65474cbec9485534ad00d604c442e42095ee5 Mon Sep 17 00:00:00 2001
From: Kamil Dudka <kdudka@redhat.com>
Date: Mon, 21 Mar 2022 15:57:49 +0100
Subject: [PATCH] backport IDN2 support from lynx2.9.0dev.10

---
 WWW/Library/Implementation/HTParse.c | 49 +++++++++++++++++++++++++---
 WWW/Library/Implementation/HTParse.h | 12 +++++++
 aclocal.m4                           | 31 ++++++++++++++----
 config.hin                           |  1 +
 src/LYMain.c                         |  3 ++
 src/LYOptions.c                      | 33 +++++++++++++++++++
 src/LYrcFile.c                       | 14 ++++++++
 src/LYrcFile.h                       |  1 +
 8 files changed, 132 insertions(+), 12 deletions(-)

diff --git a/WWW/Library/Implementation/HTParse.c b/WWW/Library/Implementation/HTParse.c
index c5d947f..f1f8208 100644
--- a/WWW/Library/Implementation/HTParse.c
+++ b/WWW/Library/Implementation/HTParse.c
@@ -22,9 +22,14 @@
 #endif /* __MINGW32__ */
 #endif
 
-#ifdef USE_IDNA
+#ifdef USE_IDN2
+#include <idn2.h>
+#define FreeIdna(out)             idn2_free(out)
+#elif defined(USE_IDNA)
 #include <idna.h>
 #include <idn-free.h>
+#define FreeIdna(out)             idn_free(out)
+#define IDN2_OK                   IDNA_SUCCESS
 #endif
 
 #define HEX_ESCAPE '%'
@@ -242,7 +247,7 @@ char *HTParsePort(char *host, int *portp)
     return result;
 }
 
-#ifdef USE_IDNA
+#if defined(USE_IDNA) || defined(USE_IDN2)
 static int hex_decode(int ch)
 {
     int result = -1;
@@ -299,8 +304,42 @@ static void convert_to_idna(char *host)
 	}
 	if (code) {
 	    *dst = '\0';
+#ifdef USE_IDN2
+#if (!defined(IDN2_VERSION_NUMBER) || IDN2_VERSION_NUMBER < 0x02000003)
+	    /*
+	     * Older libidn2 mishandles STD3, stripping underscores.
+	     */
+	    if (strchr(buffer, '_') != NULL) {
+		code = -1;
+	    } else
+#endif
+		switch (LYidnaMode) {
+		case LYidna2003:
+		    code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
+		    break;
+		case LYidna2008:
+		    /* IDNA2008 rules without the TR46 amendments */
+		    code = idn2_to_ascii_8z(buffer, &output, 0);
+		    break;
+		case LYidnaTR46:
+		    code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
+					    | IDN2_NFC_INPUT);
+		    break;
+		case LYidnaCompat:
+		    /* IDNA2008 */
+		    code = idn2_to_ascii_8z(buffer, &output, IDN2_NONTRANSITIONAL
+					    | IDN2_NFC_INPUT);
+		    if (code == IDN2_DISALLOWED) {
+			/* IDNA2003 - compatible */
+			code = idn2_to_ascii_8z(buffer, &output, IDN2_TRANSITIONAL);
+		    }
+		    break;
+		}
+#else
 	    code = idna_to_ascii_8z(buffer, &output, IDNA_USE_STD3_ASCII_RULES);
-	    if (code == IDNA_SUCCESS) {
+#endif
+	    if (code == IDN2_OK) {
+		CTRACE((tfp, "convert_to_idna: `%s' -> `%s': OK\n", buffer, output));
 		strcpy(host, output);
 		strcat(host, params);
 	    } else {
@@ -309,7 +348,7 @@ static void convert_to_idna(char *host)
 			idna_strerror((Idna_rc) code)));
 	    }
 	    if (output)
-		idn_free(output);
+		FreeIdna(output);
 	}
     }
     free(buffer);
@@ -539,7 +578,7 @@ char *HTParse(const char *aName,
 		    }
 		}
 	    }
-#ifdef USE_IDNA
+#if defined(USE_IDNA) || defined(USE_IDN2)
 	    /*
 	     * Depending on locale-support, we could have a literal UTF-8
 	     * string as a host name, or a URL-encoded form of that.
diff --git a/WWW/Library/Implementation/HTParse.h b/WWW/Library/Implementation/HTParse.h
index ce1bff6..5496d82 100644
--- a/WWW/Library/Implementation/HTParse.h
+++ b/WWW/Library/Implementation/HTParse.h
@@ -49,6 +49,18 @@ extern "C" {
 #define URL_XALPHAS     UCH(1)
 #define URL_XPALPHAS    UCH(2)
 #define URL_PATH        UCH(4)
+
+#ifdef USE_IDN2
+    typedef enum {
+	LYidna2003 = 1,
+	LYidna2008,
+	LYidnaTR46,
+	LYidnaCompat
+    } HTIdnaModes;
+
+    extern int LYidnaMode;
+#endif
+
 /*	Strip white space off a string.				HTStrip()
  *	-------------------------------
  *
diff --git a/aclocal.m4 b/aclocal.m4
index 41139f9..4c68aec 100644
--- a/aclocal.m4
+++ b/aclocal.m4
@@ -3341,11 +3341,12 @@ test -d "$oldincludedir" && {
 $1="[$]$1 $cf_header_path_list"
 ])dnl
 dnl ---------------------------------------------------------------------------
-dnl CF_HELP_MESSAGE version: 3 updated: 1998/01/14 10:56:23
+dnl CF_HELP_MESSAGE version: 4 updated: 2019/12/31 08:53:54
 dnl ---------------
 dnl Insert text into the help-message, for readability, from AC_ARG_WITH.
 AC_DEFUN([CF_HELP_MESSAGE],
-[AC_DIVERT_HELP([$1])dnl
+[CF_ACVERSION_CHECK(2.53,[],[
+AC_DIVERT_HELP($1)])dnl
 ])dnl
 dnl ---------------------------------------------------------------------------
 dnl CF_INET_ADDR version: 7 updated: 2013/10/08 17:47:05
@@ -6600,25 +6601,41 @@ if test "$with_dmalloc" = yes ; then
 fi
 ])dnl
 dnl ---------------------------------------------------------------------------
-dnl CF_WITH_IDNA version: 10 updated: 2015/04/15 19:08:48
+dnl CF_WITH_IDNA version: 11 updated: 2021/07/05 09:09:42
 dnl ------------
-dnl Check for libidn, use it if found.
+dnl Check for libidn2, use it if found.  Otherwise, check for libidn, use that.
 dnl
 dnl $1 = optional path for headers/library
 AC_DEFUN([CF_WITH_IDNA],[
-  CF_ADD_OPTIONAL_PATH($1)
+CF_ADD_OPTIONAL_PATH($1)
 
-  CF_FIND_LINKAGE([
+CF_FIND_LINKAGE([
+#include <stdio.h>
+#include <idn2.h>
+],[
+	char *output = 0;
+	int code = idn2_to_ascii_8z("name", &output, IDN2_USE_STD3_ASCII_RULES);
+	(void) code;
+],idn2,,[CF_VERBOSE([unsuccessful, will try idn (older)])],,[$LIBICONV])
+
+if test "x$cf_cv_find_linkage_idn2" = xyes ; then
+	CF_VERBOSE(found idn2 library)
+	AC_DEFINE(USE_IDN2,1,[Define to 1 if we should use IDN2 library])
+else
+	CF_FIND_LINKAGE([
 #include <stdio.h>
 #include <idna.h>
 ],[
 	char *output = 0;
-    int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES);
+	int code = idna_to_ascii_8z("name", &output, IDNA_USE_STD3_ASCII_RULES);
+	(void) code;
 ],idn,,,,[$LIBICONV])
 
 if test "x$cf_cv_find_linkage_idn" = xyes ; then
+	CF_VERBOSE(found idn library)
 	AC_DEFINE(USE_IDNA,1,[Define to 1 if we should use IDNA library])
 fi
+fi
 ])dnl
 dnl ---------------------------------------------------------------------------
 dnl CF_WITH_PATH version: 11 updated: 2012/09/29 15:04:19
diff --git a/config.hin b/config.hin
index fb0e4d3..58051bb 100644
--- a/config.hin
+++ b/config.hin
@@ -271,6 +271,7 @@
 #undef USE_FILE_UPLOAD		/* CF_ARG_DISABLE(file-upload) */
 #undef USE_GNUTLS_FUNCS		/* CF_GNUTLS */
 #undef USE_GNUTLS_INCL		/* CF_GNUTLS */
+#undef USE_IDN2			/* CF_ARG_DISABLE(idna) */
 #undef USE_IDNA			/* CF_ARG_DISABLE(idna) */
 #undef USE_JUSTIFY_ELTS		/* CF_ARG_DISABLE(justify-elts) */
 #undef USE_LOCALE_CHARSET	/* CF_ARG_DISABLE(locale-charset) */
diff --git a/src/LYMain.c b/src/LYMain.c
index d36707e..ecfbe69 100644
--- a/src/LYMain.c
+++ b/src/LYMain.c
@@ -178,6 +178,9 @@ lynx_list_item_type *externals = NULL;
 
 			    /* linked list of external options */
 #endif
+#ifdef USE_IDN2
+int LYidnaMode = LYidnaTR46;
+#endif
 
 lynx_list_item_type *uploaders = NULL;
 int LYShowColor = SHOW_COLOR_UNKNOWN;	/* to show or not */
diff --git a/src/LYOptions.c b/src/LYOptions.c
index 6b4b0e0..e0e4732 100644
--- a/src/LYOptions.c
+++ b/src/LYOptions.c
@@ -2356,6 +2356,18 @@ static const char *assume_char_set_string = RC_ASSUME_CHARSET;
 static const char *display_char_set_string = RC_CHARACTER_SET;
 static const char *raw_mode_string = RC_RAW_MODE;
 
+#ifdef USE_IDN2
+static const char *idna_mode_string = RC_IDNA_MODE;
+static OptValues idna_values[] =
+{
+    {LYidna2003, N_("IDNA 2003"), "idna2003"},
+    {LYidna2008, N_("IDNA 2008"), "idna2008"},
+    {LYidnaTR46, N_("IDNA TR46"), "idnaTR46"},
+    {LYidnaCompat, N_("IDNA Compatible"), "idnaCompat"},
+    END_OPTIONS
+};
+#endif
+
 #ifdef USE_LOCALE_CHARSET
 static const char *locale_charset_string = RC_LOCALE_CHARSET;
 #endif
@@ -3233,6 +3245,13 @@ int postoptions(DocInfo *newdoc)
 		    current_char_set = newval;
 	    }
 	}
+#ifdef USE_IDN2
+	/* Internationalized Domain Names: SELECT */
+	if (!strcmp(data[i].tag, idna_mode_string)
+	    && GetOptValues(idna_values, data[i].value, &code)) {
+	    LYidnaMode = code;
+	}
+#endif
 
 	/* Raw Mode: ON/OFF */
 	if (!strcmp(data[i].tag, raw_mode_string)
@@ -3933,6 +3952,20 @@ static int gen_options(char **newfile)
 	EndSelect(fp0);
     }
 
+#ifdef USE_IDN2
+    /* Internationalized Domain Names: SELECT */
+    {
+	PutLabel(fp0, gettext("Internationalized domain names"), idna_mode_string);
+	BeginSelect(fp0, idna_mode_string);
+	for (i = 0; idna_values[i].value != 0; i++) {
+	    PutOption(fp0, idna_values[i].value == LYidnaMode,
+		      idna_values[i].HtmlName,
+		      idna_values[i].LongName);
+	}
+	EndSelect(fp0);
+    }
+#endif
+
     /* Raw Mode: ON/OFF */
     if (LYHaveCJKCharacterSet) {
 	/*
diff --git a/src/LYrcFile.c b/src/LYrcFile.c
index 1754f12..b98bfc2 100644
--- a/src/LYrcFile.c
+++ b/src/LYrcFile.c
@@ -71,6 +71,16 @@ static Config_Enum tbl_file_sort[] = {
     { NULL,		-1 },
 };
 
+#ifdef USE_IDN2
+static Config_Enum tbl_idna_mode[] = {
+    { "IDNA2003",	LYidna2003 },
+    { "IDNA2008",	LYidna2008 },
+    { "TR46",		LYidnaTR46 },
+    { "Compatible",	LYidnaCompat },
+    { NULL,		-1 },
+};
+#endif
+
 Config_Enum tbl_keypad_mode[] = {
     { "FIELDS_ARE_NUMBERED", FIELDS_ARE_NUMBERED },
     { "LINKS_AND_FIELDS_ARE_NUMBERED", LINKS_AND_FIELDS_ARE_NUMBERED },
@@ -462,6 +472,10 @@ file lists such as FTP directories.  The options are:\n\
     MAYBE_SET(RC_HTML5_CHARSETS,        html5_charsets,     MSG_ENABLE_LYNXRC),
     MAYBE_FUN(RC_HTTP_PROTOCOL,         get_http_protocol,  put_http_protocol,
 	      MSG_ENABLE_LYNXRC),
+#ifdef USE_IDN2
+    MAYBE_ENU(RC_IDNA_MODE,             LYidnaMode,         tbl_idna_mode,
+	      MSG_ENABLE_LYNXRC),
+#endif
 #ifdef EXP_KEYBOARD_LAYOUT
     PARSE_ARY(RC_KBLAYOUT,              current_layout,     LYKbLayoutNames, NULL),
 #endif
diff --git a/src/LYrcFile.h b/src/LYrcFile.h
index 3cf07c0..cd41a0f 100644
--- a/src/LYrcFile.h
+++ b/src/LYrcFile.h
@@ -110,6 +110,7 @@
 #define RC_HTTPS_PROXY                  "https_proxy"
 #define RC_HTTP_PROTOCOL                "http_protocol"
 #define RC_HTTP_PROXY                   "http_proxy"
+#define RC_IDNA_MODE                    "idna_mode"
 #define RC_INCLUDE                      "include"
 #define RC_INFLATE_PATH                 "inflate_path"
 #define RC_INFOSECS                     "infosecs"
-- 
2.34.1