8d0e4d
From 29117b2d07af00f4d4b87cf778e4294588ab1a83 Mon Sep 17 00:00:00 2001
8d0e4d
From: Kamil Dudka <kdudka@redhat.com>
8d0e4d
Date: Thu, 1 Dec 2016 15:10:04 +0100
8d0e4d
Subject: [PATCH] coreutils-i18n.patch
8d0e4d
8d0e4d
TODO: merge upstream
8d0e4d
---
8d0e4d
 lib/linebuffer.h            |   8 +
8d0e4d
 src/fold.c                  | 308 ++++++++++++++++--
8d0e4d
 src/join.c                  | 359 ++++++++++++++++++---
8d0e4d
 src/pr.c                    | 443 ++++++++++++++++++++++---
8d0e4d
 src/sort.c                  | 764 +++++++++++++++++++++++++++++++++++++++++---
8d0e4d
 src/uniq.c                  | 265 ++++++++++++++-
8d0e4d
 tests/i18n/sort.sh          |  29 ++
8d0e4d
 tests/local.mk              |   2 +
8d0e4d
 tests/misc/expand.pl        |  42 +++
8d0e4d
 tests/misc/fold.pl          |  50 ++-
8d0e4d
 tests/misc/join.pl          |  50 +++
8d0e4d
 tests/misc/sort-mb-tests.sh |  45 +++
8d0e4d
 tests/misc/sort-merge.pl    |  42 +++
8d0e4d
 tests/misc/sort.pl          |  40 ++-
8d0e4d
 tests/misc/unexpand.pl      |  39 +++
8d0e4d
 tests/misc/uniq.pl          |  55 ++++
8d0e4d
 tests/pr/pr-tests.pl        |  49 +++
8d0e4d
 17 files changed, 2430 insertions(+), 160 deletions(-)
8d0e4d
 create mode 100755 tests/i18n/sort.sh
8d0e4d
 create mode 100755 tests/misc/sort-mb-tests.sh
8d0e4d
8d0e4d
diff --git a/lib/linebuffer.h b/lib/linebuffer.h
8d0e4d
index 64181af..9b8fe5a 100644
8d0e4d
--- a/lib/linebuffer.h
8d0e4d
+++ b/lib/linebuffer.h
8d0e4d
@@ -21,6 +21,11 @@
8d0e4d
 
8d0e4d
 # include <stdio.h>
8d0e4d
 
8d0e4d
+/* Get mbstate_t.  */
8d0e4d
+# if HAVE_WCHAR_H
8d0e4d
+#  include <wchar.h>
8d0e4d
+# endif
8d0e4d
+
8d0e4d
 /* A 'struct linebuffer' holds a line of text. */
8d0e4d
 
8d0e4d
 struct linebuffer
8d0e4d
@@ -28,6 +33,9 @@ struct linebuffer
8d0e4d
   size_t size;                  /* Allocated. */
8d0e4d
   size_t length;                /* Used. */
8d0e4d
   char *buffer;
8d0e4d
+# if HAVE_WCHAR_H
8d0e4d
+  mbstate_t state;
8d0e4d
+# endif
8d0e4d
 };
8d0e4d
 
8d0e4d
 /* Initialize linebuffer LINEBUFFER for use. */
8d0e4d
diff --git a/src/fold.c b/src/fold.c
8d0e4d
index 8cd0d6b..d23edd5 100644
8d0e4d
--- a/src/fold.c
8d0e4d
+++ b/src/fold.c
8d0e4d
@@ -22,12 +22,34 @@
8d0e4d
 #include <getopt.h>
8d0e4d
 #include <sys/types.h>
8d0e4d
 
8d0e4d
+/* Get mbstate_t, mbrtowc(), wcwidth().  */
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+# include <wchar.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Get iswprint(), iswblank().  */
8d0e4d
+#if HAVE_WCTYPE_H
8d0e4d
+# include <wctype.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 #include "system.h"
8d0e4d
 #include "die.h"
8d0e4d
 #include "error.h"
8d0e4d
 #include "fadvise.h"
8d0e4d
 #include "xdectoint.h"
8d0e4d
 
8d0e4d
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
8d0e4d
+      installation; work around this configuration error.  */
8d0e4d
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
8d0e4d
+# undef MB_LEN_MAX
8d0e4d
+# define MB_LEN_MAX 16
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
8d0e4d
+#if HAVE_MBRTOWC && defined mbstate_t
8d0e4d
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 #define TAB_WIDTH 8
8d0e4d
 
8d0e4d
 /* The official name of this program (e.g., no 'g' prefix).  */
8d0e4d
@@ -35,20 +57,41 @@
8d0e4d
 
8d0e4d
 #define AUTHORS proper_name ("David MacKenzie")
8d0e4d
 
8d0e4d
+#define FATAL_ERROR(Message)                                            \
8d0e4d
+  do                                                                    \
8d0e4d
+    {                                                                   \
8d0e4d
+      error (0, 0, "%s", (Message));                                    \
8d0e4d
+      usage (2);                                                        \
8d0e4d
+    }                                                                   \
8d0e4d
+  while (0)
8d0e4d
+
8d0e4d
+enum operating_mode
8d0e4d
+{
8d0e4d
+  /* Fold texts by columns that are at the given positions. */
8d0e4d
+  column_mode,
8d0e4d
+
8d0e4d
+  /* Fold texts by bytes that are at the given positions. */
8d0e4d
+  byte_mode,
8d0e4d
+
8d0e4d
+  /* Fold texts by characters that are at the given positions. */
8d0e4d
+  character_mode,
8d0e4d
+};
8d0e4d
+
8d0e4d
+/* The current operating mode.  (Default: column_mode) */
8d0e4d
+static enum operating_mode operating_mode;
8d0e4d
+
8d0e4d
 /* If nonzero, try to break on whitespace. */
8d0e4d
 static bool break_spaces;
8d0e4d
 
8d0e4d
-/* If nonzero, count bytes, not column positions. */
8d0e4d
-static bool count_bytes;
8d0e4d
-
8d0e4d
 /* If nonzero, at least one of the files we read was standard input. */
8d0e4d
 static bool have_read_stdin;
8d0e4d
 
8d0e4d
-static char const shortopts[] = "bsw:0::1::2::3::4::5::6::7::8::9::";
8d0e4d
+static char const shortopts[] = "bcsw:0::1::2::3::4::5::6::7::8::9::";
8d0e4d
 
8d0e4d
 static struct option const longopts[] =
8d0e4d
 {
8d0e4d
   {"bytes", no_argument, NULL, 'b'},
8d0e4d
+  {"characters", no_argument, NULL, 'c'},
8d0e4d
   {"spaces", no_argument, NULL, 's'},
8d0e4d
   {"width", required_argument, NULL, 'w'},
8d0e4d
   {GETOPT_HELP_OPTION_DECL},
8d0e4d
@@ -76,6 +119,7 @@ Wrap input lines in each FILE, writing to standard output.\n\
8d0e4d
 
8d0e4d
       fputs (_("\
8d0e4d
   -b, --bytes         count bytes rather than columns\n\
8d0e4d
+  -c, --characters    count characters rather than columns\n\
8d0e4d
   -s, --spaces        break at spaces\n\
8d0e4d
   -w, --width=WIDTH   use WIDTH columns instead of 80\n\
8d0e4d
 "), stdout);
8d0e4d
@@ -93,7 +137,7 @@ Wrap input lines in each FILE, writing to standard output.\n\
8d0e4d
 static size_t
8d0e4d
 adjust_column (size_t column, char c)
8d0e4d
 {
8d0e4d
-  if (!count_bytes)
8d0e4d
+  if (operating_mode != byte_mode)
8d0e4d
     {
8d0e4d
       if (c == '\b')
8d0e4d
         {
8d0e4d
@@ -116,30 +160,14 @@ adjust_column (size_t column, char c)
8d0e4d
    to stdout, with maximum line length WIDTH.
8d0e4d
    Return true if successful.  */
8d0e4d
 
8d0e4d
-static bool
8d0e4d
-fold_file (char const *filename, size_t width)
8d0e4d
+static void
8d0e4d
+fold_text (FILE *istream, size_t width, int *saved_errno)
8d0e4d
 {
8d0e4d
-  FILE *istream;
8d0e4d
   int c;
8d0e4d
   size_t column = 0;		/* Screen column where next char will go. */
8d0e4d
   size_t offset_out = 0;	/* Index in 'line_out' for next char. */
8d0e4d
   static char *line_out = NULL;
8d0e4d
   static size_t allocated_out = 0;
8d0e4d
-  int saved_errno;
8d0e4d
-
8d0e4d
-  if (STREQ (filename, "-"))
8d0e4d
-    {
8d0e4d
-      istream = stdin;
8d0e4d
-      have_read_stdin = true;
8d0e4d
-    }
8d0e4d
-  else
8d0e4d
-    istream = fopen (filename, "r");
8d0e4d
-
8d0e4d
-  if (istream == NULL)
8d0e4d
-    {
8d0e4d
-      error (0, errno, "%s", quotef (filename));
8d0e4d
-      return false;
8d0e4d
-    }
8d0e4d
 
8d0e4d
   fadvise (istream, FADVISE_SEQUENTIAL);
8d0e4d
 
8d0e4d
@@ -169,6 +197,15 @@ fold_file (char const *filename, size_t width)
8d0e4d
               bool found_blank = false;
8d0e4d
               size_t logical_end = offset_out;
8d0e4d
 
8d0e4d
+              /* If LINE_OUT has no wide character,
8d0e4d
+                 put a new wide character in LINE_OUT
8d0e4d
+                 if column is bigger than width. */
8d0e4d
+              if (offset_out == 0)
8d0e4d
+                {
8d0e4d
+                  line_out[offset_out++] = c;
8d0e4d
+                  continue;
8d0e4d
+                }
8d0e4d
+
8d0e4d
               /* Look for the last blank. */
8d0e4d
               while (logical_end)
8d0e4d
                 {
8d0e4d
@@ -215,11 +252,221 @@ fold_file (char const *filename, size_t width)
8d0e4d
       line_out[offset_out++] = c;
8d0e4d
     }
8d0e4d
 
8d0e4d
-  saved_errno = errno;
8d0e4d
+  *saved_errno = errno;
8d0e4d
 
8d0e4d
   if (offset_out)
8d0e4d
     fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
8d0e4d
 
8d0e4d
+}
8d0e4d
+
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+static void
8d0e4d
+fold_multibyte_text (FILE *istream, size_t width, int *saved_errno)
8d0e4d
+{
8d0e4d
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
8d0e4d
+  size_t buflen = 0;        /* The length of the byte sequence in buf. */
8d0e4d
+  char *bufpos = buf;         /* Next read position of BUF. */
8d0e4d
+  wint_t wc;                /* A gotten wide character. */
8d0e4d
+  size_t mblength;        /* The byte length of the multibyte character
8d0e4d
+                           that was converted to WC. */
8d0e4d
+  mbstate_t state, state_bak;        /* State of the stream. */
8d0e4d
+  int convfail = 0;                /* Nonzero when conversion failed, otherwise 0. */
8d0e4d
+
8d0e4d
+  static char *line_out = NULL;
8d0e4d
+  size_t offset_out = 0;        /* Index in `line_out' for next char. */
8d0e4d
+  static size_t allocated_out = 0;
8d0e4d
+
8d0e4d
+  int increment;
8d0e4d
+  size_t column = 0;
8d0e4d
+
8d0e4d
+  size_t last_blank_pos;
8d0e4d
+  size_t last_blank_column;
8d0e4d
+  int is_blank_seen;
8d0e4d
+  int last_blank_increment = 0;
8d0e4d
+  int is_bs_following_last_blank;
8d0e4d
+  size_t bs_following_last_blank_num;
8d0e4d
+  int is_cr_after_last_blank;
8d0e4d
+
8d0e4d
+#define CLEAR_FLAGS                                \
8d0e4d
+   do                                                \
8d0e4d
+     {                                                \
8d0e4d
+        last_blank_pos = 0;                        \
8d0e4d
+        last_blank_column = 0;                        \
8d0e4d
+        is_blank_seen = 0;                        \
8d0e4d
+        is_bs_following_last_blank = 0;                \
8d0e4d
+        bs_following_last_blank_num = 0;        \
8d0e4d
+        is_cr_after_last_blank = 0;                \
8d0e4d
+     }                                                \
8d0e4d
+   while (0)
8d0e4d
+
8d0e4d
+#define START_NEW_LINE                        \
8d0e4d
+   do                                        \
8d0e4d
+     {                                        \
8d0e4d
+      putchar ('\n');                        \
8d0e4d
+      column = 0;                        \
8d0e4d
+      offset_out = 0;                        \
8d0e4d
+      CLEAR_FLAGS;                        \
8d0e4d
+    }                                        \
8d0e4d
+   while (0)
8d0e4d
+
8d0e4d
+  CLEAR_FLAGS;
8d0e4d
+  memset (&state, '\0', sizeof(mbstate_t));
8d0e4d
+
8d0e4d
+  for (;; bufpos += mblength, buflen -= mblength)
8d0e4d
+    {
8d0e4d
+      if (buflen < MB_LEN_MAX && !feof (istream) && !ferror (istream))
8d0e4d
+        {
8d0e4d
+          memmove (buf, bufpos, buflen);
8d0e4d
+          buflen += fread (buf + buflen, sizeof(char), BUFSIZ, istream);
8d0e4d
+          bufpos = buf;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (buflen < 1)
8d0e4d
+        break;
8d0e4d
+      convfail = 0;   /* A failed conversion affects only that byte.  */
8d0e4d
+      /* Get a wide character. */
8d0e4d
+      state_bak = state;
8d0e4d
+      mblength = mbrtowc ((wchar_t *)&wc, bufpos, buflen, &state);
8d0e4d
+
8d0e4d
+      switch (mblength)
8d0e4d
+        {
8d0e4d
+        case (size_t)-1:
8d0e4d
+        case (size_t)-2:
8d0e4d
+          convfail++;
8d0e4d
+          state = state_bak;
8d0e4d
+          /* Fall through. */
8d0e4d
+
8d0e4d
+        case 0:
8d0e4d
+          mblength = 1;
8d0e4d
+          break;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+rescan:
8d0e4d
+      if (operating_mode == byte_mode)                        /* byte mode */
8d0e4d
+        increment = mblength;
8d0e4d
+      else if (operating_mode == character_mode)        /* character mode */
8d0e4d
+        increment = 1;
8d0e4d
+      else                                                /* column mode */
8d0e4d
+        {
8d0e4d
+          if (convfail)
8d0e4d
+            increment = 1;
8d0e4d
+          else
8d0e4d
+            {
8d0e4d
+              switch (wc)
8d0e4d
+                {
8d0e4d
+                case L'\n':
8d0e4d
+                  fwrite (line_out, sizeof(char), offset_out, stdout);
8d0e4d
+                  START_NEW_LINE;
8d0e4d
+                  continue;
8d0e4d
+
8d0e4d
+                case L'\b':
8d0e4d
+                  increment = (column > 0) ? -1 : 0;
8d0e4d
+                  break;
8d0e4d
+
8d0e4d
+                case L'\r':
8d0e4d
+                  increment = -1 * column;
8d0e4d
+                  break;
8d0e4d
+
8d0e4d
+                case L'\t':
8d0e4d
+                  increment = 8 - column % 8;
8d0e4d
+                  break;
8d0e4d
+
8d0e4d
+                default:
8d0e4d
+                  increment = wcwidth (wc);
8d0e4d
+                  increment = (increment < 0) ? 0 : increment;
8d0e4d
+                }
8d0e4d
+            }
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (column + increment > width && break_spaces && last_blank_pos)
8d0e4d
+        {
8d0e4d
+          fwrite (line_out, sizeof(char), last_blank_pos, stdout);
8d0e4d
+          putchar ('\n');
8d0e4d
+
8d0e4d
+          offset_out = offset_out - last_blank_pos;
8d0e4d
+          column = column - last_blank_column + ((is_cr_after_last_blank)
8d0e4d
+              ? last_blank_increment : bs_following_last_blank_num);
8d0e4d
+          memmove (line_out, line_out + last_blank_pos, offset_out);
8d0e4d
+          CLEAR_FLAGS;
8d0e4d
+          goto rescan;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (column + increment > width && column != 0)
8d0e4d
+        {
8d0e4d
+          fwrite (line_out, sizeof(char), offset_out, stdout);
8d0e4d
+          START_NEW_LINE;
8d0e4d
+          goto rescan;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (allocated_out < offset_out + mblength)
8d0e4d
+        {
8d0e4d
+          line_out = X2REALLOC (line_out, &allocated_out);
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      memcpy (line_out + offset_out, bufpos, mblength);
8d0e4d
+      offset_out += mblength;
8d0e4d
+      column += increment;
8d0e4d
+
8d0e4d
+      if (is_blank_seen && !convfail && wc == L'\r')
8d0e4d
+        is_cr_after_last_blank = 1;
8d0e4d
+
8d0e4d
+      if (is_bs_following_last_blank && !convfail && wc == L'\b')
8d0e4d
+        ++bs_following_last_blank_num;
8d0e4d
+      else
8d0e4d
+        is_bs_following_last_blank = 0;
8d0e4d
+
8d0e4d
+      if (break_spaces && !convfail && iswblank (wc))
8d0e4d
+        {
8d0e4d
+          last_blank_pos = offset_out;
8d0e4d
+          last_blank_column = column;
8d0e4d
+          is_blank_seen = 1;
8d0e4d
+          last_blank_increment = increment;
8d0e4d
+          is_bs_following_last_blank = 1;
8d0e4d
+          bs_following_last_blank_num = 0;
8d0e4d
+          is_cr_after_last_blank = 0;
8d0e4d
+        }
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  *saved_errno = errno;
8d0e4d
+
8d0e4d
+  if (offset_out)
8d0e4d
+    fwrite (line_out, sizeof (char), (size_t) offset_out, stdout);
8d0e4d
+
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Fold file FILENAME, or standard input if FILENAME is "-",
8d0e4d
+   to stdout, with maximum line length WIDTH.
8d0e4d
+   Return true if successful, false if an error occurs. */
8d0e4d
+
8d0e4d
+static bool
8d0e4d
+fold_file (char const *filename, size_t width)
8d0e4d
+{
8d0e4d
+  FILE *istream;
8d0e4d
+  int saved_errno;
8d0e4d
+
8d0e4d
+  if (STREQ (filename, "-"))
8d0e4d
+    {
8d0e4d
+      istream = stdin;
8d0e4d
+      have_read_stdin = true;
8d0e4d
+    }
8d0e4d
+  else
8d0e4d
+    istream = fopen (filename, "r");
8d0e4d
+
8d0e4d
+  if (istream == NULL)
8d0e4d
+    {
8d0e4d
+      error (0, errno, "%s", quotef (filename));
8d0e4d
+      return false;
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  /* Define how ISTREAM is being folded. */
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1)
8d0e4d
+    fold_multibyte_text (istream, width, &saved_errno);
8d0e4d
+  else
8d0e4d
+#endif
8d0e4d
+    fold_text (istream, width, &saved_errno);
8d0e4d
+
8d0e4d
   if (ferror (istream))
8d0e4d
     {
8d0e4d
       error (0, saved_errno, "%s", quotef (filename));
8d0e4d
@@ -252,7 +499,8 @@ main (int argc, char **argv)
8d0e4d
 
8d0e4d
   atexit (close_stdout);
8d0e4d
 
8d0e4d
-  break_spaces = count_bytes = have_read_stdin = false;
8d0e4d
+  operating_mode = column_mode;
8d0e4d
+  break_spaces = have_read_stdin = false;
8d0e4d
 
8d0e4d
   while ((optc = getopt_long (argc, argv, shortopts, longopts, NULL)) != -1)
8d0e4d
     {
8d0e4d
@@ -261,7 +509,15 @@ main (int argc, char **argv)
8d0e4d
       switch (optc)
8d0e4d
         {
8d0e4d
         case 'b':		/* Count bytes rather than columns. */
8d0e4d
-          count_bytes = true;
8d0e4d
+          if (operating_mode != column_mode)
8d0e4d
+            FATAL_ERROR (_("only one way of folding may be specified"));
8d0e4d
+          operating_mode = byte_mode;
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        case 'c':
8d0e4d
+          if (operating_mode != column_mode)
8d0e4d
+            FATAL_ERROR (_("only one way of folding may be specified"));
8d0e4d
+          operating_mode = character_mode;
8d0e4d
           break;
8d0e4d
 
8d0e4d
         case 's':		/* Break at word boundaries. */
8d0e4d
diff --git a/src/join.c b/src/join.c
8d0e4d
index 98b461c..9990f38 100644
8d0e4d
--- a/src/join.c
8d0e4d
+++ b/src/join.c
8d0e4d
@@ -22,19 +22,33 @@
8d0e4d
 #include <sys/types.h>
8d0e4d
 #include <getopt.h>
8d0e4d
 
8d0e4d
+/* Get mbstate_t, mbrtowc(), wcrtomb(), wcwidth().  */
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+# include <wchar.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Get iswblank(), towupper().  */
8d0e4d
+#if HAVE_WCTYPE_H
8d0e4d
+# include <wctype.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 #include "system.h"
8d0e4d
 #include "die.h"
8d0e4d
 #include "error.h"
8d0e4d
 #include "fadvise.h"
8d0e4d
 #include "hard-locale.h"
8d0e4d
 #include "linebuffer.h"
8d0e4d
-#include "memcasecmp.h"
8d0e4d
 #include "quote.h"
8d0e4d
 #include "stdio--.h"
8d0e4d
 #include "xmemcoll.h"
8d0e4d
 #include "xstrtol.h"
8d0e4d
 #include "argmatch.h"
8d0e4d
 
8d0e4d
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
8d0e4d
+#if HAVE_MBRTOWC && defined mbstate_t
8d0e4d
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* The official name of this program (e.g., no 'g' prefix).  */
8d0e4d
 #define PROGRAM_NAME "join"
8d0e4d
 
8d0e4d
@@ -136,10 +150,12 @@ static struct outlist outlist_head;
8d0e4d
 /* Last element in 'outlist', where a new element can be added.  */
8d0e4d
 static struct outlist *outlist_end = &outlist_head;
8d0e4d
 
8d0e4d
-/* Tab character separating fields.  If negative, fields are separated
8d0e4d
-   by any nonempty string of blanks, otherwise by exactly one
8d0e4d
-   tab character whose value (when cast to unsigned char) equals TAB.  */
8d0e4d
-static int tab = -1;
8d0e4d
+/* Tab character separating fields.  If NULL, fields are separated
8d0e4d
+   by any nonempty string of blanks.  */
8d0e4d
+static char *tab = NULL;
8d0e4d
+
8d0e4d
+/* The number of bytes used for tab. */
8d0e4d
+static size_t tablen = 0;
8d0e4d
 
8d0e4d
 /* If nonzero, check that the input is correctly ordered. */
8d0e4d
 static enum
8d0e4d
@@ -276,13 +292,14 @@ xfields (struct line *line)
8d0e4d
   if (ptr == lim)
8d0e4d
     return;
8d0e4d
 
8d0e4d
-  if (0 <= tab && tab != '\n')
8d0e4d
+  if (tab != NULL)
8d0e4d
     {
8d0e4d
+      unsigned char t = tab[0];
8d0e4d
       char *sep;
8d0e4d
-      for (; (sep = memchr (ptr, tab, lim - ptr)) != NULL; ptr = sep + 1)
8d0e4d
+      for (; (sep = memchr (ptr, t, lim - ptr)) != NULL; ptr = sep + 1)
8d0e4d
         extract_field (line, ptr, sep - ptr);
8d0e4d
     }
8d0e4d
-  else if (tab < 0)
8d0e4d
+   else
8d0e4d
     {
8d0e4d
       /* Skip leading blanks before the first field.  */
8d0e4d
       while (field_sep (*ptr))
8d0e4d
@@ -306,6 +323,147 @@ xfields (struct line *line)
8d0e4d
   extract_field (line, ptr, lim - ptr);
8d0e4d
 }
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+static void
8d0e4d
+xfields_multibyte (struct line *line)
8d0e4d
+{
8d0e4d
+  char *ptr = line->buf.buffer;
8d0e4d
+  char const *lim = ptr + line->buf.length - 1;
8d0e4d
+  wchar_t wc = 0;
8d0e4d
+  size_t mblength = 1;
8d0e4d
+  mbstate_t state, state_bak;
8d0e4d
+
8d0e4d
+  memset (&state, 0, sizeof (mbstate_t));
8d0e4d
+
8d0e4d
+  if (ptr >= lim)
8d0e4d
+    return;
8d0e4d
+
8d0e4d
+  if (tab != NULL)
8d0e4d
+    {
8d0e4d
+      char *sep = ptr;
8d0e4d
+      for (; ptr < lim; ptr = sep + mblength)
8d0e4d
+	{
8d0e4d
+	  sep = ptr;
8d0e4d
+	  while (sep < lim)
8d0e4d
+	    {
8d0e4d
+	      state_bak = state;
8d0e4d
+	      mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
8d0e4d
+
8d0e4d
+	      if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+		{
8d0e4d
+		  mblength = 1;
8d0e4d
+		  state = state_bak;
8d0e4d
+		}
8d0e4d
+	      mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+	      if (mblength == tablen && !memcmp (sep, tab, mblength))
8d0e4d
+		break;
8d0e4d
+	      else
8d0e4d
+		{
8d0e4d
+		  sep += mblength;
8d0e4d
+		  continue;
8d0e4d
+		}
8d0e4d
+	    }
8d0e4d
+
8d0e4d
+	  if (sep >= lim)
8d0e4d
+	    break;
8d0e4d
+
8d0e4d
+	  extract_field (line, ptr, sep - ptr);
8d0e4d
+	}
8d0e4d
+    }
8d0e4d
+  else
8d0e4d
+    {
8d0e4d
+      /* Skip leading blanks before the first field.  */
8d0e4d
+      while(ptr < lim)
8d0e4d
+      {
8d0e4d
+        state_bak = state;
8d0e4d
+        mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
8d0e4d
+
8d0e4d
+        if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+          {
8d0e4d
+            mblength = 1;
8d0e4d
+            state = state_bak;
8d0e4d
+            break;
8d0e4d
+          }
8d0e4d
+        mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+        if (!iswblank(wc) && wc != '\n')
8d0e4d
+          break;
8d0e4d
+        ptr += mblength;
8d0e4d
+      }
8d0e4d
+
8d0e4d
+      do
8d0e4d
+	{
8d0e4d
+	  char *sep;
8d0e4d
+	  state_bak = state;
8d0e4d
+	  mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
8d0e4d
+	  if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+	    {
8d0e4d
+	      mblength = 1;
8d0e4d
+	      state = state_bak;
8d0e4d
+	      break;
8d0e4d
+	    }
8d0e4d
+	  mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+	  sep = ptr + mblength;
8d0e4d
+	  while (sep < lim)
8d0e4d
+	    {
8d0e4d
+	      state_bak = state;
8d0e4d
+	      mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
8d0e4d
+	      if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+		{
8d0e4d
+		  mblength = 1;
8d0e4d
+		  state = state_bak;
8d0e4d
+		  break;
8d0e4d
+		}
8d0e4d
+	      mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+	      if (iswblank (wc) || wc == '\n')
8d0e4d
+		break;
8d0e4d
+
8d0e4d
+	      sep += mblength;
8d0e4d
+	    }
8d0e4d
+
8d0e4d
+	  extract_field (line, ptr, sep - ptr);
8d0e4d
+	  if (sep >= lim)
8d0e4d
+	    return;
8d0e4d
+
8d0e4d
+	  state_bak = state;
8d0e4d
+	  mblength = mbrtowc (&wc, sep, lim - sep + 1, &state);
8d0e4d
+	  if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+	    {
8d0e4d
+	      mblength = 1;
8d0e4d
+	      state = state_bak;
8d0e4d
+	      break;
8d0e4d
+	    }
8d0e4d
+	  mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+	  ptr = sep + mblength;
8d0e4d
+	  while (ptr < lim)
8d0e4d
+	    {
8d0e4d
+	      state_bak = state;
8d0e4d
+	      mblength = mbrtowc (&wc, ptr, lim - ptr + 1, &state);
8d0e4d
+	      if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+		{
8d0e4d
+		  mblength = 1;
8d0e4d
+		  state = state_bak;
8d0e4d
+		  break;
8d0e4d
+		}
8d0e4d
+	      mblength = (mblength < 1) ? 1 : mblength;
8d0e4d
+
8d0e4d
+	      if (!iswblank (wc) && wc != '\n')
8d0e4d
+		break;
8d0e4d
+
8d0e4d
+	      ptr += mblength;
8d0e4d
+	    }
8d0e4d
+	}
8d0e4d
+      while (ptr < lim);
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  extract_field (line, ptr, lim - ptr);
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 static void
8d0e4d
 freeline (struct line *line)
8d0e4d
 {
8d0e4d
@@ -327,56 +485,133 @@ keycmp (struct line const *line1, struct line const *line2,
8d0e4d
         size_t jf_1, size_t jf_2)
8d0e4d
 {
8d0e4d
   /* Start of field to compare in each file.  */
8d0e4d
-  char *beg1;
8d0e4d
-  char *beg2;
8d0e4d
-
8d0e4d
-  size_t len1;
8d0e4d
-  size_t len2;		/* Length of fields to compare.  */
8d0e4d
+  char *beg[2];
8d0e4d
+  char *copy[2];
8d0e4d
+  size_t len[2]; 	/* Length of fields to compare.  */
8d0e4d
   int diff;
8d0e4d
+  int i, j;
8d0e4d
+  int mallocd = 0;
8d0e4d
 
8d0e4d
   if (jf_1 < line1->nfields)
8d0e4d
     {
8d0e4d
-      beg1 = line1->fields[jf_1].beg;
8d0e4d
-      len1 = line1->fields[jf_1].len;
8d0e4d
+      beg[0] = line1->fields[jf_1].beg;
8d0e4d
+      len[0] = line1->fields[jf_1].len;
8d0e4d
     }
8d0e4d
   else
8d0e4d
     {
8d0e4d
-      beg1 = NULL;
8d0e4d
-      len1 = 0;
8d0e4d
+      beg[0] = NULL;
8d0e4d
+      len[0] = 0;
8d0e4d
     }
8d0e4d
 
8d0e4d
   if (jf_2 < line2->nfields)
8d0e4d
     {
8d0e4d
-      beg2 = line2->fields[jf_2].beg;
8d0e4d
-      len2 = line2->fields[jf_2].len;
8d0e4d
+      beg[1] = line2->fields[jf_2].beg;
8d0e4d
+      len[1] = line2->fields[jf_2].len;
8d0e4d
     }
8d0e4d
   else
8d0e4d
     {
8d0e4d
-      beg2 = NULL;
8d0e4d
-      len2 = 0;
8d0e4d
+      beg[1] = NULL;
8d0e4d
+      len[1] = 0;
8d0e4d
     }
8d0e4d
 
8d0e4d
-  if (len1 == 0)
8d0e4d
-    return len2 == 0 ? 0 : -1;
8d0e4d
-  if (len2 == 0)
8d0e4d
+  if (len[0] == 0)
8d0e4d
+    return len[1] == 0 ? 0 : -1;
8d0e4d
+  if (len[1] == 0)
8d0e4d
     return 1;
8d0e4d
 
8d0e4d
   if (ignore_case)
8d0e4d
     {
8d0e4d
-      /* FIXME: ignore_case does not work with NLS (in particular,
8d0e4d
-         with multibyte chars).  */
8d0e4d
-      diff = memcasecmp (beg1, beg2, MIN (len1, len2));
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+      if (MB_CUR_MAX > 1)
8d0e4d
+      {
8d0e4d
+        size_t mblength;
8d0e4d
+        wchar_t wc, uwc;
8d0e4d
+        mbstate_t state, state_bak;
8d0e4d
+
8d0e4d
+        memset (&state, '\0', sizeof (mbstate_t));
8d0e4d
+
8d0e4d
+        for (i = 0; i < 2; i++)
8d0e4d
+          {
8d0e4d
+            mallocd = 1;
8d0e4d
+            copy[i] = xmalloc (len[i] + 1);
8d0e4d
+            memset (copy[i], '\0',len[i] + 1);
8d0e4d
+
8d0e4d
+            for (j = 0; j < MIN (len[0], len[1]);)
8d0e4d
+              {
8d0e4d
+                state_bak = state;
8d0e4d
+                mblength = mbrtowc (&wc, beg[i] + j, len[i] - j, &state);
8d0e4d
+
8d0e4d
+                switch (mblength)
8d0e4d
+                  {
8d0e4d
+                  case (size_t) -1:
8d0e4d
+                  case (size_t) -2:
8d0e4d
+                    state = state_bak;
8d0e4d
+                    /* Fall through */
8d0e4d
+                  case 0:
8d0e4d
+                    mblength = 1;
8d0e4d
+                    break;
8d0e4d
+
8d0e4d
+                  default:
8d0e4d
+                    uwc = towupper (wc);
8d0e4d
+
8d0e4d
+                    if (uwc != wc)
8d0e4d
+                      {
8d0e4d
+                        mbstate_t state_wc;
8d0e4d
+                        size_t mblen;
8d0e4d
+
8d0e4d
+                        memset (&state_wc, '\0', sizeof (mbstate_t));
8d0e4d
+                        mblen = wcrtomb (copy[i] + j, uwc, &state_wc);
8d0e4d
+                        assert (mblen != (size_t)-1);
8d0e4d
+                      }
8d0e4d
+                    else
8d0e4d
+                      memcpy (copy[i] + j, beg[i] + j, mblength);
8d0e4d
+                  }
8d0e4d
+                j += mblength;
8d0e4d
+              }
8d0e4d
+            copy[i][j] = '\0';
8d0e4d
+          }
8d0e4d
+      }
8d0e4d
+      else
8d0e4d
+#endif
8d0e4d
+      {
8d0e4d
+        for (i = 0; i < 2; i++)
8d0e4d
+          {
8d0e4d
+            mallocd = 1;
8d0e4d
+            copy[i] = xmalloc (len[i] + 1);
8d0e4d
+
8d0e4d
+            for (j = 0; j < len[i]; j++)  /* xmemcoll reads all len[i] bytes.  */
8d0e4d
+              copy[i][j] = toupper ((unsigned char) beg[i][j]);
8d0e4d
+
8d0e4d
+            copy[i][j] = '\0';
8d0e4d
+          }
8d0e4d
+      }
8d0e4d
     }
8d0e4d
   else
8d0e4d
     {
8d0e4d
-      if (hard_LC_COLLATE)
8d0e4d
-        return xmemcoll (beg1, len1, beg2, len2);
8d0e4d
-      diff = memcmp (beg1, beg2, MIN (len1, len2));
8d0e4d
+      copy[0] = beg[0];
8d0e4d
+      copy[1] = beg[1];
8d0e4d
     }
8d0e4d
 
8d0e4d
+  if (hard_LC_COLLATE)
8d0e4d
+    {
8d0e4d
+      diff = xmemcoll ((char *) copy[0], len[0], (char *) copy[1], len[1]);
8d0e4d
+
8d0e4d
+      if (mallocd)
8d0e4d
+        for (i = 0; i < 2; i++)
8d0e4d
+          free (copy[i]);
8d0e4d
+
8d0e4d
+      return diff;
8d0e4d
+    }
8d0e4d
+  diff = memcmp (copy[0], copy[1], MIN (len[0], len[1]));
8d0e4d
+
8d0e4d
+  if (mallocd)
8d0e4d
+    for (i = 0; i < 2; i++)
8d0e4d
+      free (copy[i]);
8d0e4d
+
8d0e4d
+
8d0e4d
   if (diff)
8d0e4d
     return diff;
8d0e4d
-  return len1 < len2 ? -1 : len1 != len2;
8d0e4d
+  return len[0] < len[1] ? -1 : len[0] != len[1];
8d0e4d
 }
8d0e4d
 
8d0e4d
 /* Check that successive input lines PREV and CURRENT from input file
8d0e4d
@@ -468,6 +703,11 @@ get_line (FILE *fp, struct line **linep, int which)
8d0e4d
     }
8d0e4d
   ++line_no[which - 1];
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1)
8d0e4d
+    xfields_multibyte (line);
8d0e4d
+  else
8d0e4d
+#endif
8d0e4d
   xfields (line);
8d0e4d
 
8d0e4d
   if (prevline[which - 1])
8d0e4d
@@ -563,21 +803,28 @@ prfield (size_t n, struct line const *line)
8d0e4d
 
8d0e4d
 /* Output all the fields in line, other than the join field.  */
8d0e4d
 
8d0e4d
+#define PUT_TAB_CHAR							\
8d0e4d
+  do									\
8d0e4d
+    {									\
8d0e4d
+      (tab != NULL) ?							\
8d0e4d
+	fwrite(tab, sizeof(char), tablen, stdout) : putchar (' ');	\
8d0e4d
+    }									\
8d0e4d
+  while (0)
8d0e4d
+
8d0e4d
 static void
8d0e4d
 prfields (struct line const *line, size_t join_field, size_t autocount)
8d0e4d
 {
8d0e4d
   size_t i;
8d0e4d
   size_t nfields = autoformat ? autocount : line->nfields;
8d0e4d
-  char output_separator = tab < 0 ? ' ' : tab;
8d0e4d
 
8d0e4d
   for (i = 0; i < join_field && i < nfields; ++i)
8d0e4d
     {
8d0e4d
-      putchar (output_separator);
8d0e4d
+      PUT_TAB_CHAR;
8d0e4d
       prfield (i, line);
8d0e4d
     }
8d0e4d
   for (i = join_field + 1; i < nfields; ++i)
8d0e4d
     {
8d0e4d
-      putchar (output_separator);
8d0e4d
+      PUT_TAB_CHAR;
8d0e4d
       prfield (i, line);
8d0e4d
     }
8d0e4d
 }
8d0e4d
@@ -588,7 +835,6 @@ static void
8d0e4d
 prjoin (struct line const *line1, struct line const *line2)
8d0e4d
 {
8d0e4d
   const struct outlist *outlist;
8d0e4d
-  char output_separator = tab < 0 ? ' ' : tab;
8d0e4d
   size_t field;
8d0e4d
   struct line const *line;
8d0e4d
 
8d0e4d
@@ -622,7 +868,7 @@ prjoin (struct line const *line1, struct line const *line2)
8d0e4d
           o = o->next;
8d0e4d
           if (o == NULL)
8d0e4d
             break;
8d0e4d
-          putchar (output_separator);
8d0e4d
+          PUT_TAB_CHAR;
8d0e4d
         }
8d0e4d
       putchar (eolchar);
8d0e4d
     }
8d0e4d
@@ -1099,20 +1345,42 @@ main (int argc, char **argv)
8d0e4d
 
8d0e4d
         case 't':
8d0e4d
           {
8d0e4d
-            unsigned char newtab = optarg[0];
8d0e4d
+            /* '' => process the whole line: newline becomes the separator.
8d0e4d
+               NEWTAB is always heap-allocated so it can be freed below.  */
8d0e4d
+            char *newtab = xstrdup (*optarg ? optarg : "\n");
8d0e4d
+            size_t newtablen;
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+            if (MB_CUR_MAX > 1)
8d0e4d
+              {
8d0e4d
+                mbstate_t state;
8d0e4d
+
8d0e4d
+                memset (&state, 0, sizeof (mbstate_t));
8d0e4d
+                newtablen = mbrtowc (NULL, newtab,
8d0e4d
+                                     strnlen (newtab, MB_LEN_MAX),
8d0e4d
+                                     &state);
8d0e4d
+                if (newtablen == (size_t) 0
8d0e4d
+                    || newtablen == (size_t) -1
8d0e4d
+                    || newtablen == (size_t) -2)
8d0e4d
+                  newtablen = 1;
8d0e4d
+              }
8d0e4d
+            else
8d0e4d
+#endif
8d0e4d
+              newtablen = 1;
8d0e4d
-            if (! newtab)
8d0e4d
-              newtab = '\n'; /* '' => process the whole line.  */
8d0e4d
-            else if (optarg[1])
8d0e4d
+            if (*optarg && optarg[1])
8d0e4d
               {
8d0e4d
-                if (STREQ (optarg, "\\0"))
8d0e4d
-                  newtab = '\0';
8d0e4d
-                else
8d0e4d
-                  die (EXIT_FAILURE, 0, _("multi-character tab %s"),
8d0e4d
-                       quote (optarg));
8d0e4d
+                if (newtablen == 1 && newtab[1])
8d0e4d
+                {
8d0e4d
+                  if (STREQ (newtab, "\\0"))
8d0e4d
+                     newtab[0] = '\0';
8d0e4d
+                }
8d0e4d
+              }
8d0e4d
+            if (tab != NULL && strcmp (tab, newtab))
8d0e4d
+              {
8d0e4d
+                free (newtab);
8d0e4d
+                die (EXIT_FAILURE, 0, _("incompatible tabs"));
8d0e4d
               }
8d0e4d
-            if (0 <= tab && tab != newtab)
8d0e4d
-              die (EXIT_FAILURE, 0, _("incompatible tabs"));
8d0e4d
             tab = newtab;
8d0e4d
+            tablen = newtablen;
8d0e4d
           }
8d0e4d
           break;
8d0e4d
 
8d0e4d
diff --git a/src/pr.c b/src/pr.c
8d0e4d
index 26f221f..633f50e 100644
8d0e4d
--- a/src/pr.c
8d0e4d
+++ b/src/pr.c
8d0e4d
@@ -311,6 +311,24 @@
8d0e4d
 
8d0e4d
 #include <getopt.h>
8d0e4d
 #include <sys/types.h>
8d0e4d
+
8d0e4d
+/* Get MB_LEN_MAX.  */
8d0e4d
+#include <limits.h>
8d0e4d
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
8d0e4d
+   installation; work around this configuration error.  */
8d0e4d
+#if !defined MB_LEN_MAX || MB_LEN_MAX == 1
8d0e4d
+# define MB_LEN_MAX 16
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Get MB_CUR_MAX.  */
8d0e4d
+#include <stdlib.h>
8d0e4d
+
8d0e4d
+/* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
8d0e4d
+/* Get mbstate_t, mbrtowc(), wcwidth().  */
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+# include <wchar.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 #include "system.h"
8d0e4d
 #include "die.h"
8d0e4d
 #include "error.h"
8d0e4d
@@ -324,6 +342,18 @@
8d0e4d
 #include "xstrtol.h"
8d0e4d
 #include "xdectoint.h"
8d0e4d
 
8d0e4d
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
8d0e4d
+#if HAVE_MBRTOWC && defined mbstate_t
8d0e4d
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+#ifndef HAVE_DECL_WCWIDTH
8d0e4d
+"this configure-time declaration test was not run"
8d0e4d
+#endif
8d0e4d
+#if !HAVE_DECL_WCWIDTH
8d0e4d
+extern int wcwidth ();
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* The official name of this program (e.g., no 'g' prefix).  */
8d0e4d
 #define PROGRAM_NAME "pr"
8d0e4d
 
8d0e4d
@@ -416,7 +446,20 @@ struct COLUMN
8d0e4d
 
8d0e4d
 typedef struct COLUMN COLUMN;
8d0e4d
 
8d0e4d
-static int char_to_clump (char c);
8d0e4d
+/* Function pointers to switch functions for single byte locale or for
8d0e4d
+   multibyte locale. If multibyte functions do not exist in your system,
8d0e4d
+   these pointers always point to the function for single byte locale. */
8d0e4d
+static void (*print_char) (char c);
8d0e4d
+static int (*char_to_clump) (char c);
8d0e4d
+
8d0e4d
+/* Functions for single byte locale. */
8d0e4d
+static void print_char_single (char c);
8d0e4d
+static int char_to_clump_single (char c);
8d0e4d
+
8d0e4d
+/* Functions for multibyte locale. */
8d0e4d
+static void print_char_multi (char c);
8d0e4d
+static int char_to_clump_multi (char c);
8d0e4d
+
8d0e4d
 static bool read_line (COLUMN *p);
8d0e4d
 static bool print_page (void);
8d0e4d
 static bool print_stored (COLUMN *p);
8d0e4d
@@ -428,6 +471,7 @@ static void add_line_number (COLUMN *p);
8d0e4d
 static void getoptnum (const char *n_str, int min, int *num,
8d0e4d
                        const char *errfmt);
8d0e4d
 static void getoptarg (char *arg, char switch_char, char *character,
8d0e4d
+                       int *character_length, int *character_width,
8d0e4d
                        int *number);
8d0e4d
 static void print_files (int number_of_files, char **av);
8d0e4d
 static void init_parameters (int number_of_files);
8d0e4d
@@ -441,7 +485,6 @@ static void store_char (char c);
8d0e4d
 static void pad_down (unsigned int lines);
8d0e4d
 static void read_rest_of_line (COLUMN *p);
8d0e4d
 static void skip_read (COLUMN *p, int column_number);
8d0e4d
-static void print_char (char c);
8d0e4d
 static void cleanup (void);
8d0e4d
 static void print_sep_string (void);
8d0e4d
 static void separator_string (const char *optarg_S);
8d0e4d
@@ -453,7 +496,7 @@ static COLUMN *column_vector;
8d0e4d
    we store the leftmost columns contiguously in buff.
8d0e4d
    To print a line from buff, get the index of the first character
8d0e4d
    from line_vector[i], and print up to line_vector[i + 1]. */
8d0e4d
-static char *buff;
8d0e4d
+static unsigned char *buff;
8d0e4d
 
8d0e4d
 /* Index of the position in buff where the next character
8d0e4d
    will be stored. */
8d0e4d
@@ -557,7 +600,7 @@ static int chars_per_column;
8d0e4d
 static bool untabify_input = false;
8d0e4d
 
8d0e4d
 /* (-e) The input tab character. */
8d0e4d
-static char input_tab_char = '\t';
8d0e4d
+static char input_tab_char[MB_LEN_MAX] = "\t";
8d0e4d
 
8d0e4d
 /* (-e) Tabstops are at chars_per_tab, 2*chars_per_tab, 3*chars_per_tab, ...
8d0e4d
    where the leftmost column is 1. */
8d0e4d
@@ -567,7 +610,10 @@ static int chars_per_input_tab = 8;
8d0e4d
 static bool tabify_output = false;
8d0e4d
 
8d0e4d
 /* (-i) The output tab character. */
8d0e4d
-static char output_tab_char = '\t';
8d0e4d
+static char output_tab_char[MB_LEN_MAX] = "\t";
8d0e4d
+
8d0e4d
+/* (-i) The byte length of output tab character. */
8d0e4d
+static int output_tab_char_length = 1;
8d0e4d
 
8d0e4d
 /* (-i) The width of the output tab. */
8d0e4d
 static int chars_per_output_tab = 8;
8d0e4d
@@ -637,7 +683,13 @@ static int line_number;
8d0e4d
 static bool numbered_lines = false;
8d0e4d
 
8d0e4d
 /* (-n) Character which follows each line number. */
8d0e4d
-static char number_separator = '\t';
8d0e4d
+static char number_separator[MB_LEN_MAX] = "\t";
8d0e4d
+
8d0e4d
+/* (-n) The byte length of the character which follows each line number. */
8d0e4d
+static int number_separator_length = 1;
8d0e4d
+
8d0e4d
+/* (-n) The character width of the character which follows each line number. */
8d0e4d
+static int number_separator_width = 0;
8d0e4d
 
8d0e4d
 /* (-n) line counting starts with 1st line of input file (not with 1st
8d0e4d
    line of 1st page printed). */
8d0e4d
@@ -690,6 +742,7 @@ static bool use_col_separator = false;
8d0e4d
    -a|COLUMN|-m is a 'space' and with the -J option a 'tab'. */
8d0e4d
 static char const *col_sep_string = "";
8d0e4d
 static int col_sep_length = 0;
8d0e4d
+static int col_sep_width = 0;
8d0e4d
 static char *column_separator = (char *) " ";
8d0e4d
 static char *line_separator = (char *) "\t";
8d0e4d
 
8d0e4d
@@ -851,6 +904,13 @@ separator_string (const char *optarg_S)
8d0e4d
     integer_overflow ();
8d0e4d
   col_sep_length = len;
8d0e4d
   col_sep_string = optarg_S;
8d0e4d
+
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1)
8d0e4d
+    col_sep_width = mbswidth (col_sep_string, 0);
8d0e4d
+  else
8d0e4d
+#endif
8d0e4d
+    col_sep_width = col_sep_length;
8d0e4d
 }
8d0e4d
 
8d0e4d
 int
8d0e4d
@@ -875,6 +935,21 @@ main (int argc, char **argv)
8d0e4d
 
8d0e4d
   atexit (close_stdout);
8d0e4d
 
8d0e4d
+/* Define which functions are used, the ones for single byte locale or the ones
8d0e4d
+   for multibyte locale. */
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1)
8d0e4d
+    {
8d0e4d
+      print_char = print_char_multi;
8d0e4d
+      char_to_clump = char_to_clump_multi;
8d0e4d
+    }
8d0e4d
+  else
8d0e4d
+#endif
8d0e4d
+    {
8d0e4d
+      print_char = print_char_single;
8d0e4d
+      char_to_clump = char_to_clump_single;
8d0e4d
+    }
8d0e4d
+
8d0e4d
   n_files = 0;
8d0e4d
   file_names = (argc > 1
8d0e4d
                 ? xnmalloc (argc - 1, sizeof (char *))
8d0e4d
@@ -951,8 +1026,12 @@ main (int argc, char **argv)
8d0e4d
           break;
8d0e4d
         case 'e':
8d0e4d
           if (optarg)
8d0e4d
-            getoptarg (optarg, 'e', &input_tab_char,
8d0e4d
-                       &chars_per_input_tab);
8d0e4d
+            {
8d0e4d
+              int dummy_length, dummy_width;
8d0e4d
+
8d0e4d
+              getoptarg (optarg, 'e', input_tab_char, &dummy_length,
8d0e4d
+                         &dummy_width, &chars_per_input_tab);
8d0e4d
+            }
8d0e4d
           /* Could check tab width > 0. */
8d0e4d
           untabify_input = true;
8d0e4d
           break;
8d0e4d
@@ -965,8 +1044,12 @@ main (int argc, char **argv)
8d0e4d
           break;
8d0e4d
         case 'i':
8d0e4d
           if (optarg)
8d0e4d
-            getoptarg (optarg, 'i', &output_tab_char,
8d0e4d
-                       &chars_per_output_tab);
8d0e4d
+            {
8d0e4d
+              int dummy_width;
8d0e4d
+
8d0e4d
+              getoptarg (optarg, 'i', output_tab_char, &output_tab_char_length,
8d0e4d
+                         &dummy_width, &chars_per_output_tab);
8d0e4d
+            }
8d0e4d
           /* Could check tab width > 0. */
8d0e4d
           tabify_output = true;
8d0e4d
           break;
8d0e4d
@@ -984,8 +1067,8 @@ main (int argc, char **argv)
8d0e4d
         case 'n':
8d0e4d
           numbered_lines = true;
8d0e4d
           if (optarg)
8d0e4d
-            getoptarg (optarg, 'n', &number_separator,
8d0e4d
-                       &chars_per_number);
8d0e4d
+            getoptarg (optarg, 'n', number_separator, &number_separator_length,
8d0e4d
+                       &number_separator_width, &chars_per_number);
8d0e4d
           break;
8d0e4d
         case 'N':
8d0e4d
           skip_count = false;
8d0e4d
@@ -1010,6 +1093,7 @@ main (int argc, char **argv)
8d0e4d
           /* Reset an additional input of -s, -S dominates -s */
8d0e4d
           col_sep_string = "";
8d0e4d
           col_sep_length = 0;
8d0e4d
+          col_sep_width = 0;
8d0e4d
           use_col_separator = true;
8d0e4d
           if (optarg)
8d0e4d
             separator_string (optarg);
8d0e4d
@@ -1165,10 +1249,45 @@ getoptnum (const char *n_str, int min, int *num, const char *err)
8d0e4d
    a number. */
8d0e4d
 
8d0e4d
 static void
8d0e4d
-getoptarg (char *arg, char switch_char, char *character, int *number)
8d0e4d
+getoptarg (char *arg, char switch_char, char *character, int *character_length,
8d0e4d
+           int *character_width, int *number)
8d0e4d
 {
8d0e4d
   if (!ISDIGIT (*arg))
8d0e4d
-    *character = *arg++;
8d0e4d
+    {
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+      if (MB_CUR_MAX > 1)        /* for multibyte locale. */
8d0e4d
+        {
8d0e4d
+          wchar_t wc;
8d0e4d
+          size_t mblength;
8d0e4d
+          int width;
8d0e4d
+          mbstate_t state = {'\0'};
8d0e4d
+
8d0e4d
+          mblength = mbrtowc (&wc, arg, strnlen(arg, MB_LEN_MAX), &state);
8d0e4d
+
8d0e4d
+          if (mblength == (size_t)-1 || mblength == (size_t)-2)
8d0e4d
+            {
8d0e4d
+              *character_length = 1;
8d0e4d
+              *character_width = 1;
8d0e4d
+            }
8d0e4d
+          else
8d0e4d
+            {
8d0e4d
+              *character_length = (mblength < 1) ? 1 : mblength;
8d0e4d
+              width = wcwidth (wc);
8d0e4d
+              *character_width = (width < 0) ? 0 : width;
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          strncpy (character, arg, *character_length);
8d0e4d
+          arg += *character_length;
8d0e4d
+        }
8d0e4d
+      else                        /* for single byte locale. */
8d0e4d
+#endif
8d0e4d
+        {
8d0e4d
+          *character = *arg++;
8d0e4d
+          *character_length = 1;
8d0e4d
+          *character_width = 1;
8d0e4d
+        }
8d0e4d
+    }
8d0e4d
+
8d0e4d
   if (*arg)
8d0e4d
     {
8d0e4d
       long int tmp_long;
8d0e4d
@@ -1190,6 +1309,11 @@ static void
8d0e4d
 init_parameters (int number_of_files)
8d0e4d
 {
8d0e4d
   int chars_used_by_number = 0;
8d0e4d
+  int mb_len = 1;
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1)
8d0e4d
+    mb_len = MB_LEN_MAX;
8d0e4d
+#endif
8d0e4d
 
8d0e4d
   lines_per_body = lines_per_page - lines_per_header - lines_per_footer;
8d0e4d
   if (lines_per_body <= 0)
8d0e4d
@@ -1227,7 +1351,7 @@ init_parameters (int number_of_files)
8d0e4d
           else
8d0e4d
             col_sep_string = column_separator;
8d0e4d
 
8d0e4d
-          col_sep_length = 1;
8d0e4d
+          col_sep_length = col_sep_width = 1;
8d0e4d
           use_col_separator = true;
8d0e4d
         }
8d0e4d
       /* It's rather pointless to define a TAB separator with column
8d0e4d
@@ -1257,11 +1381,11 @@ init_parameters (int number_of_files)
8d0e4d
              + TAB_WIDTH (chars_per_input_tab, chars_per_number);   */
8d0e4d
 
8d0e4d
       /* Estimate chars_per_text without any margin and keep it constant. */
8d0e4d
-      if (number_separator == '\t')
8d0e4d
+      if (number_separator[0] == '\t')
8d0e4d
         number_width = (chars_per_number
8d0e4d
                         + TAB_WIDTH (chars_per_default_tab, chars_per_number));
8d0e4d
       else
8d0e4d
-        number_width = chars_per_number + 1;
8d0e4d
+        number_width = chars_per_number + number_separator_width;
8d0e4d
 
8d0e4d
       /* The number is part of the column width unless we are
8d0e4d
          printing files in parallel. */
8d0e4d
@@ -1270,7 +1394,7 @@ init_parameters (int number_of_files)
8d0e4d
     }
8d0e4d
 
8d0e4d
   int sep_chars, useful_chars;
8d0e4d
-  if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_length, &sep_chars))
8d0e4d
+  if (INT_MULTIPLY_WRAPV (columns - 1, col_sep_width, &sep_chars))
8d0e4d
     sep_chars = INT_MAX;
8d0e4d
   if (INT_SUBTRACT_WRAPV (chars_per_line - chars_used_by_number, sep_chars,
8d0e4d
                           &useful_chars))
8d0e4d
@@ -1293,7 +1417,7 @@ init_parameters (int number_of_files)
8d0e4d
      We've to use 8 as the lower limit, if we use chars_per_default_tab = 8
8d0e4d
      to expand a tab which is not an input_tab-char. */
8d0e4d
   free (clump_buff);
8d0e4d
-  clump_buff = xmalloc (MAX (8, chars_per_input_tab));
8d0e4d
+  clump_buff = xmalloc (mb_len * MAX (8, chars_per_input_tab));
8d0e4d
 }
8d0e4d
 
8d0e4d
 /* Open the necessary files,
8d0e4d
@@ -1399,7 +1523,7 @@ init_funcs (void)
8d0e4d
 
8d0e4d
   /* Enlarge p->start_position of first column to use the same form of
8d0e4d
      padding_not_printed with all columns. */
8d0e4d
-  h = h + col_sep_length;
8d0e4d
+  h = h + col_sep_width;
8d0e4d
 
8d0e4d
   /* This loop takes care of all but the rightmost column. */
8d0e4d
 
8d0e4d
@@ -1433,7 +1557,7 @@ init_funcs (void)
8d0e4d
         }
8d0e4d
       else
8d0e4d
         {
8d0e4d
-          h = h_next + col_sep_length;
8d0e4d
+          h = h_next + col_sep_width;
8d0e4d
           h_next = h + chars_per_column;
8d0e4d
         }
8d0e4d
     }
8d0e4d
@@ -1724,9 +1848,9 @@ static void
8d0e4d
 align_column (COLUMN *p)
8d0e4d
 {
8d0e4d
   padding_not_printed = p->start_position;
8d0e4d
-  if (col_sep_length < padding_not_printed)
8d0e4d
+  if (col_sep_width < padding_not_printed)
8d0e4d
     {
8d0e4d
-      pad_across_to (padding_not_printed - col_sep_length);
8d0e4d
+      pad_across_to (padding_not_printed - col_sep_width);
8d0e4d
       padding_not_printed = ANYWHERE;
8d0e4d
     }
8d0e4d
 
8d0e4d
@@ -2001,13 +2125,13 @@ store_char (char c)
8d0e4d
       /* May be too generous. */
8d0e4d
       buff = X2REALLOC (buff, &buff_allocated);
8d0e4d
     }
8d0e4d
-  buff[buff_current++] = c;
8d0e4d
+  buff[buff_current++] = (unsigned char) c;
8d0e4d
 }
8d0e4d
 
8d0e4d
 static void
8d0e4d
 add_line_number (COLUMN *p)
8d0e4d
 {
8d0e4d
-  int i;
8d0e4d
+  int i, j;
8d0e4d
   char *s;
8d0e4d
   int num_width;
8d0e4d
 
8d0e4d
@@ -2024,22 +2148,24 @@ add_line_number (COLUMN *p)
8d0e4d
       /* Tabification is assumed for multiple columns, also for n-separators,
8d0e4d
          but 'default n-separator = TAB' hasn't been given priority over
8d0e4d
          equal column_width also specified by POSIX. */
8d0e4d
-      if (number_separator == '\t')
8d0e4d
+      if (number_separator[0] == '\t')
8d0e4d
         {
8d0e4d
           i = number_width - chars_per_number;
8d0e4d
           while (i-- > 0)
8d0e4d
             (p->char_func) (' ');
8d0e4d
         }
8d0e4d
       else
8d0e4d
-        (p->char_func) (number_separator);
8d0e4d
+        for (j = 0; j < number_separator_length; j++)
8d0e4d
+          (p->char_func) (number_separator[j]);
8d0e4d
     }
8d0e4d
   else
8d0e4d
     /* To comply with POSIX, we avoid any expansion of default TAB
8d0e4d
        separator with a single column output. No column_width requirement
8d0e4d
        has to be considered. */
8d0e4d
     {
8d0e4d
-      (p->char_func) (number_separator);
8d0e4d
-      if (number_separator == '\t')
8d0e4d
+      for (j = 0; j < number_separator_length; j++)
8d0e4d
+        (p->char_func) (number_separator[j]);
8d0e4d
+      if (number_separator[0] == '\t')
8d0e4d
         output_position = POS_AFTER_TAB (chars_per_output_tab,
8d0e4d
                           output_position);
8d0e4d
     }
8d0e4d
@@ -2198,7 +2324,7 @@ print_white_space (void)
8d0e4d
   while (goal - h_old > 1
8d0e4d
          && (h_new = POS_AFTER_TAB (chars_per_output_tab, h_old)) <= goal)
8d0e4d
     {
8d0e4d
-      putchar (output_tab_char);
8d0e4d
+      fwrite (output_tab_char, sizeof(char), output_tab_char_length, stdout);
8d0e4d
       h_old = h_new;
8d0e4d
     }
8d0e4d
   while (++h_old <= goal)
8d0e4d
@@ -2218,6 +2344,7 @@ print_sep_string (void)
8d0e4d
 {
8d0e4d
   char const *s = col_sep_string;
8d0e4d
   int l = col_sep_length;
8d0e4d
+  int not_space_flag;
8d0e4d
 
8d0e4d
   if (separators_not_printed <= 0)
8d0e4d
     {
8d0e4d
@@ -2229,6 +2356,7 @@ print_sep_string (void)
8d0e4d
     {
8d0e4d
       for (; separators_not_printed > 0; --separators_not_printed)
8d0e4d
         {
8d0e4d
+          not_space_flag = 0;
8d0e4d
           while (l-- > 0)
8d0e4d
             {
8d0e4d
               /* 3 types of sep_strings: spaces only, spaces and chars,
8d0e4d
@@ -2242,12 +2370,15 @@ print_sep_string (void)
8d0e4d
                 }
8d0e4d
               else
8d0e4d
                 {
8d0e4d
+                  not_space_flag = 1;
8d0e4d
                   if (spaces_not_printed > 0)
8d0e4d
                     print_white_space ();
8d0e4d
                   putchar (*s++);
8d0e4d
-                  ++output_position;
8d0e4d
                 }
8d0e4d
             }
8d0e4d
+          if (not_space_flag)
8d0e4d
+            output_position += col_sep_width;
8d0e4d
+
8d0e4d
           /* sep_string ends with some spaces */
8d0e4d
           if (spaces_not_printed > 0)
8d0e4d
             print_white_space ();
8d0e4d
@@ -2275,7 +2406,7 @@ print_clump (COLUMN *p, int n, char *clump)
8d0e4d
    required number of tabs and spaces. */
8d0e4d
 
8d0e4d
 static void
8d0e4d
-print_char (char c)
8d0e4d
+print_char_single (char c)
8d0e4d
 {
8d0e4d
   if (tabify_output)
8d0e4d
     {
8d0e4d
@@ -2299,6 +2430,99 @@ print_char (char c)
8d0e4d
   putchar (c);
8d0e4d
 }
8d0e4d
 
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+/* Multibyte counterpart of print_char_single.
8d0e4d
+
8d0e4d
+   When output is being tabified, collect the bytes of C's multibyte
8d0e4d
+   character in a static buffer and, once the character is complete,
8d0e4d
+   write it and advance output_position by its display width.  Spaces
8d0e4d
+   are only counted in spaces_not_printed; bytes that do not form a
8d0e4d
+   valid character are written one at a time with width 1.  Without
8d0e4d
+   tabification C is written unchanged.  */
8d0e4d
+static void
8d0e4d
+print_char_multi (char c)
8d0e4d
+{
8d0e4d
+  static size_t mbc_pos = 0;
8d0e4d
+  static char mbc[MB_LEN_MAX] = {'\0'};
8d0e4d
+  static mbstate_t state = {'\0'};
8d0e4d
+  mbstate_t state_bak;
8d0e4d
+  wchar_t wc;
8d0e4d
+  size_t mblength;
8d0e4d
+  int width;
8d0e4d
+
8d0e4d
+  if (!tabify_output)
8d0e4d
+    {
8d0e4d
+      putchar (c);
8d0e4d
+      return;
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  mbc[mbc_pos++] = c;
8d0e4d
+
8d0e4d
+  while (mbc_pos > 0)
8d0e4d
+    {
8d0e4d
+      /* Decode afresh on every pass, so that the bytes left over after
8d0e4d
+         an invalid byte are examined on their own merits instead of
8d0e4d
+         reusing the previous conversion result.  */
8d0e4d
+      state_bak = state;
8d0e4d
+      mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
8d0e4d
+
8d0e4d
+      /* A sequence still incomplete with a full buffer cannot complete;
8d0e4d
+         treat it as invalid so that MBC is never overrun.  */
8d0e4d
+      if (mblength == (size_t)-2 && mbc_pos >= sizeof mbc)
8d0e4d
+        mblength = (size_t)-1;
8d0e4d
+
8d0e4d
+      switch (mblength)
8d0e4d
+        {
8d0e4d
+        case (size_t)-2:
8d0e4d
+          /* Incomplete character: wait for the next byte.  */
8d0e4d
+          state = state_bak;
8d0e4d
+          return;
8d0e4d
+
8d0e4d
+        case (size_t)-1:
8d0e4d
+          /* Invalid byte: flush pending spaces first so the output
8d0e4d
+             order is preserved, then write the byte with width 1.  */
8d0e4d
+          state = state_bak;
8d0e4d
+          if (spaces_not_printed > 0)
8d0e4d
+            print_white_space ();
8d0e4d
+          ++output_position;
8d0e4d
+          putchar (mbc[0]);
8d0e4d
+          mblength = 1;
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        case 0:
8d0e4d
+          mblength = 1;
8d0e4d
+          /* Fall through.  */
8d0e4d
+
8d0e4d
+        default:
8d0e4d
+          if (wc == L' ')
8d0e4d
+            {
8d0e4d
+              ++spaces_not_printed;
8d0e4d
+              break;
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          if (spaces_not_printed > 0)
8d0e4d
+            print_white_space ();
8d0e4d
+
8d0e4d
+          /* Nonprintables are assumed to have width 0, except L'\b'. */
8d0e4d
+          if ((width = wcwidth (wc)) < 1)
8d0e4d
+            {
8d0e4d
+              if (wc == L'\b')
8d0e4d
+                --output_position;
8d0e4d
+            }
8d0e4d
+          else
8d0e4d
+            output_position += width;
8d0e4d
+
8d0e4d
+          fwrite (mbc, sizeof(char), mblength, stdout);
8d0e4d
+          break;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      /* Drop the bytes just consumed and keep the remainder.  */
8d0e4d
+      mbc_pos -= mblength;
8d0e4d
+      memmove (mbc, mbc + mblength, mbc_pos);
8d0e4d
+    }
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* Skip to page PAGE before printing.
8d0e4d
    PAGE may be larger than total number of pages. */
8d0e4d
 
8d0e4d
@@ -2476,9 +2675,9 @@ read_line (COLUMN *p)
8d0e4d
           align_empty_cols = false;
8d0e4d
         }
8d0e4d
 
8d0e4d
-      if (col_sep_length < padding_not_printed)
8d0e4d
+      if (col_sep_width < padding_not_printed)
8d0e4d
         {
8d0e4d
-          pad_across_to (padding_not_printed - col_sep_length);
8d0e4d
+          pad_across_to (padding_not_printed - col_sep_width);
8d0e4d
           padding_not_printed = ANYWHERE;
8d0e4d
         }
8d0e4d
 
8d0e4d
@@ -2547,7 +2746,7 @@ print_stored (COLUMN *p)
8d0e4d
   COLUMN *q;
8d0e4d
 
8d0e4d
   int line = p->current_line++;
8d0e4d
-  char *first = &buff[line_vector[line]];
8d0e4d
+  unsigned char *first = &buff[line_vector[line]];
8d0e4d
   /* FIXME
8d0e4d
      UMR: Uninitialized memory read:
8d0e4d
      * This is occurring while in:
8d0e4d
@@ -2559,7 +2758,7 @@ print_stored (COLUMN *p)
8d0e4d
      xmalloc        [xmalloc.c:94]
8d0e4d
      init_store_cols [pr.c:1648]
8d0e4d
      */
8d0e4d
-  char *last = &buff[line_vector[line + 1]];
8d0e4d
+  unsigned char *last = &buff[line_vector[line + 1]];
8d0e4d
 
8d0e4d
   pad_vertically = true;
8d0e4d
 
8d0e4d
@@ -2579,9 +2778,9 @@ print_stored (COLUMN *p)
8d0e4d
         }
8d0e4d
     }
8d0e4d
 
8d0e4d
-  if (col_sep_length < padding_not_printed)
8d0e4d
+  if (col_sep_width < padding_not_printed)
8d0e4d
     {
8d0e4d
-      pad_across_to (padding_not_printed - col_sep_length);
8d0e4d
+      pad_across_to (padding_not_printed - col_sep_width);
8d0e4d
       padding_not_printed = ANYWHERE;
8d0e4d
     }
8d0e4d
 
8d0e4d
@@ -2594,8 +2793,8 @@ print_stored (COLUMN *p)
8d0e4d
   if (spaces_not_printed == 0)
8d0e4d
     {
8d0e4d
       output_position = p->start_position + end_vector[line];
8d0e4d
-      if (p->start_position - col_sep_length == chars_per_margin)
8d0e4d
-        output_position -= col_sep_length;
8d0e4d
+      if (p->start_position - col_sep_width == chars_per_margin)
8d0e4d
+        output_position -= col_sep_width;
8d0e4d
     }
8d0e4d
 
8d0e4d
   return true;
8d0e4d
@@ -2614,7 +2813,7 @@ print_stored (COLUMN *p)
8d0e4d
    number of characters is 1.) */
8d0e4d
 
8d0e4d
 static int
8d0e4d
-char_to_clump (char c)
8d0e4d
+char_to_clump_single (char c)
8d0e4d
 {
8d0e4d
   unsigned char uc = c;
8d0e4d
   char *s = clump_buff;
8d0e4d
@@ -2624,10 +2823,10 @@ char_to_clump (char c)
8d0e4d
   int chars;
8d0e4d
   int chars_per_c = 8;
8d0e4d
 
8d0e4d
-  if (c == input_tab_char)
8d0e4d
+  if (c == input_tab_char[0])
8d0e4d
     chars_per_c = chars_per_input_tab;
8d0e4d
 
8d0e4d
-  if (c == input_tab_char || c == '\t')
8d0e4d
+  if (c == input_tab_char[0] || c == '\t')
8d0e4d
     {
8d0e4d
       width = TAB_WIDTH (chars_per_c, input_position);
8d0e4d
 
8d0e4d
@@ -2708,6 +2907,190 @@ char_to_clump (char c)
8d0e4d
   return chars;
8d0e4d
 }
8d0e4d
 
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+/* Multibyte counterpart of char_to_clump_single.
8d0e4d
+
8d0e4d
+   Append byte C to a static buffer of pending bytes and convert each
8d0e4d
+   complete character (or invalid byte) found there into its output
8d0e4d
+   form in clump_buff.  Advance input_position by the display width
8d0e4d
+   produced and return the number of bytes stored in clump_buff; the
8d0e4d
+   result is 0 while C merely extends an incomplete character.  */
8d0e4d
+static int
8d0e4d
+char_to_clump_multi (char c)
8d0e4d
+{
8d0e4d
+  static size_t mbc_pos = 0;
8d0e4d
+  static char mbc[MB_LEN_MAX] = {'\0'};
8d0e4d
+  static mbstate_t state = {'\0'};
8d0e4d
+  mbstate_t state_bak;
8d0e4d
+  wchar_t wc;
8d0e4d
+  size_t mblength;
8d0e4d
+  int wc_width;
8d0e4d
+  register char *s = clump_buff;
8d0e4d
+  register int i, j;
8d0e4d
+  char esc_buff[4];
8d0e4d
+  char last;
8d0e4d
+  bool is_input_tab;
8d0e4d
+  int width = 0;
8d0e4d
+  int chars = 0;
8d0e4d
+  int chars_per_c;
8d0e4d
+
8d0e4d
+  mbc[mbc_pos++] = c;
8d0e4d
+
8d0e4d
+  while (mbc_pos > 0)
8d0e4d
+    {
8d0e4d
+      /* Decode afresh on every pass, so that the bytes left over after
8d0e4d
+         an invalid byte are examined on their own merits instead of
8d0e4d
+         reusing the previous conversion result.  */
8d0e4d
+      state_bak = state;
8d0e4d
+      mblength = mbrtowc (&wc, mbc, mbc_pos, &state);
8d0e4d
+
8d0e4d
+      /* A sequence still incomplete with a full buffer cannot complete;
8d0e4d
+         treat it as invalid so that MBC is never overrun.  */
8d0e4d
+      if (mblength == (size_t)-2 && mbc_pos >= sizeof mbc)
8d0e4d
+        mblength = (size_t)-1;
8d0e4d
+
8d0e4d
+      if (mblength == (size_t)-2)
8d0e4d
+        {
8d0e4d
+          /* Incomplete character: keep the bytes and wait for more.  */
8d0e4d
+          state = state_bak;
8d0e4d
+          break;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      switch (mblength)
8d0e4d
+        {
8d0e4d
+        case (size_t)-1:
8d0e4d
+          /* Invalid byte: emit it alone, escaped when requested.  */
8d0e4d
+          state = state_bak;
8d0e4d
+          mblength = 1;
8d0e4d
+
8d0e4d
+          if (use_esc_sequence || use_cntrl_prefix)
8d0e4d
+            {
8d0e4d
+              width += 4;
8d0e4d
+              chars += 4;
8d0e4d
+              *s++ = '\\';
8d0e4d
+              sprintf (esc_buff, "%03o", (unsigned char) mbc[0]);
8d0e4d
+              for (i = 0; i <= 2; ++i)
8d0e4d
+                *s++ = (int) esc_buff[i];
8d0e4d
+            }
8d0e4d
+          else
8d0e4d
+            {
8d0e4d
+              width += 1;
8d0e4d
+              chars += 1;
8d0e4d
+              *s++ = mbc[0];
8d0e4d
+            }
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        case 0:
8d0e4d
+          mblength = 1;
8d0e4d
+          /* Fall through */
8d0e4d
+
8d0e4d
+        default:
8d0e4d
+          /* LAST is the byte that completed this character; on the
8d0e4d
+             first pass it is C itself.  */
8d0e4d
+          last = mbc[mblength - 1];
8d0e4d
+          is_input_tab = memcmp (mbc, input_tab_char, mblength) == 0;
8d0e4d
+          chars_per_c = is_input_tab ? chars_per_input_tab : 8;
8d0e4d
+
8d0e4d
+          if (is_input_tab || last == '\t')
8d0e4d
+            {
8d0e4d
+              /* Measure from the column reached so far in this call.  */
8d0e4d
+              int width_inc = TAB_WIDTH (chars_per_c, input_position + width);
8d0e4d
+
8d0e4d
+              width += width_inc;
8d0e4d
+
8d0e4d
+              if (untabify_input)
8d0e4d
+                {
8d0e4d
+                  for (i = width_inc; i; --i)
8d0e4d
+                    *s++ = ' ';
8d0e4d
+                  chars += width_inc;
8d0e4d
+                }
8d0e4d
+              else
8d0e4d
+                {
8d0e4d
+                  for (i = 0; i <  mblength; i++)
8d0e4d
+                    *s++ = mbc[i];
8d0e4d
+                  chars += mblength;
8d0e4d
+                }
8d0e4d
+            }
8d0e4d
+          else if ((wc_width = wcwidth (wc)) < 1)
8d0e4d
+            {
8d0e4d
+              if (use_esc_sequence)
8d0e4d
+                {
8d0e4d
+                  for (i = 0; i < mblength; i++)
8d0e4d
+                    {
8d0e4d
+                      width += 4;
8d0e4d
+                      chars += 4;
8d0e4d
+                      *s++ = '\\';
8d0e4d
+                      sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
8d0e4d
+                      for (j = 0; j <= 2; ++j)
8d0e4d
+                        *s++ = (int) esc_buff[j];
8d0e4d
+                    }
8d0e4d
+                }
8d0e4d
+              else if (use_cntrl_prefix)
8d0e4d
+                {
8d0e4d
+                  if (wc < 0200)
8d0e4d
+                    {
8d0e4d
+                      width += 2;
8d0e4d
+                      chars += 2;
8d0e4d
+                      *s++ = '^';
8d0e4d
+                      *s++ = wc ^ 0100;
8d0e4d
+                    }
8d0e4d
+                  else
8d0e4d
+                    {
8d0e4d
+                      for (i = 0; i < mblength; i++)
8d0e4d
+                        {
8d0e4d
+                          width += 4;
8d0e4d
+                          chars += 4;
8d0e4d
+                          *s++ = '\\';
8d0e4d
+                          sprintf (esc_buff, "%03o", (unsigned char) mbc[i]);
8d0e4d
+                          for (j = 0; j <= 2; ++j)
8d0e4d
+                            *s++ = (int) esc_buff[j];
8d0e4d
+                        }
8d0e4d
+                    }
8d0e4d
+                }
8d0e4d
+              else if (wc == L'\b')
8d0e4d
+                {
8d0e4d
+                  width += -1;
8d0e4d
+                  chars += 1;
8d0e4d
+                  *s++ = last;
8d0e4d
+                }
8d0e4d
+              else
8d0e4d
+                {
8d0e4d
+                  width += 0;
8d0e4d
+                  chars += mblength;
8d0e4d
+                  for (i = 0; i < mblength; i++)
8d0e4d
+                    *s++ = mbc[i];
8d0e4d
+                }
8d0e4d
+            }
8d0e4d
+          else
8d0e4d
+            {
8d0e4d
+              width += wc_width;
8d0e4d
+              chars += mblength;
8d0e4d
+              for (i = 0; i < mblength; i++)
8d0e4d
+                *s++ = mbc[i];
8d0e4d
+            }
8d0e4d
+          break;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      /* Drop the bytes just consumed and keep the remainder.  */
8d0e4d
+      mbc_pos -= mblength;
8d0e4d
+      memmove (mbc, mbc + mblength, mbc_pos);
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  /* Too many backspaces must put us in position 0 -- never negative. */
8d0e4d
+  if (width < 0 && input_position == 0)
8d0e4d
+    {
8d0e4d
+      chars = 0;
8d0e4d
+      input_position = 0;
8d0e4d
+    }
8d0e4d
+  else if (width < 0 && input_position <= -width)
8d0e4d
+    input_position = 0;
8d0e4d
+  else
8d0e4d
+   input_position += width;
8d0e4d
+
8d0e4d
+  return chars;
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* We've just printed some files and need to clean up things before
8d0e4d
    looking for more options and printing the next batch of files.
8d0e4d
 
8d0e4d
diff --git a/src/sort.c b/src/sort.c
8d0e4d
index 6d2eec5..f189a0d 100644
8d0e4d
--- a/src/sort.c
8d0e4d
+++ b/src/sort.c
8d0e4d
@@ -29,6 +29,14 @@
8d0e4d
 #include <sys/wait.h>
8d0e4d
 #include <signal.h>
8d0e4d
 #include <assert.h>
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+# include <wchar.h>
8d0e4d
+#endif
8d0e4d
+/* Get isw* functions. */
8d0e4d
+#if HAVE_WCTYPE_H
8d0e4d
+# include <wctype.h>
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 #include "system.h"
8d0e4d
 #include "argmatch.h"
8d0e4d
 #include "die.h"
8d0e4d
@@ -169,14 +177,39 @@ static int decimal_point;
8d0e4d
 /* Thousands separator; if -1, then there isn't one.  */
8d0e4d
 static int thousands_sep;
8d0e4d
 
8d0e4d
+/* True if -f is specified.  */
8d0e4d
+static bool folding;
8d0e4d
+
8d0e4d
 /* Nonzero if the corresponding locales are hard.  */
8d0e4d
 static bool hard_LC_COLLATE;
8d0e4d
-#if HAVE_NL_LANGINFO
8d0e4d
+#if HAVE_LANGINFO_CODESET
8d0e4d
 static bool hard_LC_TIME;
8d0e4d
 #endif
8d0e4d
 
8d0e4d
 #define NONZERO(x) ((x) != 0)
8d0e4d
 
8d0e4d
+/* Set MBLENGTH to the byte length of the character at PTR, scanning no further than LIM (1 if invalid, incomplete or NUL). */
8d0e4d
+#define GET_BYTELEN_OF_CHAR(LIM, PTR, MBLENGTH, STATE)                        \
8d0e4d
+  do                                                                        \
8d0e4d
+    {                                                                        \
8d0e4d
+      wchar_t wc;                                                        \
8d0e4d
+      mbstate_t state_bak;                                                \
8d0e4d
+                                                                        \
8d0e4d
+      state_bak = (STATE);                                                \
8d0e4d
+      (MBLENGTH) = mbrtowc (&wc, (PTR), (LIM) - (PTR), &(STATE));        \
8d0e4d
+                                                                        \
8d0e4d
+      switch (MBLENGTH)                                                        \
8d0e4d
+        {                                                                \
8d0e4d
+        case (size_t)-1:                                                \
8d0e4d
+        case (size_t)-2:                                                \
8d0e4d
+          (STATE) = state_bak;  /* undo the failed conversion */        \
8d0e4d
+                /* Fall through. */                                        \
8d0e4d
+        case 0:                                                                \
8d0e4d
+          (MBLENGTH) = 1;                                                \
8d0e4d
+        }                                                                \
8d0e4d
+    }                                                                        \
8d0e4d
+  while (0)
8d0e4d
+
8d0e4d
 /* The kind of blanks for '-b' to skip in various options. */
8d0e4d
 enum blanktype { bl_start, bl_end, bl_both };
8d0e4d
 
8d0e4d
@@ -350,13 +383,11 @@ static bool reverse;
8d0e4d
    they were read if all keys compare equal.  */
8d0e4d
 static bool stable;
8d0e4d
 
8d0e4d
-/* If TAB has this value, blanks separate fields.  */
8d0e4d
-enum { TAB_DEFAULT = CHAR_MAX + 1 };
8d0e4d
-
8d0e4d
-/* Tab character separating fields.  If TAB_DEFAULT, then fields are
8d0e4d
+/* Tab character separating fields.  If tab_length is 0, then fields are
8d0e4d
    separated by the empty string between a non-blank character and a blank
8d0e4d
    character. */
8d0e4d
-static int tab = TAB_DEFAULT;
8d0e4d
+static char tab[MB_LEN_MAX + 1];
8d0e4d
+static size_t tab_length = 0;
8d0e4d
 
8d0e4d
 /* Flag to remove consecutive duplicate lines from the output.
8d0e4d
    Only the last of a sequence of equal lines will be output. */
8d0e4d
@@ -814,6 +845,46 @@ reap_all (void)
8d0e4d
     reap (-1);
8d0e4d
 }
8d0e4d
 
8d0e4d
+/* Function pointers; presumably bound to the *_uni or *_mb variants. */
8d0e4d
+static void
8d0e4d
+(*inittables) (void);
8d0e4d
+static char *
8d0e4d
+(*begfield) (const struct line*, const struct keyfield *);
8d0e4d
+static char *
8d0e4d
+(*limfield) (const struct line*, const struct keyfield *);
8d0e4d
+static void
8d0e4d
+(*skipblanks) (char **ptr, char *lim);
8d0e4d
+static int
8d0e4d
+(*getmonth) (char const *, size_t, char **);
8d0e4d
+static int
8d0e4d
+(*keycompare) (const struct line *, const struct line *);
8d0e4d
+static int
8d0e4d
+(*numcompare) (const char *, const char *);
8d0e4d
+
8d0e4d
+/* Return nonzero if STR (LEN bytes available) starts with a blank or newline.
8d0e4d
+   Set *LENGTH to the byte length of the character examined (1 if invalid). */
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+static int
8d0e4d
+ismbblank (const char *str, size_t len, size_t *length)
8d0e4d
+{
8d0e4d
+  size_t mblength;
8d0e4d
+  wchar_t wc;
8d0e4d
+  mbstate_t state;
8d0e4d
+
8d0e4d
+  memset (&state, '\0', sizeof(mbstate_t));  /* start in the initial shift state */
8d0e4d
+  mblength = mbrtowc (&wc, str, len, &state);
8d0e4d
+
8d0e4d
+  if (mblength == (size_t)-1 || mblength == (size_t)-2)  /* invalid or incomplete */
8d0e4d
+    {
8d0e4d
+      *length = 1;
8d0e4d
+      return 0;
8d0e4d
+    }
8d0e4d
+
8d0e4d
+  *length = (mblength < 1) ? 1 : mblength;  /* a NUL character counts as one byte */
8d0e4d
+  return iswblank (wc) || wc == '\n';
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* Clean up any remaining temporary files.  */
8d0e4d
 
8d0e4d
 static void
8d0e4d
@@ -1264,7 +1335,7 @@ zaptemp (char const *name)
8d0e4d
   free (node);
8d0e4d
 }
8d0e4d
 
8d0e4d
-#if HAVE_NL_LANGINFO
8d0e4d
+#if HAVE_LANGINFO_CODESET
8d0e4d
 
8d0e4d
 static int
8d0e4d
 struct_month_cmp (void const *m1, void const *m2)
8d0e4d
@@ -1279,7 +1350,7 @@ struct_month_cmp (void const *m1, void const *m2)
8d0e4d
 /* Initialize the character class tables. */
8d0e4d
 
8d0e4d
 static void
8d0e4d
-inittables (void)
8d0e4d
+inittables_uni (void)
8d0e4d
 {
8d0e4d
   size_t i;
8d0e4d
 
8d0e4d
@@ -1291,7 +1362,7 @@ inittables (void)
8d0e4d
       fold_toupper[i] = toupper (i);
8d0e4d
     }
8d0e4d
 
8d0e4d
-#if HAVE_NL_LANGINFO
8d0e4d
+#if HAVE_LANGINFO_CODESET
8d0e4d
   /* If we're not in the "C" locale, read different names for months.  */
8d0e4d
   if (hard_LC_TIME)
8d0e4d
     {
8d0e4d
@@ -1373,6 +1444,84 @@ specify_nmerge (int oi, char c, char const *s)
8d0e4d
     xstrtol_fatal (e, oi, c, long_options, s);
8d0e4d
 }
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+/* Multibyte variant of inittables: fill monthtab with the locale's abbreviated
8d0e4d
+   month names, leading blanks dropped and upper-cased, then sort the table.  */
8d0e4d
+static void
8d0e4d
+inittables_mb (void)
8d0e4d
+{
8d0e4d
+  int i;
8d0e4d
+  char *name, *s, *lc_time, *lc_ctype;
8d0e4d
+  size_t s_len, mblength, j, k, l;
8d0e4d
+  char mbc[MB_LEN_MAX];
8d0e4d
+  wchar_t wc, pwc;
8d0e4d
+  mbstate_t state_mb, state_wc;
8d0e4d
+
8d0e4d
+  lc_time = setlocale (LC_TIME, "");
8d0e4d
+  if (lc_time)
8d0e4d
+    lc_time = xstrdup (lc_time);
8d0e4d
+
8d0e4d
+  lc_ctype = setlocale (LC_CTYPE, "");
8d0e4d
+  if (lc_ctype)
8d0e4d
+    lc_ctype = xstrdup (lc_ctype);
8d0e4d
+
8d0e4d
+  if (lc_time && lc_ctype)
8d0e4d
+    /* temporarily set LC_CTYPE to match LC_TIME, so that we can convert
8d0e4d
+     * the names of months to upper case */
8d0e4d
+    setlocale (LC_CTYPE, lc_time);
8d0e4d
+
8d0e4d
+  for (i = 0; i < MONTHS_PER_YEAR; i++)
8d0e4d
+    {
8d0e4d
+      s = (char *) nl_langinfo (ABMON_1 + i);
8d0e4d
+      s_len = strlen (s);
8d0e4d
+      /* Upper-casing may yield a longer encoding than the original character,
8d0e4d
+         so reserve the worst case of MB_LEN_MAX bytes per input byte.  */
8d0e4d
+      monthtab[i].name = name = (char *) xmalloc (s_len * MB_LEN_MAX + 1);
8d0e4d
+      monthtab[i].val = i + 1;
8d0e4d
+
8d0e4d
+      memset (&state_mb, '\0', sizeof (mbstate_t));
8d0e4d
+      memset (&state_wc, '\0', sizeof (mbstate_t));
8d0e4d
+
8d0e4d
+      for (j = 0; j < s_len && ismbblank (s + j, s_len - j, &mblength);)
8d0e4d
+        j += mblength;
8d0e4d
+
8d0e4d
+      for (k = 0; j < s_len;)
8d0e4d
+        {
8d0e4d
+          mblength = mbrtowc (&wc, (s + j), (s_len - j), &state_mb);
8d0e4d
+          assert (mblength != (size_t)-1 && mblength != (size_t)-2);
8d0e4d
+          if (mblength == 0)
8d0e4d
+            break;
8d0e4d
+
8d0e4d
+          pwc = towupper (wc);
8d0e4d
+          if (pwc == wc)
8d0e4d
+            {
8d0e4d
+              memcpy (mbc, s + j, mblength);
8d0e4d
+              j += mblength;
8d0e4d
+            }
8d0e4d
+          else
8d0e4d
+            {
8d0e4d
+              j += mblength;
8d0e4d
+              mblength = wcrtomb (mbc, pwc, &state_wc);
8d0e4d
+              assert (mblength != (size_t)0 && mblength != (size_t)-1);
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          for (l = 0; l < mblength; l++)
8d0e4d
+            name[k++] = mbc[l];
8d0e4d
+        }
8d0e4d
+      name[k] = '\0';
8d0e4d
+    }
8d0e4d
+  qsort ((void *) monthtab, MONTHS_PER_YEAR,
8d0e4d
+      sizeof (struct month), struct_month_cmp);
8d0e4d
+
8d0e4d
+  if (lc_time && lc_ctype)
8d0e4d
+    /* restore the original locales */
8d0e4d
+    setlocale (LC_CTYPE, lc_ctype);
8d0e4d
+
8d0e4d
+  free (lc_ctype);
8d0e4d
+  free (lc_time);
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* Specify the amount of main memory to use when sorting.  */
8d0e4d
 static void
8d0e4d
 specify_sort_size (int oi, char c, char const *s)
8d0e4d
@@ -1604,7 +1753,7 @@ buffer_linelim (struct buffer const *buf)
8d0e4d
    by KEY in LINE. */
8d0e4d
 
8d0e4d
 static char *
8d0e4d
-begfield (struct line const *line, struct keyfield const *key)
8d0e4d
+begfield_uni (const struct line *line, const struct keyfield *key)
8d0e4d
 {
8d0e4d
   char *ptr = line->text, *lim = ptr + line->length - 1;
8d0e4d
   size_t sword = key->sword;
8d0e4d
@@ -1613,10 +1762,10 @@ begfield (struct line const *line, struct keyfield const *key)
8d0e4d
   /* The leading field separator itself is included in a field when -t
8d0e4d
      is absent.  */
8d0e4d
 
8d0e4d
-  if (tab != TAB_DEFAULT)
8d0e4d
+  if (tab_length)
8d0e4d
     while (ptr < lim && sword--)
8d0e4d
       {
8d0e4d
-        while (ptr < lim && *ptr != tab)
8d0e4d
+        while (ptr < lim && *ptr != tab[0])
8d0e4d
           ++ptr;
8d0e4d
         if (ptr < lim)
8d0e4d
           ++ptr;
8d0e4d
@@ -1642,11 +1791,70 @@ begfield (struct line const *line, struct keyfield const *key)
8d0e4d
   return ptr;
8d0e4d
 }
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+static char *
8d0e4d
+begfield_mb (const struct line *line, const struct keyfield *key)
8d0e4d
+{
8d0e4d
+  int i;
8d0e4d
+  char *ptr = line->text, *lim = ptr + line->length - 1;
8d0e4d
+  size_t sword = key->sword;
8d0e4d
+  size_t schar = key->schar;
8d0e4d
+  size_t mblength;
8d0e4d
+  mbstate_t state;
8d0e4d
+
8d0e4d
+  memset (&state, '\0', sizeof(mbstate_t));
8d0e4d
+
8d0e4d
+  if (tab_length)
8d0e4d
+    while (ptr < lim && sword--)
8d0e4d
+      {
8d0e4d
+        while (ptr < lim && memcmp (ptr, tab, tab_length) != 0)
8d0e4d
+          {
8d0e4d
+            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
8d0e4d
+            ptr += mblength;
8d0e4d
+          }
8d0e4d
+        if (ptr < lim)
8d0e4d
+          {
8d0e4d
+            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
8d0e4d
+            ptr += mblength;
8d0e4d
+          }
8d0e4d
+      }
8d0e4d
+  else
8d0e4d
+    while (ptr < lim && sword--)
8d0e4d
+      {
8d0e4d
+        while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
8d0e4d
+          ptr += mblength;
8d0e4d
+        if (ptr < lim)
8d0e4d
+          {
8d0e4d
+            GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
8d0e4d
+            ptr += mblength;
8d0e4d
+          }
8d0e4d
+        while (ptr < lim && !ismbblank (ptr, lim - ptr, &mblength))
8d0e4d
+          ptr += mblength;
8d0e4d
+      }
8d0e4d
+
8d0e4d
+  if (key->skipsblanks)
8d0e4d
+    while (ptr < lim && ismbblank (ptr, lim - ptr, &mblength))
8d0e4d
+      ptr += mblength;
8d0e4d
+
8d0e4d
+  for (i = 0; i < schar; i++)
8d0e4d
+    {
8d0e4d
+      GET_BYTELEN_OF_CHAR (lim, ptr, mblength, state);
8d0e4d
+
8d0e4d
+      if (ptr + mblength > lim)
8d0e4d
+        break;
8d0e4d
+      else
8d0e4d
+        ptr += mblength;
8d0e4d
+    }
8d0e4d
+