Blame SOURCES/coreutils-i18n-cut-old.patch

8d0e4d
diff --git a/src/cut.c b/src/cut.c
8d0e4d
index 7ab6be4..022d0ad 100644
8d0e4d
--- a/src/cut.c
8d0e4d
+++ b/src/cut.c
8d0e4d
@@ -28,6 +28,11 @@
8d0e4d
 #include <assert.h>
8d0e4d
 #include <getopt.h>
8d0e4d
 #include <sys/types.h>
8d0e4d
+
8d0e4d
+/* Get mbstate_t, mbrtowc().  */
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+# include <wchar.h>
8d0e4d
+#endif
8d0e4d
 #include "system.h"
8d0e4d
 
8d0e4d
 #include "error.h"
8d0e4d
@@ -38,6 +43,18 @@
8d0e4d
 
8d0e4d
 #include "set-fields.h"
8d0e4d
 
8d0e4d
+/* MB_LEN_MAX is incorrectly defined to be 1 in at least one GCC
8d0e4d
+   installation; work around this configuration error.        */
8d0e4d
+#if !defined MB_LEN_MAX || MB_LEN_MAX < 2
8d0e4d
+# undef MB_LEN_MAX
8d0e4d
+# define MB_LEN_MAX 16
8d0e4d
+#endif
8d0e4d
+
8d0e4d
+/* Some systems, like BeOS, have multibyte encodings but lack mbstate_t.  */
8d0e4d
+#if HAVE_MBRTOWC && defined mbstate_t
8d0e4d
+# define mbrtowc(pwc, s, n, ps) (mbrtowc) (pwc, s, n, 0)
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* The official name of this program (e.g., no 'g' prefix).  */
8d0e4d
 #define PROGRAM_NAME "cut"
8d0e4d
 
8d0e4d
@@ -54,6 +71,52 @@
8d0e4d
     }									\
8d0e4d
   while (0)
8d0e4d
 
8d0e4d
+/* When fewer than MB_LEN_MAX bytes remain (and STREAM is not at EOF or in error), move them to the start of BUF and read more from STREAM. */
8d0e4d
+#define REFILL_BUFFER(BUF, BUFPOS, BUFLEN, STREAM)                        \
8d0e4d
+  do                                                                        \
8d0e4d
+    {                                                                        \
8d0e4d
+      if (BUFLEN < MB_LEN_MAX && !feof (STREAM) && !ferror (STREAM))        \
8d0e4d
+        {                                                                \
8d0e4d
+          memmove (BUF, BUFPOS, BUFLEN);                                \
8d0e4d
+          BUFLEN += fread (BUF + BUFLEN, sizeof(char), BUFSIZ, STREAM); \
8d0e4d
+          BUFPOS = BUF;                                                        \
8d0e4d
+        }                                                                \
8d0e4d
+    }                                                                        \
8d0e4d
+  while (0)
8d0e4d
+
8d0e4d
+/* Decode the multibyte character at BUFPOS into WC and its byte length into MBLENGTH; BUFPOS is not advanced.
8d0e4d
+   If BUFLEN < 1, only WC is set (to WEOF).  Otherwise CONVFAIL is true when the bytes are invalid or incomplete; MBLENGTH is forced to 1 then and for a NUL character. */
8d0e4d
+#define GET_NEXT_WC_FROM_BUFFER(WC, BUFPOS, BUFLEN, MBLENGTH, STATE, CONVFAIL) \
8d0e4d
+  do                                                                        \
8d0e4d
+    {                                                                        \
8d0e4d
+      mbstate_t state_bak;                                                \
8d0e4d
+                                                                        \
8d0e4d
+      if (BUFLEN < 1)                                                        \
8d0e4d
+        {                                                                \
8d0e4d
+          WC = WEOF;                                                        \
8d0e4d
+          break;                                                        \
8d0e4d
+        }                                                                \
8d0e4d
+                                                                        \
8d0e4d
+      /* Get a wide character. */                                        \
8d0e4d
+      CONVFAIL = false;                                                        \
8d0e4d
+      state_bak = STATE;                                                \
8d0e4d
+      MBLENGTH = mbrtowc ((wchar_t *)&WC, BUFPOS, BUFLEN, &STATE);        \
8d0e4d
+                                                                        \
8d0e4d
+      switch (MBLENGTH)                                                        \
8d0e4d
+        {                                                                \
8d0e4d
+        case (size_t)-1:                                                \
8d0e4d
+        case (size_t)-2:                                                \
8d0e4d
+          CONVFAIL = true;                                                        \
8d0e4d
+          STATE = state_bak;                                                \
8d0e4d
+          /* Fall through. */                                                \
8d0e4d
+                                                                        \
8d0e4d
+        case 0:                                                                \
8d0e4d
+          MBLENGTH = 1;                                                        \
8d0e4d
+          break;                                                        \
8d0e4d
+        }                                                                \
8d0e4d
+    }                                                                        \
8d0e4d
+  while (0)
8d0e4d
+
8d0e4d
 
8d0e4d
 /* Pointer inside RP.  When checking if a byte or field is selected
8d0e4d
    by a finite range, we check if it is between CURRENT_RP.LO
8d0e4d
@@ -61,6 +124,9 @@
8d0e4d
    CURRENT_RP.HI then we make CURRENT_RP to point to the next range pair. */
8d0e4d
 static struct field_range_pair *current_rp;
8d0e4d
 
8d0e4d
+/* Length of the delimiter given as argument to -d.  */
8d0e4d
+size_t delimlen;
8d0e4d
+
8d0e4d
 /* This buffer is used to support the semantics of the -s option
8d0e4d
    (or lack of same) when the specified field list includes (does
8d0e4d
    not include) the first field.  In both of those cases, the entire
8d0e4d
@@ -77,15 +143,25 @@ enum operating_mode
8d0e4d
   {
8d0e4d
     undefined_mode,
8d0e4d
 
8d0e4d
-    /* Output characters that are in the given bytes. */
8d0e4d
+    /* Output bytes that are at the given positions. */
8d0e4d
     byte_mode,
8d0e4d
 
8d0e4d
+    /* Output characters that are at the given positions. */
8d0e4d
+    character_mode,
8d0e4d
+
8d0e4d
     /* Output the given delimiter-separated fields. */
8d0e4d
     field_mode
8d0e4d
   };
8d0e4d
 
8d0e4d
 static enum operating_mode operating_mode;
8d0e4d
 
8d0e4d
+/* If nonzero, when in byte mode, don't split multibyte characters.  */
8d0e4d
+static int byte_mode_character_aware;
8d0e4d
+
8d0e4d
+/* If nonzero, use the single-byte functions even though
8d0e4d
+   the program is running in a multibyte locale. */
8d0e4d
+static int force_singlebyte_mode;
8d0e4d
+
8d0e4d
 /* If true do not output lines containing no delimiter characters.
8d0e4d
    Otherwise, all such lines are printed.  This option is valid only
8d0e4d
    with field mode.  */
8d0e4d
@@ -97,6 +173,9 @@ static bool complement;
8d0e4d
 
8d0e4d
 /* The delimiter character for field mode. */
8d0e4d
 static unsigned char delim;
8d0e4d
+#if HAVE_WCHAR_H
8d0e4d
+static wchar_t wcdelim;
8d0e4d
+#endif
8d0e4d
 
8d0e4d
 /* The delimiter for each line/record. */
8d0e4d
 static unsigned char line_delim = '\n';
8d0e4d
@@ -164,7 +243,7 @@ Print selected parts of lines from each FILE to standard output.\n\
8d0e4d
   -f, --fields=LIST       select only these fields;  also print any line\n\
8d0e4d
                             that contains no delimiter character, unless\n\
8d0e4d
                             the -s option is specified\n\
8d0e4d
-  -n                      (ignored)\n\
8d0e4d
+  -n                      with -b: don't split multibyte characters\n\
8d0e4d
 "), stdout);
8d0e4d
       fputs (_("\
8d0e4d
       --complement        complement the set of selected bytes, characters\n\
8d0e4d
@@ -280,6 +359,82 @@ cut_bytes (FILE *stream)
8d0e4d
     }
8d0e4d
 }
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+/* This function handles the following two cases.
8d0e4d
+
8d0e4d
+   1. Read from the stream STREAM, printing to standard output any selected
8d0e4d
+   characters.
8d0e4d
+
8d0e4d
+   2. Read from stream STREAM, printing to standard output any selected bytes,
8d0e4d
+   without splitting multibyte characters.  A byte that fails conversion is handled as one ordinary item.  */
8d0e4d
+
8d0e4d
+static void
8d0e4d
+cut_characters_or_cut_bytes_no_split (FILE *stream)
8d0e4d
+{
8d0e4d
+  uintmax_t idx;             /* number of bytes or characters in the line so far. */
8d0e4d
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
8d0e4d
+  char *bufpos;                /* Next read position of BUF. */
8d0e4d
+  size_t buflen;        /* The length of the byte sequence in buf. */
8d0e4d
+  wint_t wc = 0;        /* The decoded wide character; not updated when conversion fails. */
8d0e4d
+  size_t mblength;        /* Byte length of the multibyte character
8d0e4d
+                           that was decoded into WC. */
8d0e4d
+  mbstate_t state;        /* State of the stream. */
8d0e4d
+  bool convfail = false;  /* true, when conversion failed. Otherwise false. */
8d0e4d
+  /* Whether to begin printing delimiters between ranges for the current line.
8d0e4d
+     Set after we've begun printing data corresponding to the first range.  */
8d0e4d
+  bool print_delimiter = false;
8d0e4d
+
8d0e4d
+  idx = 0;
8d0e4d
+  buflen = 0;
8d0e4d
+  bufpos = buf;
8d0e4d
+  memset (&state, '\0', sizeof(mbstate_t));
8d0e4d
+
8d0e4d
+  current_rp = frp;
8d0e4d
+
8d0e4d
+  while (1)
8d0e4d
+    {
8d0e4d
+      REFILL_BUFFER (buf, bufpos, buflen, stream);
8d0e4d
+
8d0e4d
+      GET_NEXT_WC_FROM_BUFFER (wc, bufpos, buflen, mblength, state, convfail);
8d0e4d
+      /* When CONVFAIL is set WC is stale, so it must not be compared with LINE_DELIM below.  */
8d0e4d
+
8d0e4d
+      if (wc == WEOF)
8d0e4d
+        {
8d0e4d
+          if (idx > 0)
8d0e4d
+            putchar (line_delim);
8d0e4d
+          break;
8d0e4d
+        }
8d0e4d
+      else if (!convfail && wc == line_delim)
8d0e4d
+        {
8d0e4d
+          putchar (line_delim);
8d0e4d
+          idx = 0;
8d0e4d
+          print_delimiter = false;
8d0e4d
+          current_rp = frp;
8d0e4d
+        }
8d0e4d
+      else
8d0e4d
+        {
8d0e4d
+          next_item (&idx);
8d0e4d
+          if (print_kth (idx))
8d0e4d
+            {
8d0e4d
+              if (output_delimiter_specified)
8d0e4d
+                {
8d0e4d
+                  if (print_delimiter && is_range_start_index (idx))
8d0e4d
+                    {
8d0e4d
+                      fwrite (output_delimiter_string, sizeof (char),
8d0e4d
+                              output_delimiter_length, stdout);
8d0e4d
+                    }
8d0e4d
+                  print_delimiter = true;
8d0e4d
+                }
8d0e4d
+              fwrite (bufpos, mblength, sizeof(char), stdout);
8d0e4d
+            }
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      buflen -= mblength;
8d0e4d
+      bufpos += mblength;
8d0e4d
+    }
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 /* Read from stream STREAM, printing to standard output any selected fields.  */
8d0e4d
 
8d0e4d
 static void
8d0e4d
@@ -425,13 +580,211 @@ cut_fields (FILE *stream)
8d0e4d
     }
8d0e4d
 }
8d0e4d
 
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+static void
8d0e4d
+cut_fields_mb (FILE *stream)
8d0e4d
+{
8d0e4d
+  int c;
8d0e4d
+  uintmax_t field_idx;
8d0e4d
+  int found_any_selected_field;
8d0e4d
+  int buffer_first_field;
8d0e4d
+  int empty_input;
8d0e4d
+  char buf[MB_LEN_MAX + BUFSIZ];  /* For spooling a read byte sequence. */
8d0e4d
+  char *bufpos;                /* Next read position of BUF. */
8d0e4d
+  size_t buflen;        /* The length of the byte sequence in buf. */
8d0e4d
+  wint_t wc = 0;        /* A gotten wide character. */
8d0e4d
+  size_t mblength;        /* Byte length of the multibyte character
8d0e4d
+                           that was decoded into WC. */
8d0e4d
+  mbstate_t state;        /* State of the stream. */
8d0e4d
+  bool convfail = false;  /* true, when conversion failed. Otherwise false. */
8d0e4d
+
8d0e4d
+  current_rp = frp;
8d0e4d
+
8d0e4d
+  found_any_selected_field = 0;
8d0e4d
+  field_idx = 1;
8d0e4d
+  bufpos = buf;
8d0e4d
+  buflen = 0;
8d0e4d
+  memset (&state, '\0', sizeof(mbstate_t));
8d0e4d
+
8d0e4d
+  c = getc (stream);
8d0e4d
+  empty_input = (c == EOF);
8d0e4d
+  if (c != EOF)
8d0e4d
+  {
8d0e4d
+    ungetc (c, stream);
8d0e4d
+    wc = 0;
8d0e4d
+  }
8d0e4d
+  else
8d0e4d
+    wc = WEOF;
8d0e4d
+
8d0e4d
+  /* To support the semantics of the -s flag, we may have to buffer
8d0e4d
+     all of the first field to determine whether it is `delimited.'
8d0e4d
+     But that is unnecessary if all non-delimited lines must be printed
8d0e4d
+     and the first field has been selected, or if non-delimited lines
8d0e4d
+     must be suppressed and the first field has *not* been selected.
8d0e4d
+     That is because a non-delimited line has exactly one field.  */
8d0e4d
+  buffer_first_field = (suppress_non_delimited ^ !print_kth (1));
8d0e4d
+
8d0e4d
+  while (1)
8d0e4d
+    {
8d0e4d
+      if (field_idx == 1 && buffer_first_field)
8d0e4d
+        {
8d0e4d
+          size_t len = 0;
8d0e4d
+
8d0e4d
+          while (1)
8d0e4d
+            {
8d0e4d
+              REFILL_BUFFER (buf, bufpos, buflen, stream);
8d0e4d
+
8d0e4d
+              GET_NEXT_WC_FROM_BUFFER
8d0e4d
+                (wc, bufpos, buflen, mblength, state, convfail);
8d0e4d
+
8d0e4d
+              if (wc == WEOF)
8d0e4d
+                break;
8d0e4d
+
8d0e4d
+              field_1_buffer = xrealloc (field_1_buffer, len + mblength);
8d0e4d
+              memcpy (field_1_buffer + len, bufpos, mblength);
8d0e4d
+              len += mblength;
8d0e4d
+              buflen -= mblength;
8d0e4d
+              bufpos += mblength;
8d0e4d
+
8d0e4d
+              if (!convfail && (wc == line_delim || wc == wcdelim))
8d0e4d
+                break;
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          if (len == 0 && wc == WEOF)
8d0e4d
+            break;
8d0e4d
+
8d0e4d
+          /* If the first field extends to the end of line (it is not
8d0e4d
+             delimited) and we are printing all non-delimited lines,
8d0e4d
+             print this one.  */
8d0e4d
+          if (convfail || (!convfail && wc != wcdelim))
8d0e4d
+            {
8d0e4d
+              if (suppress_non_delimited)
8d0e4d
+                {
8d0e4d
+                  /* Empty.        */
8d0e4d
+                }
8d0e4d
+              else
8d0e4d
+                {
8d0e4d
+                  fwrite (field_1_buffer, sizeof (char), len, stdout);
8d0e4d
+                  /* Make sure the output line is newline terminated.  */
8d0e4d
+                  if (convfail || (!convfail && wc != line_delim))
8d0e4d
+                    putchar (line_delim);
8d0e4d
+                }
8d0e4d
+              continue;
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          if (print_kth (1))
8d0e4d
+            {
8d0e4d
+              /* Print the field, but not the trailing delimiter, which occupies the last MBLENGTH bytes.  */
8d0e4d
+              fwrite (field_1_buffer, sizeof (char), len - mblength, stdout);
8d0e4d
+              found_any_selected_field = 1;
8d0e4d
+            }
8d0e4d
+          next_item (&field_idx);
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (wc != WEOF)
8d0e4d
+        {
8d0e4d
+          if (print_kth (field_idx))
8d0e4d
+            {
8d0e4d
+              if (found_any_selected_field)
8d0e4d
+                {
8d0e4d
+                  fwrite (output_delimiter_string, sizeof (char),
8d0e4d
+                          output_delimiter_length, stdout);
8d0e4d
+                }
8d0e4d
+              found_any_selected_field = 1;
8d0e4d
+            }
8d0e4d
+
8d0e4d
+          while (1)
8d0e4d
+            {
8d0e4d
+              REFILL_BUFFER (buf, bufpos, buflen, stream);
8d0e4d
+
8d0e4d
+              GET_NEXT_WC_FROM_BUFFER
8d0e4d
+                (wc, bufpos, buflen, mblength, state, convfail);
8d0e4d
+
8d0e4d
+              if (wc == WEOF)
8d0e4d
+                break;
8d0e4d
+              else if (!convfail && (wc == wcdelim || wc == line_delim))
8d0e4d
+                {
8d0e4d
+                  buflen -= mblength;
8d0e4d
+                  bufpos += mblength;
8d0e4d
+                  break;
8d0e4d
+                }
8d0e4d
+
8d0e4d
+              if (print_kth (field_idx))
8d0e4d
+                fwrite (bufpos, mblength, sizeof(char), stdout);
8d0e4d
+
8d0e4d
+              buflen -= mblength;
8d0e4d
+              bufpos += mblength;
8d0e4d
+            }
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if ((!convfail || wc == line_delim) && buflen < 1)
8d0e4d
+        wc = WEOF;
8d0e4d
+
8d0e4d
+      if (!convfail && wc == wcdelim)
8d0e4d
+        next_item (&field_idx);
8d0e4d
+      else if (wc == WEOF || (!convfail && wc == line_delim))
8d0e4d
+        {
8d0e4d
+          if (found_any_selected_field
8d0e4d
+              || (!empty_input && !(suppress_non_delimited && field_idx == 1)))
8d0e4d
+            putchar (line_delim);
8d0e4d
+          if (wc == WEOF)
8d0e4d
+            break;
8d0e4d
+          field_idx = 1;
8d0e4d
+          current_rp = frp;
8d0e4d
+          found_any_selected_field = 0;
8d0e4d
+        }
8d0e4d
+    }
8d0e4d
+}
8d0e4d
+#endif
8d0e4d
+
8d0e4d
 static void
8d0e4d
 cut_stream (FILE *stream)
8d0e4d
 {
8d0e4d
-  if (operating_mode == byte_mode)
8d0e4d
-    cut_bytes (stream);
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+  if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
8d0e4d
+    {
8d0e4d
+      switch (operating_mode)
8d0e4d
+        {
8d0e4d
+        case byte_mode:
8d0e4d
+          if (byte_mode_character_aware)
8d0e4d
+            cut_characters_or_cut_bytes_no_split (stream);
8d0e4d
+          else
8d0e4d
+            cut_bytes (stream);
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        case character_mode:
8d0e4d
+          cut_characters_or_cut_bytes_no_split (stream);
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        case field_mode:
8d0e4d
+          if (delimlen == 1)
8d0e4d
+            {
8d0e4d
+              /* With a one-byte delimiter in a UTF-8 locale, use the
8d0e4d
+                 byte-oriented cut_fields as an optimization: it relies on the
8d0e4d
+                 uniqueness of characters, which does not hold for e.g. SJIS.  */
8d0e4d
+              char * loc = setlocale(LC_CTYPE, NULL);
8d0e4d
+              if (loc && (strstr (loc, "UTF-8") || strstr (loc, "utf-8") ||
8d0e4d
+                  strstr (loc, "UTF8") || strstr (loc, "utf8")))
8d0e4d
+                {
8d0e4d
+                  cut_fields (stream);
8d0e4d
+                  break;
8d0e4d
+                }
8d0e4d
+            }
8d0e4d
+          cut_fields_mb (stream);
8d0e4d
+          break;
8d0e4d
+
8d0e4d
+        default:
8d0e4d
+          abort ();
8d0e4d
+        }
8d0e4d
+    }
8d0e4d
   else
8d0e4d
-    cut_fields (stream);
8d0e4d
+#endif
8d0e4d
+    {
8d0e4d
+      if (operating_mode == field_mode)
8d0e4d
+        cut_fields (stream);
8d0e4d
+      else
8d0e4d
+        cut_bytes (stream);
8d0e4d
+    }
8d0e4d
 }
8d0e4d
 
8d0e4d
 /* Process file FILE to standard output.
8d0e4d
@@ -483,6 +836,7 @@ main (int argc, char **argv)
8d0e4d
   bool ok;
8d0e4d
   bool delim_specified = false;
8d0e4d
   char *spec_list_string IF_LINT ( = NULL);
8d0e4d
+  char mbdelim[MB_LEN_MAX + 1];
8d0e4d
 
8d0e4d
   initialize_main (&argc, &argv);
8d0e4d
   set_program_name (argv[0]);
8d0e4d
@@ -505,7 +859,6 @@ main (int argc, char **argv)
8d0e4d
       switch (optc)
8d0e4d
         {
8d0e4d
         case 'b':
8d0e4d
-        case 'c':
8d0e4d
           /* Build the byte list. */
8d0e4d
           if (operating_mode != undefined_mode)
8d0e4d
             FATAL_ERROR (_("only one type of list may be specified"));
8d0e4d
@@ -513,6 +866,14 @@ main (int argc, char **argv)
8d0e4d
           spec_list_string = optarg;
8d0e4d
           break;
8d0e4d
 
8d0e4d
+        case 'c':
8d0e4d
+          /* Build the character list. */
8d0e4d
+          if (operating_mode != undefined_mode)
8d0e4d
+            FATAL_ERROR (_("only one type of list may be specified"));
8d0e4d
+          operating_mode = character_mode;
8d0e4d
+          spec_list_string = optarg;
8d0e4d
+          break;
8d0e4d
+
8d0e4d
         case 'f':
8d0e4d
           /* Build the field list. */
8d0e4d
           if (operating_mode != undefined_mode)
8d0e4d
@@ -524,10 +885,38 @@ main (int argc, char **argv)
8d0e4d
         case 'd':
8d0e4d
           /* New delimiter. */
8d0e4d
           /* Interpret -d '' to mean 'use the NUL byte as the delimiter.'  */
8d0e4d
-          if (optarg[0] != '\0' && optarg[1] != '\0')
8d0e4d
-            FATAL_ERROR (_("the delimiter must be a single character"));
8d0e4d
-          delim = optarg[0];
8d0e4d
-          delim_specified = true;
8d0e4d
+            {
8d0e4d
+#if HAVE_MBRTOWC
8d0e4d
+              if(MB_CUR_MAX > 1)
8d0e4d
+                {
8d0e4d
+                  mbstate_t state;
8d0e4d
+
8d0e4d
+                  memset (&state, '\0', sizeof(mbstate_t));
8d0e4d
+                  delimlen = mbrtowc (&wcdelim, optarg, strnlen(optarg, MB_LEN_MAX), &state);
8d0e4d
+
8d0e4d
+                  if (delimlen == (size_t)-1 || delimlen == (size_t)-2)
8d0e4d
+                    ++force_singlebyte_mode;
8d0e4d
+                  else
8d0e4d
+                    {
8d0e4d
+                      delimlen = (delimlen < 1) ? 1 : delimlen;
8d0e4d
+                      if (wcdelim != L'\0' && *(optarg + delimlen) != '\0')
8d0e4d
+                        FATAL_ERROR (_("the delimiter must be a single character"));
8d0e4d
+                      memcpy (mbdelim, optarg, delimlen);
8d0e4d
+                      mbdelim[delimlen] = '\0';
8d0e4d
+                      if (delimlen == 1)
8d0e4d
+                        delim = *optarg;
8d0e4d
+                    }
8d0e4d
+                }
8d0e4d
+
8d0e4d
+              if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
8d0e4d
+#endif
8d0e4d
+                {
8d0e4d
+                  if (optarg[0] != '\0' && optarg[1] != '\0')
8d0e4d
+                    FATAL_ERROR (_("the delimiter must be a single character"));
8d0e4d
+                  delim = (unsigned char) optarg[0];
8d0e4d
+                }
8d0e4d
+            delim_specified = true;
8d0e4d
+          }
8d0e4d
           break;
8d0e4d
 
8d0e4d
         case OUTPUT_DELIMITER_OPTION:
8d0e4d
@@ -540,6 +929,7 @@ main (int argc, char **argv)
8d0e4d
           break;
8d0e4d
 
8d0e4d
         case 'n':
8d0e4d
+          byte_mode_character_aware = 1;
8d0e4d
           break;
8d0e4d
 
8d0e4d
         case 's':
8d0e4d
@@ -579,15 +969,34 @@ main (int argc, char **argv)
8d0e4d
               | (complement ? SETFLD_COMPLEMENT : 0) );
8d0e4d
 
8d0e4d
   if (!delim_specified)
8d0e4d
-    delim = '\t';
8d0e4d
+    {
8d0e4d
+      delim = '\t';
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+      wcdelim = L'\t';
8d0e4d
+      mbdelim[0] = '\t';
8d0e4d
+      mbdelim[1] = '\0';
8d0e4d
+      delimlen = 1;
8d0e4d
+#endif
8d0e4d
+    }
8d0e4d
 
8d0e4d
   if (output_delimiter_string == NULL)
8d0e4d
     {
8d0e4d
-      static char dummy[2];
8d0e4d
-      dummy[0] = delim;
8d0e4d
-      dummy[1] = '\0';
8d0e4d
-      output_delimiter_string = dummy;
8d0e4d
-      output_delimiter_length = 1;
8d0e4d
+#ifdef HAVE_MBRTOWC
8d0e4d
+      if (MB_CUR_MAX > 1 && !force_singlebyte_mode)
8d0e4d
+        {
8d0e4d
+          output_delimiter_string = xstrdup(mbdelim);
8d0e4d
+          output_delimiter_length = delimlen;
8d0e4d
+        }
8d0e4d
+
8d0e4d
+      if (MB_CUR_MAX <= 1 || force_singlebyte_mode)
8d0e4d
+#endif
8d0e4d
+        {
8d0e4d
+          static char dummy[2];
8d0e4d
+          dummy[0] = delim;
8d0e4d
+          dummy[1] = '\0';
8d0e4d
+          output_delimiter_string = dummy;
8d0e4d
+          output_delimiter_length = 1;
8d0e4d
+        }
8d0e4d
     }
8d0e4d
 
8d0e4d
   if (optind == argc)