Blame tmux-2.6-fix-utf8-char-handling.patch

Andreas Schneider 0cd513
From c03565611e41649ce9295012faec2f0eddb2a822 Mon Sep 17 00:00:00 2001
Andreas Schneider 0cd513
From: nicm <nicm>
Andreas Schneider 0cd513
Date: Fri, 12 Jan 2018 16:32:12 +0000
Andreas Schneider 0cd513
Subject: [PATCH] Simplify UTF-8 states down into one state.
Andreas Schneider 0cd513
Andreas Schneider 0cd513
---
Andreas Schneider 0cd513
 input.c | 120 ++++++++++++----------------------------------------------------
Andreas Schneider 0cd513
 1 file changed, 22 insertions(+), 98 deletions(-)
Andreas Schneider 0cd513
Andreas Schneider 0cd513
diff --git a/input.c b/input.c
Andreas Schneider 0cd513
index 42ff7f3b2..cce4df1f7 100644
Andreas Schneider 0cd513
--- a/input.c
Andreas Schneider 0cd513
+++ b/input.c
Andreas Schneider 0cd513
@@ -85,6 +85,7 @@ struct input_ctx {
Andreas Schneider 0cd513
 	u_int			param_list_len;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 	struct utf8_data	utf8data;
Andreas Schneider 0cd513
+	int			utf8started;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 	int			ch;
Andreas Schneider 0cd513
 	int			last;
Andreas Schneider 0cd513
@@ -146,9 +147,7 @@ static void	input_csi_dispatch_sgr_256(struct input_ctx *, int, u_int *);
Andreas Schneider 0cd513
 static void	input_csi_dispatch_sgr_rgb(struct input_ctx *, int, u_int *);
Andreas Schneider 0cd513
 static void	input_csi_dispatch_sgr(struct input_ctx *);
Andreas Schneider 0cd513
 static int	input_dcs_dispatch(struct input_ctx *);
Andreas Schneider 0cd513
-static int	input_utf8_open(struct input_ctx *);
Andreas Schneider 0cd513
-static int	input_utf8_add(struct input_ctx *);
Andreas Schneider 0cd513
-static int	input_utf8_close(struct input_ctx *);
Andreas Schneider 0cd513
+static int	input_top_bit_set(struct input_ctx *);
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 /* Command table comparison function. */
Andreas Schneider 0cd513
 static int	input_table_compare(const void *, const void *);
Andreas Schneider 0cd513
@@ -314,9 +313,6 @@ static const struct input_transition input_state_osc_string_table[];
Andreas Schneider 0cd513
 static const struct input_transition input_state_apc_string_table[];
Andreas Schneider 0cd513
 static const struct input_transition input_state_rename_string_table[];
Andreas Schneider 0cd513
 static const struct input_transition input_state_consume_st_table[];
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_three_table[];
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_two_table[];
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_one_table[];
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 /* ground state definition. */
Andreas Schneider 0cd513
 static const struct input_state input_state_ground = {
Andreas Schneider 0cd513
@@ -437,27 +433,6 @@ static const struct input_state input_state_consume_st = {
Andreas Schneider 0cd513
 	input_state_consume_st_table
Andreas Schneider 0cd513
 };
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
-/* utf8_three state definition. */
Andreas Schneider 0cd513
-static const struct input_state input_state_utf8_three = {
Andreas Schneider 0cd513
-	"utf8_three",
Andreas Schneider 0cd513
-	NULL, NULL,
Andreas Schneider 0cd513
-	input_state_utf8_three_table
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* utf8_two state definition. */
Andreas Schneider 0cd513
-static const struct input_state input_state_utf8_two = {
Andreas Schneider 0cd513
-	"utf8_two",
Andreas Schneider 0cd513
-	NULL, NULL,
Andreas Schneider 0cd513
-	input_state_utf8_two_table
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* utf8_one state definition. */
Andreas Schneider 0cd513
-static const struct input_state input_state_utf8_one = {
Andreas Schneider 0cd513
-	"utf8_one",
Andreas Schneider 0cd513
-	NULL, NULL,
Andreas Schneider 0cd513
-	input_state_utf8_one_table
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
 /* ground state table. */
Andreas Schneider 0cd513
 static const struct input_transition input_state_ground_table[] = {
Andreas Schneider 0cd513
 	INPUT_STATE_ANYWHERE,
Andreas Schneider 0cd513
@@ -467,11 +442,7 @@ static const struct input_transition input_state_ground_table[] = {
Andreas Schneider 0cd513
 	{ 0x1c, 0x1f, input_c0_dispatch, NULL },
Andreas Schneider 0cd513
 	{ 0x20, 0x7e, input_print,	 NULL },
Andreas Schneider 0cd513
 	{ 0x7f, 0x7f, NULL,		 NULL },
Andreas Schneider 0cd513
-	{ 0x80, 0xc1, NULL,		 NULL },
Andreas Schneider 0cd513
-	{ 0xc2, 0xdf, input_utf8_open,	 &input_state_utf8_one },
Andreas Schneider 0cd513
-	{ 0xe0, 0xef, input_utf8_open,	 &input_state_utf8_two },
Andreas Schneider 0cd513
-	{ 0xf0, 0xf4, input_utf8_open,	 &input_state_utf8_three },
Andreas Schneider 0cd513
-	{ 0xf5, 0xff, NULL,		 NULL },
Andreas Schneider 0cd513
+	{ 0x80, 0xff, input_top_bit_set, NULL },
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 	{ -1, -1, NULL, NULL }
Andreas Schneider 0cd513
 };
Andreas Schneider 0cd513
@@ -717,39 +688,6 @@ static const struct input_transition input_state_consume_st_table[] = {
Andreas Schneider 0cd513
 	{ -1, -1, NULL, NULL }
Andreas Schneider 0cd513
 };
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
-/* utf8_three state table. */
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_three_table[] = {
Andreas Schneider 0cd513
-	/* No INPUT_STATE_ANYWHERE */
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ 0x00, 0x7f, NULL,		&input_state_ground },
Andreas Schneider 0cd513
-	{ 0x80, 0xbf, input_utf8_add,	&input_state_utf8_two },
Andreas Schneider 0cd513
-	{ 0xc0, 0xff, NULL,		&input_state_ground },
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ -1, -1, NULL, NULL }
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* utf8_two state table. */
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_two_table[] = {
Andreas Schneider 0cd513
-	/* No INPUT_STATE_ANYWHERE */
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ 0x00, 0x7f, NULL,	      &input_state_ground },
Andreas Schneider 0cd513
-	{ 0x80, 0xbf, input_utf8_add, &input_state_utf8_one },
Andreas Schneider 0cd513
-	{ 0xc0, 0xff, NULL,	      &input_state_ground },
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ -1, -1, NULL, NULL }
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* utf8_one state table. */
Andreas Schneider 0cd513
-static const struct input_transition input_state_utf8_one_table[] = {
Andreas Schneider 0cd513
-	/* No INPUT_STATE_ANYWHERE */
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ 0x00, 0x7f, NULL,		&input_state_ground },
Andreas Schneider 0cd513
-	{ 0x80, 0xbf, input_utf8_close, &input_state_ground },
Andreas Schneider 0cd513
-	{ 0xc0, 0xff, NULL,		&input_state_ground },
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	{ -1, -1, NULL, NULL }
Andreas Schneider 0cd513
-};
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
 /* Input table compare. */
Andreas Schneider 0cd513
 static int
Andreas Schneider 0cd513
 input_table_compare(const void *key, const void *value)
Andreas Schneider 0cd513
@@ -1059,6 +997,8 @@ input_print(struct input_ctx *ictx)
Andreas Schneider 0cd513
 {
Andreas Schneider 0cd513
 	int	set;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
+	ictx->utf8started = 0; /* can't be valid UTF-8 */
Andreas Schneider 0cd513
+
Andreas Schneider 0cd513
 	set = ictx->cell.set == 0 ? ictx->cell.g0set : ictx->cell.g1set;
Andreas Schneider 0cd513
 	if (set == 1)
Andreas Schneider 0cd513
 		ictx->cell.cell.attr |= GRID_ATTR_CHARSET;
Andreas Schneider 0cd513
@@ -1132,6 +1072,8 @@ input_c0_dispatch(struct input_ctx *ictx)
Andreas Schneider 0cd513
 	struct window_pane	*wp = ictx->wp;
Andreas Schneider 0cd513
 	struct screen		*s = sctx->s;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
+	ictx->utf8started = 0; /* can't be valid UTF-8 */
Andreas Schneider 0cd513
+
Andreas Schneider 0cd513
 	log_debug("%s: '%c'", __func__, ictx->ch);
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 	switch (ictx->ch) {
Andreas Schneider 0cd513
@@ -2064,47 +2006,29 @@ input_exit_rename(struct input_ctx *ictx)
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 /* Open UTF-8 character. */
Andreas Schneider 0cd513
 static int
Andreas Schneider 0cd513
-input_utf8_open(struct input_ctx *ictx)
Andreas Schneider 0cd513
+input_top_bit_set(struct input_ctx *ictx)
Andreas Schneider 0cd513
 {
Andreas Schneider 0cd513
 	struct utf8_data	*ud = &ictx->utf8data;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
-	if (utf8_open(ud, ictx->ch) != UTF8_MORE)
Andreas Schneider 0cd513
-		fatalx("UTF-8 open invalid %#x", ictx->ch);
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	log_debug("%s %hhu", __func__, ud->size);
Andreas Schneider 0cd513
 	ictx->last = -1;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
-	return (0);
Andreas Schneider 0cd513
-}
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* Append to UTF-8 character. */
Andreas Schneider 0cd513
-static int
Andreas Schneider 0cd513
-input_utf8_add(struct input_ctx *ictx)
Andreas Schneider 0cd513
-{
Andreas Schneider 0cd513
-	struct utf8_data	*ud = &ictx->utf8data;
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	if (utf8_append(ud, ictx->ch) != UTF8_MORE)
Andreas Schneider 0cd513
-		fatalx("UTF-8 add invalid %#x", ictx->ch);
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	log_debug("%s", __func__);
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-	return (0);
Andreas Schneider 0cd513
-}
Andreas Schneider 0cd513
-
Andreas Schneider 0cd513
-/* Close UTF-8 string. */
Andreas Schneider 0cd513
-static int
Andreas Schneider 0cd513
-input_utf8_close(struct input_ctx *ictx)
Andreas Schneider 0cd513
-{
Andreas Schneider 0cd513
-	struct utf8_data	*ud = &ictx->utf8data;
Andreas Schneider 0cd513
+	if (!ictx->utf8started) {
Andreas Schneider 0cd513
+		if (utf8_open(ud, ictx->ch) != UTF8_MORE)
Andreas Schneider 0cd513
+			return (0);
Andreas Schneider 0cd513
+		ictx->utf8started = 1;
Andreas Schneider 0cd513
+		return (0);
Andreas Schneider 0cd513
+	}
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
-	if (utf8_append(ud, ictx->ch) != UTF8_DONE) {
Andreas Schneider 0cd513
-		/*
Andreas Schneider 0cd513
-		 * An error here could be invalid UTF-8 or it could be a
Andreas Schneider 0cd513
-		 * nonprintable character for which we can't get the
Andreas Schneider 0cd513
-		 * width. Drop it.
Andreas Schneider 0cd513
-		 */
Andreas Schneider 0cd513
+	switch (utf8_append(ud, ictx->ch)) {
Andreas Schneider 0cd513
+	case UTF8_MORE:
Andreas Schneider 0cd513
+		return (0);
Andreas Schneider 0cd513
+	case UTF8_ERROR:
Andreas Schneider 0cd513
+		ictx->utf8started = 0;
Andreas Schneider 0cd513
 		return (0);
Andreas Schneider 0cd513
+	case UTF8_DONE:
Andreas Schneider 0cd513
+		break;
Andreas Schneider 0cd513
 	}
Andreas Schneider 0cd513
+	ictx->utf8started = 0;
Andreas Schneider 0cd513
 
Andreas Schneider 0cd513
 	log_debug("%s %hhu '%*s' (width %hhu)", __func__, ud->size,
Andreas Schneider 0cd513
 	    (int)ud->size, ud->data, ud->width);