Blame SOURCES/0002-WebSockets-allow-null-characters-in-text-messages-da.patch

0a2b47
From 109bb2f692c746bc63a0ade8737b584aecb0b1ad Mon Sep 17 00:00:00 2001
0a2b47
From: Carlos Garcia Campos <cgarcia@igalia.com>
0a2b47
Date: Thu, 27 Jun 2019 16:03:21 +0200
0a2b47
Subject: [PATCH] WebSockets: allow null characters in text messages data
0a2b47
0a2b47
RFC 6455 says that text messages should contain valid UTF-8, and null
0a2b47
characters are valid according to RFC 3629. However, we are using
0a2b47
g_utf8_validate(), which considers null characters as errors, to
0a2b47
validate WebSockets text messages. This patch adds an internal
0a2b47
utf8_validate() function based on g_utf8_validate() but allowing null
0a2b47
characters and just returning a gboolean since we are always ignoring
0a2b47
the end parameter in case of errors.
0a2b47
soup_websocket_connection_send_text() assumes the given text is null
0a2b47
terminated, so we need a new public function to allow sending text
0a2b47
messages containing null characters. This patch adds
0a2b47
soup_websocket_connection_send_message() that receives a
0a2b47
SoupWebsocketDataType and GBytes, which is consistent with
0a2b47
SoupWebsocketConnection::message signal.
0a2b47
0a2b47
For RHEL backport, drop the addition of soup_websocket_connection_send_message()
0a2b47
as we don't need it and don't want to expose new API.
0a2b47
diff --git libsoup/soup-websocket-connection.c libsoup/soup-websocket-connection.c
0a2b47
index 66bd6871..67a98731 100644
0a2b47
--- a/libsoup/soup-websocket-connection.c
0a2b47
+++ b/libsoup/soup-websocket-connection.c
0a2b47
@@ -155,6 +155,82 @@
0a2b47
 
0a2b47
 static void protocol_error_and_close (SoupWebsocketConnection *self);
0a2b47
 
0a2b47
+/* Code below is based on g_utf8_validate() implementation,
0a2b47
+ * but handling NULL characters as valid, as expected by
0a2b47
+ * WebSockets and compliant with RFC 3629.
0a2b47
+ */
0a2b47
+#define VALIDATE_BYTE(mask, expect)                             \
0a2b47
+        G_STMT_START {                                          \
0a2b47
+          if (G_UNLIKELY((*(guchar *)p & (mask)) != (expect)))  \
0a2b47
+                  return FALSE;                                 \
0a2b47
+        } G_STMT_END
0a2b47
+
0a2b47
+/* see IETF RFC 3629 Section 4 */
0a2b47
+static gboolean
0a2b47
+utf8_validate (const char *str,
0a2b47
+               gsize max_len)
0a2b47
+
0a2b47
+{
0a2b47
+        const gchar *p;
0a2b47
+
0a2b47
+        for (p = str; ((p - str) < max_len); p++) {
0a2b47
+                if (*(guchar *)p < 128)
0a2b47
+                        /* done */;
0a2b47
+                else {
0a2b47
+                        if (*(guchar *)p < 0xe0) { /* 110xxxxx */
0a2b47
+                                if (G_UNLIKELY (max_len - (p - str) < 2))
0a2b47
+                                        return FALSE;
0a2b47
+
0a2b47
+                                if (G_UNLIKELY (*(guchar *)p < 0xc2))
0a2b47
+                                        return FALSE;
0a2b47
+                        } else {
0a2b47
+                                if (*(guchar *)p < 0xf0) { /* 1110xxxx */
0a2b47
+                                        if (G_UNLIKELY (max_len - (p - str) < 3))
0a2b47
+                                                return FALSE;
0a2b47
+
0a2b47
+                                        switch (*(guchar *)p++ & 0x0f) {
0a2b47
+                                        case 0:
0a2b47
+                                                VALIDATE_BYTE(0xe0, 0xa0); /* 0xa0 ... 0xbf */
0a2b47
+                                                break;
0a2b47
+                                        case 0x0d:
0a2b47
+                                                VALIDATE_BYTE(0xe0, 0x80); /* 0x80 ... 0x9f */
0a2b47
+                                                break;
0a2b47
+                                        default:
0a2b47
+                                                VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
0a2b47
+                                        }
0a2b47
+                                } else if (*(guchar *)p < 0xf5) { /* 11110xxx excluding out-of-range */
0a2b47
+                                        if (G_UNLIKELY (max_len - (p - str) < 4))
0a2b47
+                                                return FALSE;
0a2b47
+
0a2b47
+                                        switch (*(guchar *)p++ & 0x07) {
0a2b47
+                                        case 0:
0a2b47
+                                                VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
0a2b47
+                                                if (G_UNLIKELY((*(guchar *)p & 0x30) == 0))
0a2b47
+                                                        return FALSE;
0a2b47
+                                                break;
0a2b47
+                                        case 4:
0a2b47
+                                                VALIDATE_BYTE(0xf0, 0x80); /* 0x80 ... 0x8f */
0a2b47
+                                                break;
0a2b47
+                                        default:
0a2b47
+                                                VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
0a2b47
+                                        }
0a2b47
+                                        p++;
0a2b47
+                                        VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
0a2b47
+                                } else {
0a2b47
+                                        return FALSE;
0a2b47
+                                }
0a2b47
+                        }
0a2b47
+
0a2b47
+                        p++;
0a2b47
+                        VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */
0a2b47
+                }
0a2b47
+        }
0a2b47
+
0a2b47
+        return TRUE;
0a2b47
+}
0a2b47
+
0a2b47
+#undef VALIDATE_BYTE
0a2b47
+
0a2b47
 static void
0a2b47
 frame_free (gpointer data)
0a2b47
 {
0a2b47
@@ -629,7 +705,7 @@
0a2b47
 		data += 2;
0a2b47
 		len -= 2;
0a2b47
 		
0a2b47
-		if (!g_utf8_validate ((char *)data, len, NULL)) {
0a2b47
+		if (!utf8_validate ((const char *)data, len)) {
0a2b47
 			g_debug ("received non-UTF8 close data: %d '%.*s' %d", (int)len, (int)len, (char *)data, (int)data[0]);
0a2b47
 			protocol_error_and_close (self);
0a2b47
 			return;
0a2b47
@@ -777,9 +853,8 @@
0a2b47
 		/* Actually deliver the message? */
0a2b47
 		if (fin) {
0a2b47
 			if (pv->message_opcode == 0x01 &&
0a2b47
-			    !g_utf8_validate((char *)pv->message_data->data,
0a2b47
-			                     pv->message_data->len,
0a2b47
-			                     NULL)) {
0a2b47
+			    !utf8_validate((const char *)pv->message_data->data,
0a2b47
+					   pv->message_data->len)) {
0a2b47
 
0a2b47
 				g_debug ("received invalid non-UTF8 text data");
0a2b47
 
0a2b47
@@ -1699,7 +1774,9 @@
0a2b47
  * @self: the WebSocket
0a2b47
  * @text: the message contents
0a2b47
  *
0a2b47
- * Send a text (UTF-8) message to the peer.
0a2b47
+ * Send a %NULL-terminated text (UTF-8) message to the peer. The length
0a2b47
+ * of @text is computed with strlen(), so text messages containing
0a2b47
+ * embedded null characters cannot be sent with this function.
0a2b47
  *
0a2b47
  * The message is queued to be sent and will be sent when the main loop
0a2b47
  * is run.
0a2b47
@@ -1717,7 +1794,7 @@
0a2b47
 	g_return_if_fail (text != NULL);
0a2b47
 
0a2b47
 	length = strlen (text);
0a2b47
-	g_return_if_fail (g_utf8_validate (text, length, NULL));
0a2b47
+        g_return_if_fail (utf8_validate (text, length));
0a2b47
 
0a2b47
 	send_message (self, SOUP_WEBSOCKET_QUEUE_NORMAL, 0x01, (const guint8 *) text, length);
0a2b47
 }
0a2b47
-- 
0a2b47
2.26.2
0a2b47