From 109bb2f692c746bc63a0ade8737b584aecb0b1ad Mon Sep 17 00:00:00 2001 From: Carlos Garcia Campos Date: Thu, 27 Jun 2019 16:03:21 +0200 Subject: [PATCH] WebSockets: allow null characters in text messages data RFC 6455 says that text messages should contain valid UTF-8, and null characters are valid according to RFC 3629. However, we are using g_utf8_validate(), which considers null characters as errors, to validate WebSockets text messages. This patch adds an internal utf8_validate() function based on g_utf8_validate() but allowing null characters and just returning a gboolean since we are always ignoring the end parameter in case of errors. soup_websocket_connection_send_text() assumes the given text is null-terminated, so we need a new public function to allow sending text messages containing null characters. The upstream patch adds soup_websocket_connection_send_message() that receives a SoupWebsocketDataType and GBytes, which is consistent with the SoupWebsocketConnection::message signal. For the RHEL backport, the addition of soup_websocket_connection_send_message() is dropped, as we don't need it and don't want to expose new API. diff --git libsoup/soup-websocket-connection.c libsoup/soup-websocket-connection.c index 66bd6871..67a98731 100644 --- a/libsoup/soup-websocket-connection.c +++ b/libsoup/soup-websocket-connection.c @@ -155,6 +155,82 @@ static void protocol_error_and_close (SoupWebsocketConnection *self); +/* Code below is based on g_utf8_validate() implementation, + * but handling NULL characters as valid, as expected by + * WebSockets and compliant with RFC 3629. 
+ */ +#define VALIDATE_BYTE(mask, expect) \ + G_STMT_START { \ + if (G_UNLIKELY((*(guchar *)p & (mask)) != (expect))) \ + return FALSE; \ + } G_STMT_END + +/* see IETF RFC 3629 Section 4 */ +static gboolean +utf8_validate (const char *str, + gsize max_len) + +{ + const gchar *p; + + for (p = str; ((p - str) < max_len); p++) { + if (*(guchar *)p < 128) + /* done */; + else { + if (*(guchar *)p < 0xe0) { /* 110xxxxx */ + if (G_UNLIKELY (max_len - (p - str) < 2)) + return FALSE; + + if (G_UNLIKELY (*(guchar *)p < 0xc2)) + return FALSE; + } else { + if (*(guchar *)p < 0xf0) { /* 1110xxxx */ + if (G_UNLIKELY (max_len - (p - str) < 3)) + return FALSE; + + switch (*(guchar *)p++ & 0x0f) { + case 0: + VALIDATE_BYTE(0xe0, 0xa0); /* 0xa0 ... 0xbf */ + break; + case 0x0d: + VALIDATE_BYTE(0xe0, 0x80); /* 0x80 ... 0x9f */ + break; + default: + VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */ + } + } else if (*(guchar *)p < 0xf5) { /* 11110xxx excluding out-of-range */ + if (G_UNLIKELY (max_len - (p - str) < 4)) + return FALSE; + + switch (*(guchar *)p++ & 0x07) { + case 0: + VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */ + if (G_UNLIKELY((*(guchar *)p & 0x30) == 0)) + return FALSE; + break; + case 4: + VALIDATE_BYTE(0xf0, 0x80); /* 0x80 ... 0x8f */ + break; + default: + VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */ + } + p++; + VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */ + } else { + return FALSE; + } + } + + p++; + VALIDATE_BYTE(0xc0, 0x80); /* 10xxxxxx */ + } + } + + return TRUE; +} + +#undef VALIDATE_BYTE + static void frame_free (gpointer data) { @@ -629,7 +705,7 @@ data += 2; len -= 2; - if (!g_utf8_validate ((char *)data, len, NULL)) { + if (!utf8_validate ((const char *)data, len)) { g_debug ("received non-UTF8 close data: %d '%.*s' %d", (int)len, (int)len, (char *)data, (int)data[0]); protocol_error_and_close (self); return; @@ -777,9 +853,8 @@ /* Actually deliver the message? 
*/ if (fin) { if (pv->message_opcode == 0x01 && - !g_utf8_validate((char *)pv->message_data->data, - pv->message_data->len, - NULL)) { + !utf8_validate((const char *)pv->message_data->data, + pv->message_data->len)) { g_debug ("received invalid non-UTF8 text data"); @@ -1699,7 +1774,9 @@ * @self: the WebSocket * @text: the message contents * - * Send a text (UTF-8) message to the peer. + * Send a %NULL-terminated text (UTF-8) message to the peer. Text messages + * containing embedded %NULL characters cannot be sent with this function, + * because the length of @text is determined with strlen(). * * The message is queued to be sent and will be sent when the main loop * is run. @@ -1717,7 +1794,7 @@ g_return_if_fail (text != NULL); length = strlen (text); - g_return_if_fail (g_utf8_validate (text, length, NULL)); + g_return_if_fail (utf8_validate (text, length)); send_message (self, SOUP_WEBSOCKET_QUEUE_NORMAL, 0x01, (const guint8 *) text, length); } -- 2.26.2