From c48d12fc0abb8f113ed386c47bf02451ea8f853d Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Tue, 22 Nov 2022 15:55:07 +0100 Subject: [PATCH] boot: Add xstrn8_to_16 (cherry picked from commit 8ad7deffa95d33b5849ad6589dd52ab12e645edc) Related: #2138081 --- src/boot/efi/efi-string.c | 78 +++++++++++++++++++++++++++++++++- src/boot/efi/efi-string.h | 5 +++ src/boot/efi/test-efi-string.c | 27 ++++++++++++ 3 files changed, 109 insertions(+), 1 deletion(-) diff --git a/src/boot/efi/efi-string.c b/src/boot/efi/efi-string.c index b877c6f224..2ba15673c9 100644 --- a/src/boot/efi/efi-string.c +++ b/src/boot/efi/efi-string.c @@ -9,7 +9,8 @@ # include "util.h" #else # include -# include "macro.h" +# include "alloc-util.h" +# define xnew(t, n) ASSERT_SE_PTR(new(t, n)) # define xmalloc(n) ASSERT_SE_PTR(malloc(n)) #endif @@ -138,6 +139,81 @@ DEFINE_STRCHR(char16_t, strchr16); DEFINE_STRNDUP(char, xstrndup8, strnlen8); DEFINE_STRNDUP(char16_t, xstrndup16, strnlen16); +static unsigned utf8_to_unichar(const char *utf8, size_t n, char32_t *c) { + char32_t unichar; + unsigned len; + + assert(utf8); + assert(c); + + if (!(utf8[0] & 0x80)) { + *c = utf8[0]; + return 1; + } else if ((utf8[0] & 0xe0) == 0xc0) { + len = 2; + unichar = utf8[0] & 0x1f; + } else if ((utf8[0] & 0xf0) == 0xe0) { + len = 3; + unichar = utf8[0] & 0x0f; + } else if ((utf8[0] & 0xf8) == 0xf0) { + len = 4; + unichar = utf8[0] & 0x07; + } else if ((utf8[0] & 0xfc) == 0xf8) { + len = 5; + unichar = utf8[0] & 0x03; + } else if ((utf8[0] & 0xfe) == 0xfc) { + len = 6; + unichar = utf8[0] & 0x01; + } else { + *c = UINT32_MAX; + return 1; + } + + if (len > n) { + *c = UINT32_MAX; + return len; + } + + for (unsigned i = 1; i < len; i++) { + if ((utf8[i] & 0xc0) != 0x80) { + *c = UINT32_MAX; + return len; + } + unichar <<= 6; + unichar |= utf8[i] & 0x3f; + } + + *c = unichar; + return len; +} + +/* Convert UTF-8 to UCS-2, skipping any invalid or short byte sequences. */ +char16_t *xstrn8_to_16(const char *str8, size_t n) { + if (!str8 || n == 0) + return NULL; + + size_t i = 0; + char16_t *str16 = xnew(char16_t, n + 1); + + while (n > 0 && *str8 != '\0') { + char32_t unichar; + + size_t utf8len = utf8_to_unichar(str8, n, &unichar); + str8 += utf8len; + n = LESS_BY(n, utf8len); + + switch (unichar) { + case 0 ... 0xd7ffU: + case 0xe000U ... 0xffffU: + str16[i++] = unichar; + break; + } + } + + str16[i] = '\0'; + return str16; +} + static bool efi_fnmatch_prefix(const char16_t *p, const char16_t *h, const char16_t **ret_p, const char16_t **ret_h) { assert(p); assert(h); diff --git a/src/boot/efi/efi-string.h b/src/boot/efi/efi-string.h index 1ebd5fd6b7..9b2a9ad1c5 100644 --- a/src/boot/efi/efi-string.h +++ b/src/boot/efi/efi-string.h @@ -99,6 +99,11 @@ static inline char16_t *xstrdup16(const char16_t *s) { return xstrndup16(s, SIZE_MAX); } +char16_t *xstrn8_to_16(const char *str8, size_t n); +static inline char16_t *xstr8_to_16(const char *str8) { + return xstrn8_to_16(str8, strlen8(str8)); +} + bool efi_fnmatch(const char16_t *pattern, const char16_t *haystack); bool parse_number8(const char *s, uint64_t *ret_u, const char **ret_tail); diff --git a/src/boot/efi/test-efi-string.c b/src/boot/efi/test-efi-string.c index 2b2359fe5c..7b43e1d629 100644 --- a/src/boot/efi/test-efi-string.c +++ b/src/boot/efi/test-efi-string.c @@ -324,6 +324,33 @@ TEST(xstrdup16) { free(s); } +TEST(xstrn8_to_16) { + char16_t *s = NULL; + + assert_se(xstrn8_to_16(NULL, 1) == NULL); + assert_se(xstrn8_to_16("a", 0) == NULL); + + assert_se(s = xstrn8_to_16("", 1)); + assert_se(streq16(s, u"")); + free(s); + + assert_se(s = xstrn8_to_16("1", 1)); + assert_se(streq16(s, u"1")); + free(s); + + assert_se(s = xstr8_to_16("abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~")); + assert_se(streq16(s, u"abcxyzABCXYZ09 .,-_#*!\"§$%&/()=?`~")); + free(s); + + assert_se(s = xstr8_to_16("ÿⱿ𝇉 😺")); + assert_se(streq16(s, u"ÿⱿ ")); + free(s); + + assert_se(s = xstrn8_to_16("¶¶", 3)); + assert_se(streq16(s, u"¶")); + free(s); +} + #define TEST_FNMATCH_ONE(pattern, haystack, expect) \ ({ \ assert_se(fnmatch(pattern, haystack, 0) == (expect ? 0 : FNM_NOMATCH)); \