diff --git a/SOURCES/gcc8-rh2001788.patch b/SOURCES/gcc8-rh2001788.patch new file mode 100644 index 0000000..15f20be --- /dev/null +++ b/SOURCES/gcc8-rh2001788.patch @@ -0,0 +1,456 @@ +commit ee3db7c8f844556d35a66b3732bad9f44a086491 +Author: Jonathan Wakely +Date: Mon Sep 27 20:44:24 2021 +0100 + + libstdc++: Fix handling of invalid ranges in std::regex [PR102447] + + std::regex currently allows invalid bracket ranges such as [\w-a] which + are only allowed by ECMAScript when in web browser compatibility mode. + It should be an error, because the start of the range is a character + class, not a single character. The current implementation of + _Compiler::_M_expression_term does not provide a way to reject this, + because we only remember a previous character, not whether we just + processed a character class (or collating symbol etc.) + + This patch replaces the pair<bool, _CharT> used to emulate + optional<_CharT> with a custom class closer to pair<tribool, _CharT>. That + allows us to track three states, so that we can tell when we've just + seen a character class. + + With this additional state the code in _M_expression_term for processing + the _S_token_bracket_dash can be improved to correctly reject the [\w-a] + case, without regressing for valid cases such as [\w-] and [----]. + + libstdc++-v3/ChangeLog: + + PR libstdc++/102447 + * include/bits/regex_compiler.h (_Compiler::_BracketState): New + class. + (_Compiler::_BracketMatcher): New alias template. + (_Compiler::_M_expression_term): Change pair<bool, _CharT> + parameter to _BracketState. Process first character for + ECMAScript syntax as well as POSIX. + * include/bits/regex_compiler.tcc + (_Compiler::_M_insert_bracket_matcher): Pass _BracketState. + (_Compiler::_M_expression_term): Use _BracketState to store + state between calls. Improve handling of dashes in ranges. + * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc: + Add more tests for ranges containing dashes. Check invalid + ranges with character class at the beginning. 
+ + (cherry picked from commit 7ce3c230edf6e498e125c805a6dd313bf87dc439) + +diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h +index 7e5c2073554..2eb1c3f7863 100644 +--- a/libstdc++-v3/include/bits/regex_compiler.h ++++ b/libstdc++-v3/include/bits/regex_compiler.h +@@ -122,13 +122,45 @@ namespace __detail + void + _M_insert_bracket_matcher(bool __neg); + +- // Returns true if successfully matched one term and should continue. ++ // Cache of the last atom seen in a bracketed range expression. ++ struct _BracketState ++ { ++ enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None; ++ _CharT _M_char; ++ ++ void ++ set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; } ++ ++ _GLIBCXX_NODISCARD _CharT ++ get() const noexcept { return _M_char; } ++ ++ void ++ reset(_Type __t = _Type::_None) noexcept { _M_type = __t; } ++ ++ explicit operator bool() const noexcept ++ { return _M_type != _Type::_None; } ++ ++ // Previous token was a single character. ++ _GLIBCXX_NODISCARD bool ++ _M_is_char() const noexcept { return _M_type == _Type::_Char; } ++ ++ // Previous token was a character class, equivalent class, ++ // collating symbol etc. ++ _GLIBCXX_NODISCARD bool ++ _M_is_class() const noexcept { return _M_type == _Type::_Class; } ++ }; ++ ++ template ++ using _BracketMatcher ++ = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>; ++ ++ // Returns true if successfully parsed one term and should continue ++ // compiling a bracket expression. + // Returns false if the compiler should move on. 
+ template + bool +- _M_expression_term(pair& __last_char, +- _BracketMatcher<_TraitsT, __icase, __collate>& +- __matcher); ++ _M_expression_term(_BracketState& __last_char, ++ _BracketMatcher<__icase, __collate>& __matcher); + + int + _M_cur_int_value(int __radix); +diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc +index b1169428afb..5877d30ba52 100644 +--- a/libstdc++-v3/include/bits/regex_compiler.tcc ++++ b/libstdc++-v3/include/bits/regex_compiler.tcc +@@ -140,7 +140,8 @@ namespace __detail + return true; + if (this->_M_atom()) + { +- while (this->_M_quantifier()); ++ while (this->_M_quantifier()) ++ ; + return true; + } + return false; +@@ -410,7 +411,7 @@ namespace __detail + _M_insert_character_class_matcher() + { + __glibcxx_assert(_M_value.size() == 1); +- _BracketMatcher<_TraitsT, __icase, __collate> __matcher ++ _BracketMatcher<__icase, __collate> __matcher + (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); + __matcher._M_add_character_class(_M_value, false); + __matcher._M_ready(); +@@ -424,25 +425,17 @@ namespace __detail + _Compiler<_TraitsT>:: + _M_insert_bracket_matcher(bool __neg) + { +- _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits); +- pair __last_char; // Optional<_CharT> +- __last_char.first = false; +- if (!(_M_flags & regex_constants::ECMAScript)) +- { +- if (_M_try_char()) +- { +- __last_char.first = true; +- __last_char.second = _M_value[0]; +- } +- else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) +- { +- __last_char.first = true; +- __last_char.second = '-'; +- } +- } +- while (_M_expression_term(__last_char, __matcher)); +- if (__last_char.first) +- __matcher._M_add_char(__last_char.second); ++ _BracketMatcher<__icase, __collate> __matcher(__neg, _M_traits); ++ _BracketState __last_char; ++ if (_M_try_char()) ++ __last_char.set(_M_value[0]); ++ else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) ++ // Dash as first character is a 
normal character. ++ __last_char.set('-'); ++ while (_M_expression_term(__last_char, __matcher)) ++ ; ++ if (__last_char._M_is_char()) ++ __matcher._M_add_char(__last_char.get()); + __matcher._M_ready(); + _M_stack.push(_StateSeqT( + *_M_nfa, +@@ -453,27 +446,27 @@ namespace __detail + template + bool + _Compiler<_TraitsT>:: +- _M_expression_term(pair& __last_char, +- _BracketMatcher<_TraitsT, __icase, __collate>& __matcher) ++ _M_expression_term(_BracketState& __last_char, ++ _BracketMatcher<__icase, __collate>& __matcher) + { + if (_M_match_token(_ScannerT::_S_token_bracket_end)) + return false; + ++ // Add any previously cached char into the matcher and update cache. + const auto __push_char = [&](_CharT __ch) + { +- if (__last_char.first) +- __matcher._M_add_char(__last_char.second); +- else +- __last_char.first = true; +- __last_char.second = __ch; ++ if (__last_char._M_is_char()) ++ __matcher._M_add_char(__last_char.get()); ++ __last_char.set(__ch); + }; +- const auto __flush = [&] ++ // Add any previously cached char into the matcher and update cache. ++ const auto __push_class = [&] + { +- if (__last_char.first) +- { +- __matcher._M_add_char(__last_char.second); +- __last_char.first = false; +- } ++ if (__last_char._M_is_char()) ++ __matcher._M_add_char(__last_char.get()); ++ // We don't cache anything here, just record that the last thing ++ // processed was a character class (or similar). 
++ __last_char.reset(_BracketState::_Type::_Class); + }; + + if (_M_match_token(_ScannerT::_S_token_collsymbol)) +@@ -482,16 +475,16 @@ namespace __detail + if (__symbol.size() == 1) + __push_char(__symbol[0]); + else +- __flush(); ++ __push_class(); + } + else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) + { +- __flush(); ++ __push_class(); + __matcher._M_add_equivalence_class(_M_value); + } + else if (_M_match_token(_ScannerT::_S_token_char_class_name)) + { +- __flush(); ++ __push_class(); + __matcher._M_add_character_class(_M_value, false); + } + else if (_M_try_char()) +@@ -508,49 +501,50 @@ namespace __detail + // It turns out that no one reads BNFs ;) + else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) + { +- if (!__last_char.first) ++ if (_M_match_token(_ScannerT::_S_token_bracket_end)) + { +- if (!(_M_flags & regex_constants::ECMAScript)) +- { +- if (_M_match_token(_ScannerT::_S_token_bracket_end)) +- { +- __push_char('-'); +- return false; +- } +- __throw_regex_error( +- regex_constants::error_range, +- "Unexpected dash in bracket expression. For POSIX syntax, " +- "a dash is not treated literally only when it is at " +- "beginning or end."); +- } ++ // For "-]" the dash is a literal character. + __push_char('-'); ++ return false; + } +- else ++ else if (__last_char._M_is_class()) ++ { ++ // "\\w-" is invalid, start of range must be a single char. 
++ __throw_regex_error(regex_constants::error_range, ++ "Invalid start of range in bracket expression."); ++ } ++ else if (__last_char._M_is_char()) + { + if (_M_try_char()) + { +- __matcher._M_make_range(__last_char.second, _M_value[0]); +- __last_char.first = false; ++ // "x-y" ++ __matcher._M_make_range(__last_char.get(), _M_value[0]); ++ __last_char.reset(); + } + else if (_M_match_token(_ScannerT::_S_token_bracket_dash)) + { +- __matcher._M_make_range(__last_char.second, '-'); +- __last_char.first = false; ++ // "x--" ++ __matcher._M_make_range(__last_char.get(), '-'); ++ __last_char.reset(); + } + else +- { +- if (_M_scanner._M_get_token() +- != _ScannerT::_S_token_bracket_end) +- __throw_regex_error( +- regex_constants::error_range, +- "Character is expected after a dash."); +- __push_char('-'); +- } ++ __throw_regex_error(regex_constants::error_range, ++ "Invalid end of range in bracket expression."); + } ++ else if (_M_flags & regex_constants::ECMAScript) ++ { ++ // A dash that is not part of an existing range. Might be the ++ // start of a new range, or might just be a literal '-' char. ++ // Only ECMAScript allows that in the middle of a bracket expr. 
++ __push_char('-'); ++ } ++ else ++ __throw_regex_error(regex_constants::error_range, ++ "Invalid dash in bracket expression."); + } + else if (_M_match_token(_ScannerT::_S_token_quoted_class)) + { +- __flush(); ++ __push_class(); + __matcher._M_add_character_class(_M_value, + _M_ctype.is(_CtypeT::upper, + _M_value[0])); +diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc +index 236ab663fc0..57088f5af83 100644 +--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc ++++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc +@@ -68,6 +68,16 @@ test01() + void + test02() + { ++ VERIFY(regex_match("-", regex("[-]", regex_constants::ECMAScript))); ++ VERIFY(regex_match("-", regex("[--]", regex_constants::ECMAScript))); ++ VERIFY(regex_match("-", regex("[---]", regex_constants::ECMAScript))); ++ VERIFY(regex_match("-", regex("[----]", regex_constants::ECMAScript))); ++ VERIFY(regex_match("-", regex("[-----]", regex_constants::ECMAScript))); ++ ++ VERIFY(regex_match("-", regex("[-]", regex_constants::extended))); ++ VERIFY(regex_match("-", regex("[--]", regex_constants::extended))); ++ VERIFY(regex_match("-", regex("[---]", regex_constants::extended))); ++ VERIFY(regex_match("-", regex("[----]", regex_constants::extended))); + try + { + std::regex re("[-----]", std::regex::extended); +@@ -77,7 +87,6 @@ test02() + { + VERIFY(e.code() == std::regex_constants::error_range); + } +- std::regex re("[-----]", std::regex::ECMAScript); + + VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended))); + VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended))); +@@ -92,7 +101,27 @@ test02() + } + catch (const std::regex_error& e) + { ++ VERIFY(e.code() == std::regex_constants::error_range); ++ } ++ try ++ { ++ regex("[@--]", regex_constants::extended); ++ VERIFY(false); + } ++ catch (const 
std::regex_error& e) ++ { ++ VERIFY(e.code() == std::regex_constants::error_range); ++ } ++ try ++ { ++ regex("[--%]", regex_constants::extended); ++ VERIFY(false); ++ } ++ catch (const std::regex_error& e) ++ { ++ VERIFY(e.code() == std::regex_constants::error_range); ++ } ++ + VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended))); + } + +@@ -157,6 +186,36 @@ test06() + VERIFY(regex_match("a-", debian_cron_namespace_ok)); + } + ++// libstdc++/102447 ++void ++test07() ++{ ++ VERIFY(regex_match("-", std::regex("[\\w-]", std::regex::ECMAScript))); ++ VERIFY(regex_match("a", std::regex("[\\w-]", std::regex::ECMAScript))); ++ VERIFY(regex_match("-", std::regex("[a-]", std::regex::ECMAScript))); ++ VERIFY(regex_match("a", std::regex("[a-]", std::regex::ECMAScript))); ++ ++ try ++ { ++ std::regex re("[\\w-a]", std::regex::ECMAScript); ++ VERIFY(false); ++ } ++ catch (const std::regex_error& e) ++ { ++ VERIFY(e.code() == std::regex_constants::error_range); ++ } ++ ++ try ++ { ++ std::regex re("[\\w--]", std::regex::ECMAScript); ++ VERIFY(false); ++ } ++ catch (const std::regex_error& e) ++ { ++ VERIFY(e.code() == std::regex_constants::error_range); ++ } ++} ++ + int + main() + { +@@ -166,6 +225,7 @@ main() + test04(); + test05(); + test06(); ++ test07(); + + return 0; + } + +commit 1851cc4c5f2666dfdec53a2ada57095ffc59e08b +Author: Jonathan Wakely +Date: Mon Dec 13 13:36:33 2021 +0000 + + libstdc++: Fix non-reserved name in header + + libstdc++-v3/ChangeLog: + + * include/bits/regex_compiler.tcc (_Compiler::_M_match_token): + Use reserved name for parameter. + * testsuite/17_intro/names.cc: Check "token". 
+ + (cherry picked from commit b0e6a257f1862e217cdf19332ea0f7bad56dcddc) + +diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc +index 8af920e5fe9..b1169428afb 100644 +--- a/libstdc++-v3/include/bits/regex_compiler.tcc ++++ b/libstdc++-v3/include/bits/regex_compiler.tcc +@@ -586,9 +586,9 @@ namespace __detail + template + bool + _Compiler<_TraitsT>:: +- _M_match_token(_TokenT token) ++ _M_match_token(_TokenT __token) + { +- if (token == _M_scanner._M_get_token()) ++ if (__token == _M_scanner._M_get_token()) + { + _M_value = _M_scanner._M_get_value(); + _M_scanner._M_advance(); +diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc +index d758138dfb1..6c06aba7228 100644 +--- a/libstdc++-v3/testsuite/17_intro/names.cc ++++ b/libstdc++-v3/testsuite/17_intro/names.cc +@@ -99,6 +99,7 @@ + #define z ( + + #define tmp ( ++#define token ( + + #if __cplusplus < 201103L + #define uses_allocator ( +--- a/libstdc++-v3/include/bits/c++config.orig 2022-07-08 15:06:14.083231445 -0400 ++++ b/libstdc++-v3/include/bits/c++config 2022-07-08 15:06:41.733247859 -0400 +@@ -99,6 +99,12 @@ + # define _GLIBCXX_ABI_TAG_CXX11 __attribute ((__abi_tag__ ("cxx11"))) + #endif + ++// Macro to warn about unused results. ++#if __cplusplus >= 201703L ++# define _GLIBCXX_NODISCARD [[__nodiscard__]] ++#else ++# define _GLIBCXX_NODISCARD ++#endif + + #if __cplusplus + diff --git a/SPECS/gcc.spec b/SPECS/gcc.spec index b1130d0..b37c765 100644 --- a/SPECS/gcc.spec +++ b/SPECS/gcc.spec @@ -4,7 +4,7 @@ %global gcc_major 8 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. 
-%global gcc_release 13 +%global gcc_release 14 %global nvptx_tools_gitrev c28050f60193b3b95a18866a96f03334e874e78f %global nvptx_newlib_gitrev aadc8eb0ec43b7cd0dd2dfb484bae63c8b05ef24 %global _unpackaged_files_terminate_build 0 @@ -285,6 +285,7 @@ Patch24: gcc8-pch-tweaks.patch Patch25: gcc8-aarch64-mtune-neoverse-512tvb.patch Patch26: gcc8-rh2028609.patch Patch27: gcc8-libgfortran-default-values.patch +Patch28: gcc8-rh2001788.patch Patch30: gcc8-rh1668903-1.patch Patch31: gcc8-rh1668903-2.patch @@ -890,6 +891,7 @@ so that there cannot be any synchronization problems. %patch25 -p1 -b .neoverse~ %patch26 -p1 -b .rh2028609~ %patch27 -p1 -b .libgfortran-default~ +%patch28 -p1 -b .rh2001788~ %patch30 -p0 -b .rh1668903-1~ %patch31 -p0 -b .rh1668903-2~ @@ -3301,6 +3303,9 @@ fi %{ANNOBIN_GCC_PLUGIN_DIR}/gcc-annobin.so.0.0.0 %changelog +* Fri Jul 8 2022 Jonathan Wakely 8.5.0-14 +- backport std::regex check for invalid range (#2001788) + * Wed Apr 20 2022 Marek Polacek 8.5.0-13 - require docbook-style-xsl instead of docbook5-style-xsl (#2073888) - backport Default widths with -fdec-format-defaults patch (#2074614)