Blame SOURCES/gcc8-rh2001788.patch

d8f365
commit ee3db7c8f844556d35a66b3732bad9f44a086491
d8f365
Author: Jonathan Wakely <jwakely@redhat.com>
d8f365
Date:   Mon Sep 27 20:44:24 2021 +0100
d8f365
d8f365
    libstdc++: Fix handling of invalid ranges in std::regex [PR102447]
d8f365
    
d8f365
    std::regex currently allows invalid bracket ranges such as [\w-a] which
d8f365
    are only allowed by ECMAScript when in web browser compatibility mode.
d8f365
    It should be an error, because the start of the range is a character
d8f365
    class, not a single character. The current implementation of
d8f365
    _Compiler::_M_expression_term does not provide a way to reject this,
d8f365
    because we only remember a previous character, not whether we just
d8f365
    processed a character class (or collating symbol etc.)
d8f365
    
d8f365
    This patch replaces the pair<bool, CharT> used to emulate
d8f365
    optional<CharT> with a custom class closer to pair<tribool,CharT>. That
d8f365
    allows us to track three states, so that we can tell when we've just
d8f365
    seen a character class.
d8f365
    
d8f365
    With this additional state the code in _M_expression_term for processing
d8f365
    the _S_token_bracket_dash can be improved to correctly reject the [\w-a]
d8f365
    case, without regressing for valid cases such as [\w-] and [----].
d8f365
    
d8f365
    libstdc++-v3/ChangeLog:
d8f365
    
d8f365
            PR libstdc++/102447
d8f365
            * include/bits/regex_compiler.h (_Compiler::_BracketState): New
d8f365
            class.
d8f365
            (_Compiler::_BracketMatcher): New alias template.
d8f365
            (_Compiler::_M_expression_term): Change pair<bool, CharT>
d8f365
            parameter to _BracketState. Process first character for
d8f365
            ECMAScript syntax as well as POSIX.
d8f365
            * include/bits/regex_compiler.tcc
d8f365
            (_Compiler::_M_insert_bracket_matcher): Pass _BracketState.
d8f365
            (_Compiler::_M_expression_term): Use _BracketState to store
d8f365
            state between calls. Improve handling of dashes in ranges.
d8f365
            * testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
d8f365
            Add more tests for ranges containing dashes. Check invalid
d8f365
            ranges with character class at the beginning.
d8f365
    
d8f365
    (cherry picked from commit 7ce3c230edf6e498e125c805a6dd313bf87dc439)
d8f365
d8f365
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
d8f365
index 7e5c2073554..2eb1c3f7863 100644
d8f365
--- a/libstdc++-v3/include/bits/regex_compiler.h
d8f365
+++ b/libstdc++-v3/include/bits/regex_compiler.h
d8f365
@@ -122,13 +122,45 @@ namespace __detail
d8f365
 	void
d8f365
 	_M_insert_bracket_matcher(bool __neg);
d8f365
 
d8f365
-      // Returns true if successfully matched one term and should continue.
d8f365
+      // Cache of the last atom seen in a bracketed range expression.
d8f365
+      struct _BracketState
d8f365
+      {
d8f365
+	enum class _Type : char { _None, _Char, _Class } _M_type = _Type::_None;
d8f365
+	_CharT _M_char;
d8f365
+
d8f365
+	void
d8f365
+	set(_CharT __c) noexcept { _M_type = _Type::_Char; _M_char = __c; }
d8f365
+
d8f365
+	_GLIBCXX_NODISCARD _CharT
d8f365
+	get() const noexcept { return _M_char; }
d8f365
+
d8f365
+	void
d8f365
+	reset(_Type __t = _Type::_None) noexcept { _M_type = __t; }
d8f365
+
d8f365
+	explicit operator bool() const noexcept
d8f365
+	{ return _M_type != _Type::_None; }
d8f365
+
d8f365
+	// Previous token was a single character.
d8f365
+	_GLIBCXX_NODISCARD bool
d8f365
+	_M_is_char() const noexcept { return _M_type == _Type::_Char; }
d8f365
+
d8f365
+	// Previous token was a character class, equivalent class,
d8f365
+	// collating symbol etc.
d8f365
+	_GLIBCXX_NODISCARD bool
d8f365
+	_M_is_class() const noexcept { return _M_type == _Type::_Class; }
d8f365
+      };
d8f365
+
d8f365
+      template<bool __icase, bool __collate>
d8f365
+	using _BracketMatcher
d8f365
+	  = std::__detail::_BracketMatcher<_TraitsT, __icase, __collate>;
d8f365
+
d8f365
+      // Returns true if successfully parsed one term and should continue
d8f365
+      // compiling a bracket expression.
d8f365
       // Returns false if the compiler should move on.
d8f365
       template<bool __icase, bool __collate>
d8f365
 	bool
d8f365
-	_M_expression_term(pair<bool, _CharT>& __last_char,
d8f365
-			   _BracketMatcher<_TraitsT, __icase, __collate>&
d8f365
-			   __matcher);
d8f365
+	_M_expression_term(_BracketState& __last_char,
d8f365
+			   _BracketMatcher<__icase, __collate>& __matcher);
d8f365
 
d8f365
       int
d8f365
       _M_cur_int_value(int __radix);
d8f365
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
index b1169428afb..5877d30ba52 100644
d8f365
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
@@ -140,7 +140,8 @@ namespace __detail
d8f365
 	return true;
d8f365
       if (this->_M_atom())
d8f365
 	{
d8f365
-	  while (this->_M_quantifier());
d8f365
+	  while (this->_M_quantifier())
d8f365
+	    ;
d8f365
 	  return true;
d8f365
 	}
d8f365
       return false;
d8f365
@@ -410,7 +411,7 @@ namespace __detail
d8f365
     _M_insert_character_class_matcher()
d8f365
     {
d8f365
       __glibcxx_assert(_M_value.size() == 1);
d8f365
-      _BracketMatcher<_TraitsT, __icase, __collate> __matcher
d8f365
+      _BracketMatcher<__icase, __collate> __matcher
d8f365
 	(_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits);
d8f365
       __matcher._M_add_character_class(_M_value, false);
d8f365
       __matcher._M_ready();
d8f365
@@ -424,25 +425,17 @@ namespace __detail
d8f365
     _Compiler<_TraitsT>::
d8f365
     _M_insert_bracket_matcher(bool __neg)
d8f365
     {
d8f365
-      _BracketMatcher<_TraitsT, __icase, __collate> __matcher(__neg, _M_traits);
d8f365
-      pair<bool, _CharT> __last_char; // Optional<_CharT>
d8f365
-      __last_char.first = false;
d8f365
-      if (!(_M_flags & regex_constants::ECMAScript))
d8f365
-	{
d8f365
-	  if (_M_try_char())
d8f365
-	    {
d8f365
-	      __last_char.first = true;
d8f365
-	      __last_char.second = _M_value[0];
d8f365
-	    }
d8f365
-	  else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
d8f365
-	    {
d8f365
-	      __last_char.first = true;
d8f365
-	      __last_char.second = '-';
d8f365
-	    }
d8f365
-	}
d8f365
-      while (_M_expression_term(__last_char, __matcher));
d8f365
-      if (__last_char.first)
d8f365
-	__matcher._M_add_char(__last_char.second);
d8f365
+      _BracketMatcher<__icase, __collate> __matcher(__neg, _M_traits);
d8f365
+      _BracketState __last_char;
d8f365
+      if (_M_try_char())
d8f365
+	__last_char.set(_M_value[0]);
d8f365
+      else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
d8f365
+	// Dash as first character is a normal character.
d8f365
+	__last_char.set('-');
d8f365
+      while (_M_expression_term(__last_char, __matcher))
d8f365
+	;
d8f365
+      if (__last_char._M_is_char())
d8f365
+	__matcher._M_add_char(__last_char.get());
d8f365
       __matcher._M_ready();
d8f365
       _M_stack.push(_StateSeqT(
d8f365
 		      *_M_nfa,
d8f365
@@ -453,27 +446,27 @@ namespace __detail
d8f365
   template<bool __icase, bool __collate>
d8f365
     bool
d8f365
     _Compiler<_TraitsT>::
d8f365
-    _M_expression_term(pair<bool, _CharT>& __last_char,
d8f365
-		       _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
d8f365
+    _M_expression_term(_BracketState& __last_char,
d8f365
+		       _BracketMatcher<__icase, __collate>& __matcher)
d8f365
     {
d8f365
       if (_M_match_token(_ScannerT::_S_token_bracket_end))
d8f365
 	return false;
d8f365
 
d8f365
+      // Add any previously cached char into the matcher and update cache.
d8f365
       const auto __push_char = [&](_CharT __ch)
d8f365
       {
d8f365
-	if (__last_char.first)
d8f365
-	  __matcher._M_add_char(__last_char.second);
d8f365
-	else
d8f365
-	  __last_char.first = true;
d8f365
-	__last_char.second = __ch;
d8f365
+	if (__last_char._M_is_char())
d8f365
+	  __matcher._M_add_char(__last_char.get());
d8f365
+	__last_char.set(__ch);
d8f365
       };
d8f365
-      const auto __flush = [&]
d8f365
+      // Add any previously cached char into the matcher and update cache.
d8f365
+      const auto __push_class = [&]
d8f365
       {
d8f365
-	if (__last_char.first)
d8f365
-	  {
d8f365
-	    __matcher._M_add_char(__last_char.second);
d8f365
-	    __last_char.first = false;
d8f365
-	  }
d8f365
+        if (__last_char._M_is_char())
d8f365
+	  __matcher._M_add_char(__last_char.get());
d8f365
+	// We don't cache anything here, just record that the last thing
d8f365
+	// processed was a character class (or similar).
d8f365
+	__last_char.reset(_BracketState::_Type::_Class);
d8f365
       };
d8f365
 
d8f365
       if (_M_match_token(_ScannerT::_S_token_collsymbol))
d8f365
@@ -482,16 +475,16 @@ namespace __detail
d8f365
 	  if (__symbol.size() == 1)
d8f365
 	    __push_char(__symbol[0]);
d8f365
 	  else
d8f365
-	    __flush();
d8f365
+	    __push_class();
d8f365
 	}
d8f365
       else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
d8f365
 	{
d8f365
-	  __flush();
d8f365
+	  __push_class();
d8f365
 	  __matcher._M_add_equivalence_class(_M_value);
d8f365
 	}
d8f365
       else if (_M_match_token(_ScannerT::_S_token_char_class_name))
d8f365
 	{
d8f365
-	  __flush();
d8f365
+	  __push_class();
d8f365
 	  __matcher._M_add_character_class(_M_value, false);
d8f365
 	}
d8f365
       else if (_M_try_char())
d8f365
@@ -508,49 +501,50 @@ namespace __detail
d8f365
       // It turns out that no one reads BNFs ;)
d8f365
       else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
d8f365
 	{
d8f365
-	  if (!__last_char.first)
d8f365
+	  if (_M_match_token(_ScannerT::_S_token_bracket_end))
d8f365
 	    {
d8f365
-	      if (!(_M_flags & regex_constants::ECMAScript))
d8f365
-		{
d8f365
-		  if (_M_match_token(_ScannerT::_S_token_bracket_end))
d8f365
-		    {
d8f365
-		      __push_char('-');
d8f365
-		      return false;
d8f365
-		    }
d8f365
-		  __throw_regex_error(
d8f365
-		    regex_constants::error_range,
d8f365
-		    "Unexpected dash in bracket expression. For POSIX syntax, "
d8f365
-		    "a dash is not treated literally only when it is at "
d8f365
-		    "beginning or end.");
d8f365
-		}
d8f365
+	      // For "-]" the dash is a literal character.
d8f365
 	      __push_char('-');
d8f365
+	      return false;
d8f365
 	    }
d8f365
-	  else
d8f365
+	  else if (__last_char._M_is_class())
d8f365
+	    {
d8f365
+	      // "\\w-" is invalid, start of range must be a single char.
d8f365
+	      __throw_regex_error(regex_constants::error_range,
d8f365
+		    "Invalid start of range in bracket expression.");
d8f365
+	    }
d8f365
+	  else if (__last_char._M_is_char())
d8f365
 	    {
d8f365
 	      if (_M_try_char())
d8f365
 		{
d8f365
-		  __matcher._M_make_range(__last_char.second, _M_value[0]);
d8f365
-		  __last_char.first = false;
d8f365
+		  // "x-y"
d8f365
+		  __matcher._M_make_range(__last_char.get(), _M_value[0]);
d8f365
+		  __last_char.reset();
d8f365
 		}
d8f365
 	      else if (_M_match_token(_ScannerT::_S_token_bracket_dash))
d8f365
 		{
d8f365
-		  __matcher._M_make_range(__last_char.second, '-');
d8f365
-		  __last_char.first = false;
d8f365
+		  // "x--"
d8f365
+		  __matcher._M_make_range(__last_char.get(), '-');
d8f365
+		  __last_char.reset();
d8f365
 		}
d8f365
 	      else
d8f365
-		{
d8f365
-		  if (_M_scanner._M_get_token()
d8f365
-		      != _ScannerT::_S_token_bracket_end)
d8f365
-		    __throw_regex_error(
d8f365
-		      regex_constants::error_range,
d8f365
-		      "Character is expected after a dash.");
d8f365
-		  __push_char('-');
d8f365
-		}
d8f365
+		__throw_regex_error(regex_constants::error_range,
d8f365
+		      "Invalid end of range in bracket expression.");
d8f365
 	    }
d8f365
+	  else if (_M_flags & regex_constants::ECMAScript)
d8f365
+	    {
d8f365
+	      // A dash that is not part of an existing range. Might be the
d8f365
+	      // start of a new range, or might just be a literal '-' char.
d8f365
+	      // Only ECMAScript allows that in the middle of a bracket expr.
d8f365
+	      __push_char('-');
d8f365
+	    }
d8f365
+	  else
d8f365
+	    __throw_regex_error(regex_constants::error_range,
d8f365
+				"Invalid dash in bracket expression.");
d8f365
 	}
d8f365
       else if (_M_match_token(_ScannerT::_S_token_quoted_class))
d8f365
 	{
d8f365
-	  __flush();
d8f365
+	  __push_class();
d8f365
 	  __matcher._M_add_character_class(_M_value,
d8f365
 					   _M_ctype.is(_CtypeT::upper,
d8f365
 						       _M_value[0]));
d8f365
diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
d8f365
index 236ab663fc0..57088f5af83 100644
d8f365
--- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
d8f365
+++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc
d8f365
@@ -68,6 +68,16 @@ test01()
d8f365
 void
d8f365
 test02()
d8f365
 {
d8f365
+  VERIFY(regex_match("-", regex("[-]", regex_constants::ECMAScript)));
d8f365
+  VERIFY(regex_match("-", regex("[--]", regex_constants::ECMAScript)));
d8f365
+  VERIFY(regex_match("-", regex("[---]", regex_constants::ECMAScript)));
d8f365
+  VERIFY(regex_match("-", regex("[----]", regex_constants::ECMAScript)));
d8f365
+  VERIFY(regex_match("-", regex("[-----]", regex_constants::ECMAScript)));
d8f365
+
d8f365
+  VERIFY(regex_match("-", regex("[-]", regex_constants::extended)));
d8f365
+  VERIFY(regex_match("-", regex("[--]", regex_constants::extended)));
d8f365
+  VERIFY(regex_match("-", regex("[---]", regex_constants::extended)));
d8f365
+  VERIFY(regex_match("-", regex("[----]", regex_constants::extended)));
d8f365
   try
d8f365
   {
d8f365
     std::regex re("[-----]", std::regex::extended);
d8f365
@@ -77,7 +87,6 @@ test02()
d8f365
   {
d8f365
     VERIFY(e.code() == std::regex_constants::error_range);
d8f365
   }
d8f365
-  std::regex re("[-----]", std::regex::ECMAScript);
d8f365
 
d8f365
   VERIFY(!regex_match("b", regex("[-ac]", regex_constants::extended)));
d8f365
   VERIFY(!regex_match("b", regex("[ac-]", regex_constants::extended)));
d8f365
@@ -92,7 +101,27 @@ test02()
d8f365
   }
d8f365
   catch (const std::regex_error& e)
d8f365
   {
d8f365
+    VERIFY(e.code() == std::regex_constants::error_range);
d8f365
+  }
d8f365
+  try
d8f365
+  {
d8f365
+    regex("[@--]", regex_constants::extended);
d8f365
+    VERIFY(false);
d8f365
   }
d8f365
+  catch (const std::regex_error& e)
d8f365
+  {
d8f365
+    VERIFY(e.code() == std::regex_constants::error_range);
d8f365
+  }
d8f365
+  try
d8f365
+  {
d8f365
+    regex("[--%]", regex_constants::extended);
d8f365
+    VERIFY(false);
d8f365
+  }
d8f365
+  catch (const std::regex_error& e)
d8f365
+  {
d8f365
+    VERIFY(e.code() == std::regex_constants::error_range);
d8f365
+  }
d8f365
+
d8f365
   VERIFY(regex_match("].", regex("[][.hyphen.]-0]*", regex_constants::extended)));
d8f365
 }
d8f365
 
d8f365
@@ -157,6 +186,36 @@ test06()
d8f365
   VERIFY(regex_match("a-", debian_cron_namespace_ok));
d8f365
 }
d8f365
 
d8f365
+// libstdc++/102447
d8f365
+void
d8f365
+test07()
d8f365
+{
d8f365
+  VERIFY(regex_match("-", std::regex("[\\w-]", std::regex::ECMAScript)));
d8f365
+  VERIFY(regex_match("a", std::regex("[\\w-]", std::regex::ECMAScript)));
d8f365
+  VERIFY(regex_match("-", std::regex("[a-]", std::regex::ECMAScript)));
d8f365
+  VERIFY(regex_match("a", std::regex("[a-]", std::regex::ECMAScript)));
d8f365
+
d8f365
+  try
d8f365
+  {
d8f365
+    std::regex re("[\\w-a]", std::regex::ECMAScript);
d8f365
+    VERIFY(false);
d8f365
+  }
d8f365
+  catch (const std::regex_error& e)
d8f365
+  {
d8f365
+    VERIFY(e.code() == std::regex_constants::error_range);
d8f365
+  }
d8f365
+
d8f365
+  try
d8f365
+  {
d8f365
+    std::regex re("[\\w--]", std::regex::ECMAScript);
d8f365
+    VERIFY(false);
d8f365
+  }
d8f365
+  catch (const std::regex_error& e)
d8f365
+  {
d8f365
+    VERIFY(e.code() == std::regex_constants::error_range);
d8f365
+  }
d8f365
+}
d8f365
+
d8f365
 int
d8f365
 main()
d8f365
 {
d8f365
@@ -166,6 +225,7 @@ main()
d8f365
   test04();
d8f365
   test05();
d8f365
   test06();
d8f365
+  test07();
d8f365
 
d8f365
   return 0;
d8f365
 }
d8f365
d8f365
commit 1851cc4c5f2666dfdec53a2ada57095ffc59e08b
d8f365
Author: Jonathan Wakely <jwakely@redhat.com>
d8f365
Date:   Mon Dec 13 13:36:33 2021 +0000
d8f365
d8f365
    libstdc++: Fix non-reserved name in <regex> header
d8f365
    
d8f365
    libstdc++-v3/ChangeLog:
d8f365
    
d8f365
            * include/bits/regex_compiler.tcc (_Compiler::_M_match_token):
d8f365
            Use reserved name for parameter.
d8f365
            * testsuite/17_intro/names.cc: Check "token".
d8f365
    
d8f365
    (cherry picked from commit b0e6a257f1862e217cdf19332ea0f7bad56dcddc)
d8f365
d8f365
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
index 8af920e5fe9..b1169428afb 100644
d8f365
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
d8f365
@@ -586,9 +586,9 @@ namespace __detail
d8f365
   template<typename _TraitsT>
d8f365
     bool
d8f365
     _Compiler<_TraitsT>::
d8f365
-    _M_match_token(_TokenT token)
d8f365
+    _M_match_token(_TokenT __token)
d8f365
     {
d8f365
-      if (token == _M_scanner._M_get_token())
d8f365
+      if (__token == _M_scanner._M_get_token())
d8f365
 	{
d8f365
 	  _M_value = _M_scanner._M_get_value();
d8f365
 	  _M_scanner._M_advance();
d8f365
diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc
d8f365
index d758138dfb1..6c06aba7228 100644
d8f365
--- a/libstdc++-v3/testsuite/17_intro/names.cc
d8f365
+++ b/libstdc++-v3/testsuite/17_intro/names.cc
d8f365
@@ -99,6 +99,7 @@
d8f365
 #define z (
d8f365
 
d8f365
 #define tmp (
d8f365
+#define token (
d8f365
 
d8f365
 #if __cplusplus < 201103L
d8f365
 #define uses_allocator  (
d8f365
--- a/libstdc++-v3/include/bits/c++config.orig	2022-07-08 15:06:14.083231445 -0400
d8f365
+++ b/libstdc++-v3/include/bits/c++config	2022-07-08 15:06:41.733247859 -0400
d8f365
@@ -99,6 +99,12 @@
d8f365
 # define _GLIBCXX_ABI_TAG_CXX11 __attribute ((__abi_tag__ ("cxx11")))
d8f365
 #endif
d8f365
 
d8f365
+// Macro to warn about unused results.
d8f365
+#if __cplusplus >= 201703L
d8f365
+# define _GLIBCXX_NODISCARD [[__nodiscard__]]
d8f365
+#else
d8f365
+# define _GLIBCXX_NODISCARD
d8f365
+#endif
d8f365
 
d8f365
 #if __cplusplus
d8f365