Karsten Hopp ce0d03
To: vim_dev@googlegroups.com
Karsten Hopp ce0d03
Subject: Patch 7.3.1073
Karsten Hopp ce0d03
Fcc: outbox
Karsten Hopp ce0d03
From: Bram Moolenaar <Bram@moolenaar.net>
Karsten Hopp ce0d03
Mime-Version: 1.0
Karsten Hopp ce0d03
Content-Type: text/plain; charset=UTF-8
Karsten Hopp ce0d03
Content-Transfer-Encoding: 8bit
Karsten Hopp ce0d03
------------
Karsten Hopp ce0d03
Karsten Hopp ce0d03
Patch 7.3.1073
Karsten Hopp ce0d03
Problem:    New regexp engine may run out of states.
Karsten Hopp ce0d03
Solution:   Allocate states dynamically.  Also make the test report errors.
Karsten Hopp ce0d03
Files:	    src/regexp_nfa.c, src/testdir/test64.in, src/testdir/test64.ok,
Karsten Hopp ce0d03
	    src/testdir/test95.in
Karsten Hopp ce0d03
Karsten Hopp ce0d03
Karsten Hopp ce0d03
*** ../vim-7.3.1072/src/regexp_nfa.c	2013-05-30 17:49:19.000000000 +0200
Karsten Hopp ce0d03
--- src/regexp_nfa.c	2013-05-30 18:36:12.000000000 +0200
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 233,239 ****
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
  /* helper functions used when doing re2post() ... regatom() parsing */
Karsten Hopp ce0d03
  #define EMIT(c)	do {				\
Karsten Hopp ce0d03
! 		    if (post_ptr >= post_end)	\
Karsten Hopp ce0d03
  			return FAIL;		\
Karsten Hopp ce0d03
  		    *post_ptr++ = c;		\
Karsten Hopp ce0d03
  		} while (0)
Karsten Hopp ce0d03
--- 233,239 ----
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
  /* helper functions used when doing re2post() ... regatom() parsing */
Karsten Hopp ce0d03
  #define EMIT(c)	do {				\
Karsten Hopp ce0d03
! 		    if (post_ptr >= post_end && realloc_post_list() == FAIL) \
Karsten Hopp ce0d03
  			return FAIL;		\
Karsten Hopp ce0d03
  		    *post_ptr++ = c;		\
Karsten Hopp ce0d03
  		} while (0)
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 256,266 ****
Karsten Hopp ce0d03
      nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Some items blow up in size, such as [A-z].  Add more space for that.
Karsten Hopp ce0d03
!      * TODO: some patterns may still fail. */
Karsten Hopp ce0d03
      nstate_max += 1000;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Size for postfix representation of expr. */
Karsten Hopp ce0d03
!     postfix_size = sizeof(*post_start) * nstate_max;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      post_start = (int *)lalloc(postfix_size, TRUE);
Karsten Hopp ce0d03
      if (post_start == NULL)
Karsten Hopp ce0d03
--- 256,266 ----
Karsten Hopp ce0d03
      nstate_max = (int)(STRLEN(expr) + 1) * NFA_POSTFIX_MULTIPLIER;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Some items blow up in size, such as [A-z].  Add more space for that.
Karsten Hopp ce0d03
!      * When it is still not enough realloc_post_list() will be used. */
Karsten Hopp ce0d03
      nstate_max += 1000;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Size for postfix representation of expr. */
Karsten Hopp ce0d03
!     postfix_size = sizeof(int) * nstate_max;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      post_start = (int *)lalloc(postfix_size, TRUE);
Karsten Hopp ce0d03
      if (post_start == NULL)
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 277,282 ****
Karsten Hopp ce0d03
--- 277,307 ----
Karsten Hopp ce0d03
  }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
  /*
Karsten Hopp ce0d03
+  * Allocate more space for post_start.  Called when
Karsten Hopp ce0d03
+  * running above the estimated number of states.
Karsten Hopp ce0d03
+  */
Karsten Hopp ce0d03
+     static int
Karsten Hopp ce0d03
+ realloc_post_list()
Karsten Hopp ce0d03
+ {
Karsten Hopp ce0d03
+     int   nstate_max = post_end - post_start;
Karsten Hopp ce0d03
+     int   new_max = nstate_max + 1000;
Karsten Hopp ce0d03
+     int   *new_start;
Karsten Hopp ce0d03
+     int	  *old_start;
Karsten Hopp ce0d03
+ 
Karsten Hopp ce0d03
+     new_start = (int *)lalloc(new_max * sizeof(int), TRUE);
Karsten Hopp ce0d03
+     if (new_start == NULL)
Karsten Hopp ce0d03
+ 	return FAIL;
Karsten Hopp ce0d03
+     mch_memmove(new_start, post_start, nstate_max * sizeof(int));
Karsten Hopp ce0d03
+     vim_memset(new_start + nstate_max, 0, 1000 * sizeof(int));
Karsten Hopp ce0d03
+     old_start = post_start;
Karsten Hopp ce0d03
+     post_start = new_start;
Karsten Hopp ce0d03
+     post_ptr = new_start + (post_ptr - old_start);
Karsten Hopp ce0d03
+     post_end = post_start + new_max;
Karsten Hopp ce0d03
+     vim_free(old_start);
Karsten Hopp ce0d03
+     return OK;
Karsten Hopp ce0d03
+ }
Karsten Hopp ce0d03
+ 
Karsten Hopp ce0d03
+ /*
Karsten Hopp ce0d03
   * Search between "start" and "end" and try to recognize a
Karsten Hopp ce0d03
   * character class in expanded form. For example [0-9].
Karsten Hopp ce0d03
   * On success, return the id the character class to be emitted.
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1306,1312 ****
Karsten Hopp ce0d03
      int		greedy = TRUE;      /* Braces are prefixed with '-' ? */
Karsten Hopp ce0d03
      char_u	*old_regparse, *new_regparse;
Karsten Hopp ce0d03
      int		c2;
Karsten Hopp ce0d03
!     int		*old_post_ptr, *my_post_start;
Karsten Hopp ce0d03
      int		old_regnpar;
Karsten Hopp ce0d03
      int		quest;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
--- 1331,1338 ----
Karsten Hopp ce0d03
      int		greedy = TRUE;      /* Braces are prefixed with '-' ? */
Karsten Hopp ce0d03
      char_u	*old_regparse, *new_regparse;
Karsten Hopp ce0d03
      int		c2;
Karsten Hopp ce0d03
!     int		old_post_pos;
Karsten Hopp ce0d03
!     int		my_post_start;
Karsten Hopp ce0d03
      int		old_regnpar;
Karsten Hopp ce0d03
      int		quest;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1317,1323 ****
Karsten Hopp ce0d03
       * <atom>{m,n} is next */
Karsten Hopp ce0d03
      old_regnpar = regnpar;
Karsten Hopp ce0d03
      /* store current pos in the postfix form, for \{m,n} involving 0s */
Karsten Hopp ce0d03
!     my_post_start = post_ptr;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      ret = nfa_regatom();
Karsten Hopp ce0d03
      if (ret == FAIL)
Karsten Hopp ce0d03
--- 1343,1349 ----
Karsten Hopp ce0d03
       * <atom>{m,n} is next */
Karsten Hopp ce0d03
      old_regnpar = regnpar;
Karsten Hopp ce0d03
      /* store current pos in the postfix form, for \{m,n} involving 0s */
Karsten Hopp ce0d03
!     my_post_start = (int)(post_ptr - post_start);
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      ret = nfa_regatom();
Karsten Hopp ce0d03
      if (ret == FAIL)
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1430,1443 ****
Karsten Hopp ce0d03
  	    if (maxval == 0)
Karsten Hopp ce0d03
  	    {
Karsten Hopp ce0d03
  		/* Ignore result of previous call to nfa_regatom() */
Karsten Hopp ce0d03
! 		post_ptr = my_post_start;
Karsten Hopp ce0d03
  		/* NFA_SKIP_CHAR has 0-length and works everywhere */
Karsten Hopp ce0d03
  		EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  		return OK;
Karsten Hopp ce0d03
  	    }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
  	    /* Ignore previous call to nfa_regatom() */
Karsten Hopp ce0d03
! 	    post_ptr = my_post_start;
Karsten Hopp ce0d03
  	    /* Save pos after the repeated atom and the \{} */
Karsten Hopp ce0d03
  	    new_regparse = regparse;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
--- 1456,1469 ----
Karsten Hopp ce0d03
  	    if (maxval == 0)
Karsten Hopp ce0d03
  	    {
Karsten Hopp ce0d03
  		/* Ignore result of previous call to nfa_regatom() */
Karsten Hopp ce0d03
! 		post_ptr = post_start + my_post_start;
Karsten Hopp ce0d03
  		/* NFA_SKIP_CHAR has 0-length and works everywhere */
Karsten Hopp ce0d03
  		EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  		return OK;
Karsten Hopp ce0d03
  	    }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
  	    /* Ignore previous call to nfa_regatom() */
Karsten Hopp ce0d03
! 	    post_ptr = post_start + my_post_start;
Karsten Hopp ce0d03
  	    /* Save pos after the repeated atom and the \{} */
Karsten Hopp ce0d03
  	    new_regparse = regparse;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1449,1461 ****
Karsten Hopp ce0d03
  		curchr = -1;
Karsten Hopp ce0d03
  		/* Restore count of parenthesis */
Karsten Hopp ce0d03
  		regnpar = old_regnpar;
Karsten Hopp ce0d03
! 		old_post_ptr = post_ptr;
Karsten Hopp ce0d03
  		if (nfa_regatom() == FAIL)
Karsten Hopp ce0d03
  		    return FAIL;
Karsten Hopp ce0d03
  		/* after "minval" times, atoms are optional */
Karsten Hopp ce0d03
  		if (i + 1 > minval)
Karsten Hopp ce0d03
  		    EMIT(quest);
Karsten Hopp ce0d03
! 		if (old_post_ptr != my_post_start)
Karsten Hopp ce0d03
  		    EMIT(NFA_CONCAT);
Karsten Hopp ce0d03
  	    }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
--- 1475,1487 ----
Karsten Hopp ce0d03
  		curchr = -1;
Karsten Hopp ce0d03
  		/* Restore count of parenthesis */
Karsten Hopp ce0d03
  		regnpar = old_regnpar;
Karsten Hopp ce0d03
! 		old_post_pos = (int)(post_ptr - post_start);
Karsten Hopp ce0d03
  		if (nfa_regatom() == FAIL)
Karsten Hopp ce0d03
  		    return FAIL;
Karsten Hopp ce0d03
  		/* after "minval" times, atoms are optional */
Karsten Hopp ce0d03
  		if (i + 1 > minval)
Karsten Hopp ce0d03
  		    EMIT(quest);
Karsten Hopp ce0d03
! 		if (old_post_pos != my_post_start)
Karsten Hopp ce0d03
  		    EMIT(NFA_CONCAT);
Karsten Hopp ce0d03
  	    }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1572,1580 ****
Karsten Hopp ce0d03
  nfa_regbranch()
Karsten Hopp ce0d03
  {
Karsten Hopp ce0d03
      int		ch;
Karsten Hopp ce0d03
!     int		*old_post_ptr;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
!     old_post_ptr = post_ptr;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* First branch, possibly the only one */
Karsten Hopp ce0d03
      if (nfa_regconcat() == FAIL)
Karsten Hopp ce0d03
--- 1598,1606 ----
Karsten Hopp ce0d03
  nfa_regbranch()
Karsten Hopp ce0d03
  {
Karsten Hopp ce0d03
      int		ch;
Karsten Hopp ce0d03
!     int		old_post_pos;
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
!     old_post_pos = (int)(post_ptr - post_start);
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* First branch, possibly the only one */
Karsten Hopp ce0d03
      if (nfa_regconcat() == FAIL)
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 1587,1604 ****
Karsten Hopp ce0d03
  	skipchr();
Karsten Hopp ce0d03
  	EMIT(NFA_NOPEN);
Karsten Hopp ce0d03
  	EMIT(NFA_PREV_ATOM_NO_WIDTH);
Karsten Hopp ce0d03
! 	old_post_ptr = post_ptr;
Karsten Hopp ce0d03
  	if (nfa_regconcat() == FAIL)
Karsten Hopp ce0d03
  	    return FAIL;
Karsten Hopp ce0d03
  	/* if concat is empty, skip a input char. But do emit a node */
Karsten Hopp ce0d03
! 	if (old_post_ptr == post_ptr)
Karsten Hopp ce0d03
  	    EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  	EMIT(NFA_CONCAT);
Karsten Hopp ce0d03
  	ch = peekchr();
Karsten Hopp ce0d03
      }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Even if a branch is empty, emit one node for it */
Karsten Hopp ce0d03
!     if (old_post_ptr == post_ptr)
Karsten Hopp ce0d03
  	EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      return OK;
Karsten Hopp ce0d03
--- 1613,1630 ----
Karsten Hopp ce0d03
  	skipchr();
Karsten Hopp ce0d03
  	EMIT(NFA_NOPEN);
Karsten Hopp ce0d03
  	EMIT(NFA_PREV_ATOM_NO_WIDTH);
Karsten Hopp ce0d03
! 	old_post_pos = (int)(post_ptr - post_start);
Karsten Hopp ce0d03
  	if (nfa_regconcat() == FAIL)
Karsten Hopp ce0d03
  	    return FAIL;
Karsten Hopp ce0d03
  	/* if concat is empty, skip a input char. But do emit a node */
Karsten Hopp ce0d03
! 	if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp ce0d03
  	    EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  	EMIT(NFA_CONCAT);
Karsten Hopp ce0d03
  	ch = peekchr();
Karsten Hopp ce0d03
      }
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      /* Even if a branch is empty, emit one node for it */
Karsten Hopp ce0d03
!     if (old_post_pos == (int)(post_ptr - post_start))
Karsten Hopp ce0d03
  	EMIT(NFA_SKIP_CHAR);
Karsten Hopp ce0d03
  
Karsten Hopp ce0d03
      return OK;
Karsten Hopp ce0d03
*** ../vim-7.3.1072/src/testdir/test64.in	2013-05-30 17:05:34.000000000 +0200
Karsten Hopp ce0d03
--- src/testdir/test64.in	2013-05-30 18:38:49.000000000 +0200
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 348,353 ****
Karsten Hopp ce0d03
--- 348,356 ----
Karsten Hopp ce0d03
  :call add(tl, [2, '\_[^8-9]\+', "asfi\n9888", "asfi\n"])
Karsten Hopp ce0d03
  :call add(tl, [2, '\_[^a]\+', "asfi\n9888", "sfi\n9888"])
Karsten Hopp ce0d03
  :"
Karsten Hopp ce0d03
+ :"""" Requiring lots of states.
Karsten Hopp ce0d03
+ :call add(tl, [0, '[0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}', " 12345678-1234-1234-1234-123456789012 ", "12345678-1234-1234-1234-123456789012", "1234-"])
Karsten Hopp ce0d03
+ :"
Karsten Hopp ce0d03
  :"
Karsten Hopp ce0d03
  :"""" Run the tests
Karsten Hopp ce0d03
  :"
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 361,367 ****
Karsten Hopp ce0d03
  :      continue
Karsten Hopp ce0d03
  :    endif
Karsten Hopp ce0d03
  :    let &regexpengine = engine
Karsten Hopp ce0d03
! :    let l = matchlist(text, pat)
Karsten Hopp ce0d03
  :" check the match itself
Karsten Hopp ce0d03
  :    if len(l) == 0 && len(t) > matchidx
Karsten Hopp ce0d03
  :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
Karsten Hopp ce0d03
--- 364,374 ----
Karsten Hopp ce0d03
  :      continue
Karsten Hopp ce0d03
  :    endif
Karsten Hopp ce0d03
  :    let &regexpengine = engine
Karsten Hopp ce0d03
! :    try
Karsten Hopp ce0d03
! :      let l = matchlist(text, pat)
Karsten Hopp ce0d03
! :    catch
Karsten Hopp ce0d03
! :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
Karsten Hopp ce0d03
! :    endtry
Karsten Hopp ce0d03
  :" check the match itself
Karsten Hopp ce0d03
  :    if len(l) == 0 && len(t) > matchidx
Karsten Hopp ce0d03
  :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
Karsten Hopp ce0d03
*** ../vim-7.3.1072/src/testdir/test64.ok	2013-05-30 17:05:34.000000000 +0200
Karsten Hopp ce0d03
--- src/testdir/test64.ok	2013-05-30 18:42:43.000000000 +0200
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 740,745 ****
Karsten Hopp ce0d03
--- 740,747 ----
Karsten Hopp ce0d03
  OK 0 - \_[^a]\+
Karsten Hopp ce0d03
  OK 1 - \_[^a]\+
Karsten Hopp ce0d03
  OK 2 - \_[^a]\+
Karsten Hopp ce0d03
+ OK 0 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
Karsten Hopp ce0d03
+ OK 1 - [0-9a-zA-Z]\{8}-\([0-9a-zA-Z]\{4}-\)\{3}[0-9a-zA-Z]\{12}
Karsten Hopp ce0d03
  192.168.0.1
Karsten Hopp ce0d03
  192.168.0.1
Karsten Hopp ce0d03
  192.168.0.1
Karsten Hopp ce0d03
*** ../vim-7.3.1072/src/testdir/test95.in	2013-05-26 15:14:49.000000000 +0200
Karsten Hopp ce0d03
--- src/testdir/test95.in	2013-05-30 18:13:59.000000000 +0200
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 85,91 ****
Karsten Hopp ce0d03
  :      continue
Karsten Hopp ce0d03
  :    endif
Karsten Hopp ce0d03
  :    let &regexpengine = engine
Karsten Hopp ce0d03
! :    let l = matchlist(text, pat)
Karsten Hopp ce0d03
  :" check the match itself
Karsten Hopp ce0d03
  :    if len(l) == 0 && len(t) > matchidx
Karsten Hopp ce0d03
  :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
Karsten Hopp ce0d03
--- 85,95 ----
Karsten Hopp ce0d03
  :      continue
Karsten Hopp ce0d03
  :    endif
Karsten Hopp ce0d03
  :    let &regexpengine = engine
Karsten Hopp ce0d03
! :    try
Karsten Hopp ce0d03
! :      let l = matchlist(text, pat)
Karsten Hopp ce0d03
! :    catch
Karsten Hopp ce0d03
! :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", caused an exception: \"' . v:exception . '\"'
Karsten Hopp ce0d03
! :    endtry
Karsten Hopp ce0d03
  :" check the match itself
Karsten Hopp ce0d03
  :    if len(l) == 0 && len(t) > matchidx
Karsten Hopp ce0d03
  :      $put ='ERROR: pat: \"' . pat . '\", text: \"' . text . '\", did not match, expected: \"' . t[matchidx] . '\"'
Karsten Hopp ce0d03
*** ../vim-7.3.1072/src/version.c	2013-05-30 17:49:19.000000000 +0200
Karsten Hopp ce0d03
--- src/version.c	2013-05-30 18:43:08.000000000 +0200
Karsten Hopp ce0d03
***************
Karsten Hopp ce0d03
*** 730,731 ****
Karsten Hopp ce0d03
--- 730,733 ----
Karsten Hopp ce0d03
  {   /* Add new patch number below this line */
Karsten Hopp ce0d03
+ /**/
Karsten Hopp ce0d03
+     1073,
Karsten Hopp ce0d03
  /**/
Karsten Hopp ce0d03
Karsten Hopp ce0d03
-- 
Karsten Hopp ce0d03
How To Keep A Healthy Level Of Insanity:
Karsten Hopp ce0d03
17. When the money comes out the ATM, scream "I won!, I won! 3rd
Karsten Hopp ce0d03
    time this week!!!!!"
Karsten Hopp ce0d03
Karsten Hopp ce0d03
 /// Bram Moolenaar -- Bram@Moolenaar.net -- http://www.Moolenaar.net   \\\
Karsten Hopp ce0d03
///        sponsor Vim, vote for features -- http://www.Vim.org/sponsor/ \\\
Karsten Hopp ce0d03
\\\  an exciting new programming language -- http://www.Zimbu.org        ///
Karsten Hopp ce0d03
 \\\            help me help AIDS victims -- http://ICCF-Holland.org    ///