github.com/afumu/libc@v0.0.6/musl/src/regex/regexec.c (about)

     1  /*
     2    regexec.c - TRE POSIX compatible matching functions (and more).
     3  
     4    Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
     5    All rights reserved.
     6  
     7    Redistribution and use in source and binary forms, with or without
     8    modification, are permitted provided that the following conditions
     9    are met:
    10  
    11      1. Redistributions of source code must retain the above copyright
    12         notice, this list of conditions and the following disclaimer.
    13  
    14      2. Redistributions in binary form must reproduce the above copyright
    15         notice, this list of conditions and the following disclaimer in the
    16         documentation and/or other materials provided with the distribution.
    17  
    18    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
    19    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    20    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    21    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
    22    HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    23    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    24    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    25    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    26    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    27    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    28    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    29  
    30  */
    31  
    32  #include <stdlib.h>
    33  #include <string.h>
    34  #include <wchar.h>
    35  #include <wctype.h>
    36  #include <limits.h>
    37  #include <stdint.h>
    38  
    39  #include <regex.h>
    40  
    41  #include "tre.h"
    42  
    43  #include <assert.h>
    44  
    45  static void
    46  tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
    47  		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo);
    48  
    49  /***********************************************************************
    50   from tre-match-utils.h
    51  ***********************************************************************/
    52  
    53  #define GET_NEXT_WCHAR() do {                                                 \
    54      prev_c = next_c; pos += pos_add_next;                                     \
    55      if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) {        \
    56          if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; }         \
    57          else pos_add_next++;                                                  \
    58      }                                                                         \
    59      str_byte += pos_add_next;                                                 \
    60    } while (0)
    61  
    62  #define IS_WORD_CHAR(c)	 ((c) == L'_' || tre_isalnum(c))
    63  
    64  #define CHECK_ASSERTIONS(assertions)					      \
    65    (((assertions & ASSERT_AT_BOL)					      \
    66      && (pos > 0 || reg_notbol)						      \
    67      && (prev_c != L'\n' || !reg_newline))				      \
    68     || ((assertions & ASSERT_AT_EOL)					      \
    69         && (next_c != L'\0' || reg_noteol)				      \
    70         && (next_c != L'\n' || !reg_newline))				      \
    71     || ((assertions & ASSERT_AT_BOW)					      \
    72         && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))	              \
    73     || ((assertions & ASSERT_AT_EOW)					      \
    74         && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
    75     || ((assertions & ASSERT_AT_WB)					      \
    76         && (pos != 0 && next_c != L'\0'					      \
    77  	   && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))		      \
    78     || ((assertions & ASSERT_AT_WB_NEG)					      \
    79         && (pos == 0 || next_c == L'\0'					      \
    80  	   || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
    81  
    82  #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
    83    (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
    84         && !(tnfa->cflags & REG_ICASE)                                         \
    85         && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
    86      || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
    87          && (tnfa->cflags & REG_ICASE)                                         \
    88          && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
    89  	&& !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
    90      || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
    91          && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
    92                                        tnfa->cflags & REG_ICASE)))
    93  
    94  
    95  
    96  
    97  /* Returns 1 if `t1' wins `t2', 0 otherwise. */
    98  static int
    99  tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
   100  	      regoff_t *t1, regoff_t *t2)
   101  {
   102    int i;
   103    for (i = 0; i < num_tags; i++)
   104      {
   105        if (tag_directions[i] == TRE_TAG_MINIMIZE)
   106  	{
   107  	  if (t1[i] < t2[i])
   108  	    return 1;
   109  	  if (t1[i] > t2[i])
   110  	    return 0;
   111  	}
   112        else
   113  	{
   114  	  if (t1[i] > t2[i])
   115  	    return 1;
   116  	  if (t1[i] < t2[i])
   117  	    return 0;
   118  	}
   119      }
   120    /*  assert(0);*/
   121    return 0;
   122  }
   123  
   124  static int
   125  tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
   126  {
   127    while (*classes != (tre_ctype_t)0)
   128      if ((!icase && tre_isctype(wc, *classes))
   129  	|| (icase && (tre_isctype(tre_toupper(wc), *classes)
   130  		      || tre_isctype(tre_tolower(wc), *classes))))
   131        return 1; /* Match. */
   132      else
   133        classes++;
   134    return 0; /* No match. */
   135  }
   136  
   137  
   138  /***********************************************************************
   139   from tre-match-parallel.c
   140  ***********************************************************************/
   141  
   142  /*
   143    This algorithm searches for matches basically by reading characters
   144    in the searched string one by one, starting at the beginning.	 All
   145    matching paths in the TNFA are traversed in parallel.	 When two or
   146    more paths reach the same state, exactly one is chosen according to
   147    tag ordering rules; if returning submatches is not required it does
   148    not matter which path is chosen.
   149  
   150    The worst case time required for finding the leftmost and longest
   151    match, or determining that there is no match, is always linearly
   152    dependent on the length of the text being searched.
   153  
   154    This algorithm cannot handle TNFAs with back referencing nodes.
   155    See `tre-match-backtrack.c'.
   156  */
   157  
   158  typedef struct {
   159    tre_tnfa_transition_t *state;
   160    regoff_t *tags;
   161  } tre_tnfa_reach_t;
   162  
   163  typedef struct {
   164    regoff_t pos;
   165    regoff_t **tags;
   166  } tre_reach_pos_t;
   167  
   168  
   169  static reg_errcode_t
   170  tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
   171  		      regoff_t *match_tags, int eflags,
   172  		      regoff_t *match_end_ofs)
   173  {
   174    /* State variables required by GET_NEXT_WCHAR. */
   175    tre_char_t prev_c = 0, next_c = 0;
   176    const char *str_byte = string;
   177    regoff_t pos = -1;
   178    regoff_t pos_add_next = 1;
   179  #ifdef TRE_MBSTATE
   180    mbstate_t mbstate;
   181  #endif /* TRE_MBSTATE */
   182    int reg_notbol = eflags & REG_NOTBOL;
   183    int reg_noteol = eflags & REG_NOTEOL;
   184    int reg_newline = tnfa->cflags & REG_NEWLINE;
   185    reg_errcode_t ret;
   186  
   187    char *buf;
   188    tre_tnfa_transition_t *trans_i;
   189    tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
   190    tre_reach_pos_t *reach_pos;
   191    int *tag_i;
   192    int num_tags, i;
   193  
   194    regoff_t match_eo = -1;	   /* end offset of match (-1 if no match found yet) */
   195    int new_match = 0;
   196    regoff_t *tmp_tags = NULL;
   197    regoff_t *tmp_iptr;
   198  
   199  #ifdef TRE_MBSTATE
   200    memset(&mbstate, '\0', sizeof(mbstate));
   201  #endif /* TRE_MBSTATE */
   202  
   203    if (!match_tags)
   204      num_tags = 0;
   205    else
   206      num_tags = tnfa->num_tags;
   207  
   208    /* Allocate memory for temporary data required for matching.	This needs to
   209       be done for every matching operation to be thread safe.  This allocates
   210       everything in a single large block with calloc(). */
   211    {
   212      size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
   213      char *tmp_buf;
   214  
   215      /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
   216       * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
   217      if (num_tags > SIZE_MAX/(8 * sizeof(regoff_t) * tnfa->num_states))
   218        return REG_ESPACE;
   219  
   220      /* Likewise check rbytes. */
   221      if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
   222        return REG_ESPACE;
   223  
   224      /* Likewise check pbytes. */
   225      if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
   226        return REG_ESPACE;
   227  
   228      /* Compute the length of the block we need. */
   229      tbytes = sizeof(*tmp_tags) * num_tags;
   230      rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
   231      pbytes = sizeof(*reach_pos) * tnfa->num_states;
   232      xbytes = sizeof(regoff_t) * num_tags;
   233      total_bytes =
   234        (sizeof(long) - 1) * 4 /* for alignment paddings */
   235        + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
   236  
   237      /* Allocate the memory. */
   238      buf = calloc(total_bytes, 1);
   239      if (buf == NULL)
   240        return REG_ESPACE;
   241  
   242      /* Get the various pointers within tmp_buf (properly aligned). */
   243      tmp_tags = (void *)buf;
   244      tmp_buf = buf + tbytes;
   245      tmp_buf += ALIGN(tmp_buf, long);
   246      reach_next = (void *)tmp_buf;
   247      tmp_buf += rbytes;
   248      tmp_buf += ALIGN(tmp_buf, long);
   249      reach = (void *)tmp_buf;
   250      tmp_buf += rbytes;
   251      tmp_buf += ALIGN(tmp_buf, long);
   252      reach_pos = (void *)tmp_buf;
   253      tmp_buf += pbytes;
   254      tmp_buf += ALIGN(tmp_buf, long);
   255      for (i = 0; i < tnfa->num_states; i++)
   256        {
   257  	reach[i].tags = (void *)tmp_buf;
   258  	tmp_buf += xbytes;
   259  	reach_next[i].tags = (void *)tmp_buf;
   260  	tmp_buf += xbytes;
   261        }
   262    }
   263  
   264    for (i = 0; i < tnfa->num_states; i++)
   265      reach_pos[i].pos = -1;
   266  
   267    GET_NEXT_WCHAR();
   268    pos = 0;
   269  
   270    reach_next_i = reach_next;
   271    while (1)
   272      {
   273        /* If no match found yet, add the initial states to `reach_next'. */
   274        if (match_eo < 0)
   275  	{
   276  	  trans_i = tnfa->initial;
   277  	  while (trans_i->state != NULL)
   278  	    {
   279  	      if (reach_pos[trans_i->state_id].pos < pos)
   280  		{
   281  		  if (trans_i->assertions
   282  		      && CHECK_ASSERTIONS(trans_i->assertions))
   283  		    {
   284  		      trans_i++;
   285  		      continue;
   286  		    }
   287  
   288  		  reach_next_i->state = trans_i->state;
   289  		  for (i = 0; i < num_tags; i++)
   290  		    reach_next_i->tags[i] = -1;
   291  		  tag_i = trans_i->tags;
   292  		  if (tag_i)
   293  		    while (*tag_i >= 0)
   294  		      {
   295  			if (*tag_i < num_tags)
   296  			  reach_next_i->tags[*tag_i] = pos;
   297  			tag_i++;
   298  		      }
   299  		  if (reach_next_i->state == tnfa->final)
   300  		    {
   301  		      match_eo = pos;
   302  		      new_match = 1;
   303  		      for (i = 0; i < num_tags; i++)
   304  			match_tags[i] = reach_next_i->tags[i];
   305  		    }
   306  		  reach_pos[trans_i->state_id].pos = pos;
   307  		  reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
   308  		  reach_next_i++;
   309  		}
   310  	      trans_i++;
   311  	    }
   312  	  reach_next_i->state = NULL;
   313  	}
   314        else
   315  	{
   316  	  if (num_tags == 0 || reach_next_i == reach_next)
   317  	    /* We have found a match. */
   318  	    break;
   319  	}
   320  
   321        /* Check for end of string. */
   322        if (!next_c) break;
   323  
   324        GET_NEXT_WCHAR();
   325  
   326        /* Swap `reach' and `reach_next'. */
   327        reach_i = reach;
   328        reach = reach_next;
   329        reach_next = reach_i;
   330  
   331        /* For each state in `reach', weed out states that don't fulfill the
   332  	 minimal matching conditions. */
   333        if (tnfa->num_minimals && new_match)
   334  	{
   335  	  new_match = 0;
   336  	  reach_next_i = reach_next;
   337  	  for (reach_i = reach; reach_i->state; reach_i++)
   338  	    {
   339  	      int skip = 0;
   340  	      for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
   341  		{
   342  		  int end = tnfa->minimal_tags[i];
   343  		  int start = tnfa->minimal_tags[i + 1];
   344  		  if (end >= num_tags)
   345  		    {
   346  		      skip = 1;
   347  		      break;
   348  		    }
   349  		  else if (reach_i->tags[start] == match_tags[start]
   350  			   && reach_i->tags[end] < match_tags[end])
   351  		    {
   352  		      skip = 1;
   353  		      break;
   354  		    }
   355  		}
   356  	      if (!skip)
   357  		{
   358  		  reach_next_i->state = reach_i->state;
   359  		  tmp_iptr = reach_next_i->tags;
   360  		  reach_next_i->tags = reach_i->tags;
   361  		  reach_i->tags = tmp_iptr;
   362  		  reach_next_i++;
   363  		}
   364  	    }
   365  	  reach_next_i->state = NULL;
   366  
   367  	  /* Swap `reach' and `reach_next'. */
   368  	  reach_i = reach;
   369  	  reach = reach_next;
   370  	  reach_next = reach_i;
   371  	}
   372  
   373        /* For each state in `reach' see if there is a transition leaving with
   374  	 the current input symbol to a state not yet in `reach_next', and
   375  	 add the destination states to `reach_next'. */
   376        reach_next_i = reach_next;
   377        for (reach_i = reach; reach_i->state; reach_i++)
   378  	{
   379  	  for (trans_i = reach_i->state; trans_i->state; trans_i++)
   380  	    {
   381  	      /* Does this transition match the input symbol? */
   382  	      if (trans_i->code_min <= (tre_cint_t)prev_c &&
   383  		  trans_i->code_max >= (tre_cint_t)prev_c)
   384  		{
   385  		  if (trans_i->assertions
   386  		      && (CHECK_ASSERTIONS(trans_i->assertions)
   387  			  || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
   388  		    {
   389  		      continue;
   390  		    }
   391  
   392  		  /* Compute the tags after this transition. */
   393  		  for (i = 0; i < num_tags; i++)
   394  		    tmp_tags[i] = reach_i->tags[i];
   395  		  tag_i = trans_i->tags;
   396  		  if (tag_i != NULL)
   397  		    while (*tag_i >= 0)
   398  		      {
   399  			if (*tag_i < num_tags)
   400  			  tmp_tags[*tag_i] = pos;
   401  			tag_i++;
   402  		      }
   403  
   404  		  if (reach_pos[trans_i->state_id].pos < pos)
   405  		    {
   406  		      /* Found an unvisited node. */
   407  		      reach_next_i->state = trans_i->state;
   408  		      tmp_iptr = reach_next_i->tags;
   409  		      reach_next_i->tags = tmp_tags;
   410  		      tmp_tags = tmp_iptr;
   411  		      reach_pos[trans_i->state_id].pos = pos;
   412  		      reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
   413  
   414  		      if (reach_next_i->state == tnfa->final
   415  			  && (match_eo == -1
   416  			      || (num_tags > 0
   417  				  && reach_next_i->tags[0] <= match_tags[0])))
   418  			{
   419  			  match_eo = pos;
   420  			  new_match = 1;
   421  			  for (i = 0; i < num_tags; i++)
   422  			    match_tags[i] = reach_next_i->tags[i];
   423  			}
   424  		      reach_next_i++;
   425  
   426  		    }
   427  		  else
   428  		    {
   429  		      assert(reach_pos[trans_i->state_id].pos == pos);
   430  		      /* Another path has also reached this state.  We choose
   431  			 the winner by examining the tag values for both
   432  			 paths. */
   433  		      if (tre_tag_order(num_tags, tnfa->tag_directions,
   434  					tmp_tags,
   435  					*reach_pos[trans_i->state_id].tags))
   436  			{
   437  			  /* The new path wins. */
   438  			  tmp_iptr = *reach_pos[trans_i->state_id].tags;
   439  			  *reach_pos[trans_i->state_id].tags = tmp_tags;
   440  			  if (trans_i->state == tnfa->final)
   441  			    {
   442  			      match_eo = pos;
   443  			      new_match = 1;
   444  			      for (i = 0; i < num_tags; i++)
   445  				match_tags[i] = tmp_tags[i];
   446  			    }
   447  			  tmp_tags = tmp_iptr;
   448  			}
   449  		    }
   450  		}
   451  	    }
   452  	}
   453        reach_next_i->state = NULL;
   454      }
   455  
   456    *match_end_ofs = match_eo;
   457    ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
   458  error_exit:
   459    xfree(buf);
   460    return ret;
   461  }
   462  
   463  
   464  
   465  /***********************************************************************
   466   from tre-match-backtrack.c
   467  ***********************************************************************/
   468  
   469  /*
   470    This matcher is for regexps that use back referencing.  Regexp matching
   471    with back referencing is an NP-complete problem on the number of back
   472    references.  The easiest way to match them is to use a backtracking
   473    routine which basically goes through all possible paths in the TNFA
   474    and chooses the one which results in the best (leftmost and longest)
   475    match.  This can be spectacularly expensive and may run out of stack
   476    space, but there really is no better known generic algorithm.	 Quoting
   477    Henry Spencer from comp.compilers:
   478    <URL: http://compilers.iecc.com/comparch/article/93-03-102>
   479  
   480      POSIX.2 REs require longest match, which is really exciting to
   481      implement since the obsolete ("basic") variant also includes
   482      \<digit>.  I haven't found a better way of tackling this than doing
   483      a preliminary match using a DFA (or simulation) on a modified RE
   484      that just replicates subREs for \<digit>, and then doing a
   485      backtracking match to determine whether the subRE matches were
   486      right.  This can be rather slow, but I console myself with the
   487      thought that people who use \<digit> deserve very slow execution.
   488      (Pun unintentional but very appropriate.)
   489  
   490  */
   491  
   492  typedef struct {
   493    regoff_t pos;
   494    const char *str_byte;
   495    tre_tnfa_transition_t *state;
   496    int state_id;
   497    int next_c;
   498    regoff_t *tags;
   499  #ifdef TRE_MBSTATE
   500    mbstate_t mbstate;
   501  #endif /* TRE_MBSTATE */
   502  } tre_backtrack_item_t;
   503  
   504  typedef struct tre_backtrack_struct {
   505    tre_backtrack_item_t item;
   506    struct tre_backtrack_struct *prev;
   507    struct tre_backtrack_struct *next;
   508  } *tre_backtrack_t;
   509  
   510  #ifdef TRE_MBSTATE
   511  #define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
   512  #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
   513  #else /* !TRE_MBSTATE */
   514  #define BT_STACK_MBSTATE_IN
   515  #define BT_STACK_MBSTATE_OUT
   516  #endif /* !TRE_MBSTATE */
   517  
   518  #define tre_bt_mem_new		  tre_mem_new
   519  #define tre_bt_mem_alloc	  tre_mem_alloc
   520  #define tre_bt_mem_destroy	  tre_mem_destroy
   521  
   522  
   523  #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
   524    do									      \
   525      {									      \
   526        int i;								      \
   527        if (!stack->next)							      \
   528  	{								      \
   529  	  tre_backtrack_t s;						      \
   530  	  s = tre_bt_mem_alloc(mem, sizeof(*s));			      \
   531  	  if (!s)							      \
   532  	    {								      \
   533  	      tre_bt_mem_destroy(mem);					      \
   534  	      if (tags)							      \
   535  		xfree(tags);						      \
   536  	      if (pmatch)						      \
   537  		xfree(pmatch);						      \
   538  	      if (states_seen)						      \
   539  		xfree(states_seen);					      \
   540  	      return REG_ESPACE;					      \
   541  	    }								      \
   542  	  s->prev = stack;						      \
   543  	  s->next = NULL;						      \
   544  	  s->item.tags = tre_bt_mem_alloc(mem,				      \
   545  					  sizeof(*tags) * tnfa->num_tags);    \
   546  	  if (!s->item.tags)						      \
   547  	    {								      \
   548  	      tre_bt_mem_destroy(mem);					      \
   549  	      if (tags)							      \
   550  		xfree(tags);						      \
   551  	      if (pmatch)						      \
   552  		xfree(pmatch);						      \
   553  	      if (states_seen)						      \
   554  		xfree(states_seen);					      \
   555  	      return REG_ESPACE;					      \
   556  	    }								      \
   557  	  stack->next = s;						      \
   558  	  stack = s;							      \
   559  	}								      \
   560        else								      \
   561  	stack = stack->next;						      \
   562        stack->item.pos = (_pos);						      \
   563        stack->item.str_byte = (_str_byte);				      \
   564        stack->item.state = (_state);					      \
   565        stack->item.state_id = (_state_id);				      \
   566        stack->item.next_c = (_next_c);					      \
   567        for (i = 0; i < tnfa->num_tags; i++)				      \
   568  	stack->item.tags[i] = (_tags)[i];				      \
   569        BT_STACK_MBSTATE_IN;						      \
   570      }									      \
   571    while (0)
   572  
   573  #define BT_STACK_POP()							      \
   574    do									      \
   575      {									      \
   576        int i;								      \
   577        assert(stack->prev);						      \
   578        pos = stack->item.pos;						      \
   579        str_byte = stack->item.str_byte;					      \
   580        state = stack->item.state;					      \
   581        next_c = stack->item.next_c;					      \
   582        for (i = 0; i < tnfa->num_tags; i++)				      \
   583  	tags[i] = stack->item.tags[i];					      \
   584        BT_STACK_MBSTATE_OUT;						      \
   585        stack = stack->prev;						      \
   586      }									      \
   587    while (0)
   588  
   589  #undef MIN
   590  #define MIN(a, b) ((a) <= (b) ? (a) : (b))
   591  
   592  static reg_errcode_t
   593  tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
   594  		       regoff_t *match_tags, int eflags, regoff_t *match_end_ofs)
   595  {
   596    /* State variables required by GET_NEXT_WCHAR. */
   597    tre_char_t prev_c = 0, next_c = 0;
   598    const char *str_byte = string;
   599    regoff_t pos = 0;
   600    regoff_t pos_add_next = 1;
   601  #ifdef TRE_MBSTATE
   602    mbstate_t mbstate;
   603  #endif /* TRE_MBSTATE */
   604    int reg_notbol = eflags & REG_NOTBOL;
   605    int reg_noteol = eflags & REG_NOTEOL;
   606    int reg_newline = tnfa->cflags & REG_NEWLINE;
   607  
   608    /* These are used to remember the necessary values of the above
   609       variables to return to the position where the current search
   610       started from. */
   611    int next_c_start;
   612    const char *str_byte_start;
   613    regoff_t pos_start = -1;
   614  #ifdef TRE_MBSTATE
   615    mbstate_t mbstate_start;
   616  #endif /* TRE_MBSTATE */
   617  
   618    /* End offset of best match so far, or -1 if no match found yet. */
   619    regoff_t match_eo = -1;
   620    /* Tag arrays. */
   621    int *next_tags;
   622    regoff_t *tags = NULL;
   623    /* Current TNFA state. */
   624    tre_tnfa_transition_t *state;
   625    int *states_seen = NULL;
   626  
   627    /* Memory allocator to for allocating the backtracking stack. */
   628    tre_mem_t mem = tre_bt_mem_new();
   629  
   630    /* The backtracking stack. */
   631    tre_backtrack_t stack;
   632  
   633    tre_tnfa_transition_t *trans_i;
   634    regmatch_t *pmatch = NULL;
   635    int ret;
   636  
   637  #ifdef TRE_MBSTATE
   638    memset(&mbstate, '\0', sizeof(mbstate));
   639  #endif /* TRE_MBSTATE */
   640  
   641    if (!mem)
   642      return REG_ESPACE;
   643    stack = tre_bt_mem_alloc(mem, sizeof(*stack));
   644    if (!stack)
   645      {
   646        ret = REG_ESPACE;
   647        goto error_exit;
   648      }
   649    stack->prev = NULL;
   650    stack->next = NULL;
   651  
   652    if (tnfa->num_tags)
   653      {
   654        tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
   655        if (!tags)
   656  	{
   657  	  ret = REG_ESPACE;
   658  	  goto error_exit;
   659  	}
   660      }
   661    if (tnfa->num_submatches)
   662      {
   663        pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
   664        if (!pmatch)
   665  	{
   666  	  ret = REG_ESPACE;
   667  	  goto error_exit;
   668  	}
   669      }
   670    if (tnfa->num_states)
   671      {
   672        states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
   673        if (!states_seen)
   674  	{
   675  	  ret = REG_ESPACE;
   676  	  goto error_exit;
   677  	}
   678      }
   679  
   680   retry:
   681    {
   682      int i;
   683      for (i = 0; i < tnfa->num_tags; i++)
   684        {
   685  	tags[i] = -1;
   686  	if (match_tags)
   687  	  match_tags[i] = -1;
   688        }
   689      for (i = 0; i < tnfa->num_states; i++)
   690        states_seen[i] = 0;
   691    }
   692  
   693    state = NULL;
   694    pos = pos_start;
   695    GET_NEXT_WCHAR();
   696    pos_start = pos;
   697    next_c_start = next_c;
   698    str_byte_start = str_byte;
   699  #ifdef TRE_MBSTATE
   700    mbstate_start = mbstate;
   701  #endif /* TRE_MBSTATE */
   702  
   703    /* Handle initial states. */
   704    next_tags = NULL;
   705    for (trans_i = tnfa->initial; trans_i->state; trans_i++)
   706      {
   707        if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
   708  	{
   709  	  continue;
   710  	}
   711        if (state == NULL)
   712  	{
   713  	  /* Start from this state. */
   714  	  state = trans_i->state;
   715  	  next_tags = trans_i->tags;
   716  	}
   717        else
   718  	{
   719  	  /* Backtrack to this state. */
   720  	  BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
   721  			trans_i->state_id, next_c, tags, mbstate);
   722  	  {
   723  	    int *tmp = trans_i->tags;
   724  	    if (tmp)
   725  	      while (*tmp >= 0)
   726  		stack->item.tags[*tmp++] = pos;
   727  	  }
   728  	}
   729      }
   730  
   731    if (next_tags)
   732      for (; *next_tags >= 0; next_tags++)
   733        tags[*next_tags] = pos;
   734  
   735  
   736    if (state == NULL)
   737      goto backtrack;
   738  
   739    while (1)
   740      {
   741        tre_tnfa_transition_t *next_state;
   742        int empty_br_match;
   743  
   744        if (state == tnfa->final)
   745  	{
   746  	  if (match_eo < pos
   747  	      || (match_eo == pos
   748  		  && match_tags
   749  		  && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
   750  				   tags, match_tags)))
   751  	    {
   752  	      int i;
   753  	      /* This match wins the previous match. */
   754  	      match_eo = pos;
   755  	      if (match_tags)
   756  		for (i = 0; i < tnfa->num_tags; i++)
   757  		  match_tags[i] = tags[i];
   758  	    }
   759  	  /* Our TNFAs never have transitions leaving from the final state,
   760  	     so we jump right to backtracking. */
   761  	  goto backtrack;
   762  	}
   763  
   764        /* Go to the next character in the input string. */
   765        empty_br_match = 0;
   766        trans_i = state;
   767        if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
   768  	{
   769  	  /* This is a back reference state.  All transitions leaving from
   770  	     this state have the same back reference "assertion".  Instead
   771  	     of reading the next character, we match the back reference. */
   772  	  regoff_t so, eo;
   773  	  int bt = trans_i->u.backref;
   774  	  regoff_t bt_len;
   775  	  int result;
   776  
   777  	  /* Get the substring we need to match against.  Remember to
   778  	     turn off REG_NOSUB temporarily. */
   779  	  tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
   780  			  tnfa, tags, pos);
   781  	  so = pmatch[bt].rm_so;
   782  	  eo = pmatch[bt].rm_eo;
   783  	  bt_len = eo - so;
   784  
   785  	  result = strncmp((const char*)string + so, str_byte - 1,
   786  				 (size_t)bt_len);
   787  
   788  	  if (result == 0)
   789  	    {
   790  	      /* Back reference matched.  Check for infinite loop. */
   791  	      if (bt_len == 0)
   792  		empty_br_match = 1;
   793  	      if (empty_br_match && states_seen[trans_i->state_id])
   794  		{
   795  		  goto backtrack;
   796  		}
   797  
   798  	      states_seen[trans_i->state_id] = empty_br_match;
   799  
   800  	      /* Advance in input string and resync `prev_c', `next_c'
   801  		 and pos. */
   802  	      str_byte += bt_len - 1;
   803  	      pos += bt_len - 1;
   804  	      GET_NEXT_WCHAR();
   805  	    }
   806  	  else
   807  	    {
   808  	      goto backtrack;
   809  	    }
   810  	}
   811        else
   812  	{
   813  	  /* Check for end of string. */
   814  	  if (next_c == L'\0')
   815  		goto backtrack;
   816  
   817  	  /* Read the next character. */
   818  	  GET_NEXT_WCHAR();
   819  	}
   820  
   821        next_state = NULL;
   822        for (trans_i = state; trans_i->state; trans_i++)
   823  	{
   824  	  if (trans_i->code_min <= (tre_cint_t)prev_c
   825  	      && trans_i->code_max >= (tre_cint_t)prev_c)
   826  	    {
   827  	      if (trans_i->assertions
   828  		  && (CHECK_ASSERTIONS(trans_i->assertions)
   829  		      || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
   830  		{
   831  		  continue;
   832  		}
   833  
   834  	      if (next_state == NULL)
   835  		{
   836  		  /* First matching transition. */
   837  		  next_state = trans_i->state;
   838  		  next_tags = trans_i->tags;
   839  		}
   840  	      else
   841  		{
   842  		  /* Second matching transition.  We may need to backtrack here
   843  		     to take this transition instead of the first one, so we
   844  		     push this transition in the backtracking stack so we can
   845  		     jump back here if needed. */
   846  		  BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
   847  				trans_i->state_id, next_c, tags, mbstate);
   848  		  {
   849  		    int *tmp;
   850  		    for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
   851  		      stack->item.tags[*tmp] = pos;
   852  		  }
   853  #if 0 /* XXX - it's important not to look at all transitions here to keep
   854  	 the stack small! */
   855  		  break;
   856  #endif
   857  		}
   858  	    }
   859  	}
   860  
   861        if (next_state != NULL)
   862  	{
   863  	  /* Matching transitions were found.  Take the first one. */
   864  	  state = next_state;
   865  
   866  	  /* Update the tag values. */
   867  	  if (next_tags)
   868  	    while (*next_tags >= 0)
   869  	      tags[*next_tags++] = pos;
   870  	}
   871        else
   872  	{
   873  	backtrack:
   874  	  /* A matching transition was not found.  Try to backtrack. */
   875  	  if (stack->prev)
   876  	    {
   877  	      if (stack->item.state->assertions & ASSERT_BACKREF)
   878  		{
   879  		  states_seen[stack->item.state_id] = 0;
   880  		}
   881  
   882  	      BT_STACK_POP();
   883  	    }
   884  	  else if (match_eo < 0)
   885  	    {
   886  	      /* Try starting from a later position in the input string. */
   887  	      /* Check for end of string. */
   888  	      if (next_c == L'\0')
   889  		    {
   890  		      break;
   891  		    }
   892  	      next_c = next_c_start;
   893  #ifdef TRE_MBSTATE
   894  	      mbstate = mbstate_start;
   895  #endif /* TRE_MBSTATE */
   896  	      str_byte = str_byte_start;
   897  	      goto retry;
   898  	    }
   899  	  else
   900  	    {
   901  	      break;
   902  	    }
   903  	}
   904      }
   905  
   906    ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
   907    *match_end_ofs = match_eo;
   908  
   909   error_exit:
   910    tre_bt_mem_destroy(mem);
   911  #ifndef TRE_USE_ALLOCA
   912    if (tags)
   913      xfree(tags);
   914    if (pmatch)
   915      xfree(pmatch);
   916    if (states_seen)
   917      xfree(states_seen);
   918  #endif /* !TRE_USE_ALLOCA */
   919  
   920    return ret;
   921  }
   922  
   923  /***********************************************************************
   924   from regexec.c
   925  ***********************************************************************/
   926  
   927  /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
   928     endpoint values. */
   929  static void
   930  tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
   931  		const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo)
   932  {
   933    tre_submatch_data_t *submatch_data;
   934    unsigned int i, j;
   935    int *parents;
   936  
   937    i = 0;
   938    if (match_eo >= 0 && !(cflags & REG_NOSUB))
   939      {
   940        /* Construct submatch offsets from the tags. */
   941        submatch_data = tnfa->submatch_data;
   942        while (i < tnfa->num_submatches && i < nmatch)
   943  	{
   944  	  if (submatch_data[i].so_tag == tnfa->end_tag)
   945  	    pmatch[i].rm_so = match_eo;
   946  	  else
   947  	    pmatch[i].rm_so = tags[submatch_data[i].so_tag];
   948  
   949  	  if (submatch_data[i].eo_tag == tnfa->end_tag)
   950  	    pmatch[i].rm_eo = match_eo;
   951  	  else
   952  	    pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
   953  
   954  	  /* If either of the endpoints were not used, this submatch
   955  	     was not part of the match. */
   956  	  if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
   957  	    pmatch[i].rm_so = pmatch[i].rm_eo = -1;
   958  
   959  	  i++;
   960  	}
   961        /* Reset all submatches that are not within all of their parent
   962  	 submatches. */
   963        i = 0;
   964        while (i < tnfa->num_submatches && i < nmatch)
   965  	{
   966  	  if (pmatch[i].rm_eo == -1)
   967  	    assert(pmatch[i].rm_so == -1);
   968  	  assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
   969  
   970  	  parents = submatch_data[i].parents;
   971  	  if (parents != NULL)
   972  	    for (j = 0; parents[j] >= 0; j++)
   973  	      {
   974  		if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
   975  		    || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
   976  		  pmatch[i].rm_so = pmatch[i].rm_eo = -1;
   977  	      }
   978  	  i++;
   979  	}
   980      }
   981  
   982    while (i < nmatch)
   983      {
   984        pmatch[i].rm_so = -1;
   985        pmatch[i].rm_eo = -1;
   986        i++;
   987      }
   988  }
   989  
   990  
   991  /*
   992    Wrapper functions for POSIX compatible regexp matching.
   993  */
   994  
   995  int
   996  regexec(const regex_t *restrict preg, const char *restrict string,
   997  	  size_t nmatch, regmatch_t pmatch[restrict], int eflags)
   998  {
   999    tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
  1000    reg_errcode_t status;
  1001    regoff_t *tags = NULL, eo;
  1002    if (tnfa->cflags & REG_NOSUB) nmatch = 0;
  1003    if (tnfa->num_tags > 0 && nmatch > 0)
  1004      {
  1005        tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
  1006        if (tags == NULL)
  1007  	return REG_ESPACE;
  1008      }
  1009  
  1010    /* Dispatch to the appropriate matcher. */
  1011    if (tnfa->have_backrefs)
  1012      {
  1013        /* The regex has back references, use the backtracking matcher. */
  1014        status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
  1015      }
  1016    else
  1017      {
  1018        /* Exact matching, no back references, use the parallel matcher. */
  1019        status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
  1020      }
  1021  
  1022    if (status == REG_OK)
  1023      /* A match was found, so fill the submatch registers. */
  1024      tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
  1025    if (tags)
  1026      xfree(tags);
  1027    return status;
  1028  }