github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/lib/stringlib/pattern/builder.go (about)

     1  package pattern
     2  
     3  import (
     4  	"errors"
     5  	"fmt"
     6  )
     7  
     8  const maxPatternSize = 10000
     9  
    10  type patternBuilder struct {
    11  	items                   []patternItem
    12  	ciMax                   uint64
    13  	cStack                  []uint64
    14  	ptn                     string
    15  	i                       int
    16  	anchorLeft, anchorRight bool
    17  }
    18  
    19  func (pb *patternBuilder) getPattern() (*Pattern, error) {
    20  	// var anchorLeft, anchorRight bool
    21  	// if len(pb.ptn) > 0 && pb.ptn[0] == '^' {
    22  	// 	anchorLeft = true
    23  	// 	pb.ptn = pb.ptn[1:]
    24  	// }
    25  	// if last := len(pb.ptn) - 1; last >= 0 && pb.ptn[last] == '$' {
    26  	// 	anchorRight = true
    27  	// 	pb.ptn = pb.ptn[:last]
    28  	// }
    29  	sz := 0
    30  	for pb.i < len(pb.ptn) {
    31  		err := pb.getPatternItem()
    32  		if err != nil {
    33  			return nil, err
    34  		}
    35  		sz++
    36  		if sz > maxPatternSize {
    37  			return nil, errPatternTooComplex
    38  		}
    39  	}
    40  	if len(pb.cStack) != 0 {
    41  		return nil, errUnfinishedCapture
    42  	}
    43  	return &Pattern{
    44  		items:        pb.items,
    45  		captureCount: int(pb.ciMax),
    46  		startAnchor:  pb.anchorLeft,
    47  		endAnchor:    pb.anchorRight,
    48  	}, nil
    49  }
    50  
    51  func (pb *patternBuilder) next() (byte, error) {
    52  	if pb.i >= len(pb.ptn) {
    53  		return 0, errInvalidPattern
    54  	}
    55  	b := pb.ptn[pb.i]
    56  	pb.i++
    57  	return b, nil
    58  }
    59  
    60  func (pb *patternBuilder) back() {
    61  	pb.i--
    62  }
    63  
    64  func (pb *patternBuilder) emit(item patternItem) {
    65  	pb.items = append(pb.items, item)
    66  }
    67  
    68  func (pb *patternBuilder) getPatternItem() error {
    69  	b, err := pb.next()
    70  	if err != nil {
    71  		return err
    72  	}
    73  	var s byteSet
    74  	switch b {
    75  	case '^':
    76  		if pb.i == 1 {
    77  			pb.anchorLeft = true
    78  			return nil
    79  		}
    80  		pb.back()
    81  		s, err = pb.getCharClass()
    82  	case '$':
    83  		if pb.i == len(pb.ptn) {
    84  			pb.anchorRight = true
    85  			return nil
    86  		}
    87  		pb.back()
    88  		s, err = pb.getCharClass()
    89  	case '(':
    90  		pb.ciMax++
    91  		if pb.ciMax >= 10 {
    92  			return errInvalidPattern
    93  		}
    94  		b, err = pb.next()
    95  		if err != nil {
    96  			return err
    97  		}
    98  		if b != ')' {
    99  			// Special case: empty capture will generate a position. So we only
   100  			// emit a ptnStartCapture and skip the ptnEndCapture.  The pattern
   101  			// matcher will then create a capture whose end is -1.
   102  			pb.back()
   103  			pb.cStack = append(pb.cStack, pb.ciMax)
   104  		}
   105  		pb.emit(patternItem{byteSet{pb.ciMax}, ptnStartCapture})
   106  		return nil
   107  	case ')':
   108  		i := len(pb.cStack) - 1
   109  		if i < 0 {
   110  			return errInvalidPatternCapture
   111  		}
   112  		pb.emit(patternItem{byteSet{pb.cStack[i]}, ptnEndCapture})
   113  		pb.cStack = pb.cStack[:i]
   114  		return nil
   115  	case '%':
   116  		c, err := pb.next()
   117  		if err != nil {
   118  			return err
   119  		}
   120  		switch {
   121  		case c == 'f':
   122  			s, err := pb.getCharClass()
   123  			if err == nil {
   124  				pb.emit(patternItem{s, ptnFrontier})
   125  			}
   126  			return err
   127  		case c == 'b':
   128  			op, err := pb.next()
   129  			if err != nil {
   130  				return err
   131  			}
   132  			cl, err := pb.next()
   133  			if err != nil {
   134  				return err
   135  			}
   136  			// The doc says op and cl must be different, but the 5.3.4
   137  			// implementation allows them to be equal.
   138  			// if op == cl {
   139  			// 	return errInvalidPattern
   140  			// }
   141  			pb.emit(patternItem{[4]uint64{uint64(op), uint64(cl)}, ptnBalanced})
   142  			return nil
   143  		case c >= '1' && c <= '9':
   144  			ci := uint64(c - '0')
   145  			if !pb.checkCapture(ci) {
   146  				return ErrInvalidCaptureIdx(int(ci))
   147  			}
   148  			pb.emit(patternItem{[4]uint64{ci}, ptnCapture})
   149  			return nil
   150  		default:
   151  			s, err = getCharRange(c)
   152  			if err != nil {
   153  				return err
   154  			}
   155  		}
   156  	default:
   157  		pb.back()
   158  		s, err = pb.getCharClass()
   159  	}
   160  	if err != nil {
   161  		return err
   162  	}
   163  	b, err = pb.next()
   164  	ptnType := ptnOnce
   165  	if err == nil {
   166  		switch b {
   167  		case '*':
   168  			ptnType = ptnGreedyRepeat
   169  		case '+':
   170  			ptnType = ptnGreedyRepeatOnce
   171  		case '-':
   172  			ptnType = ptnRepeat
   173  		case '?':
   174  			ptnType = ptnOptional
   175  		default:
   176  			pb.back()
   177  		}
   178  	}
   179  	pb.emit(patternItem{s, ptnType})
   180  	return nil
   181  }
   182  
   183  func (pb *patternBuilder) checkCapture(ci uint64) bool {
   184  	if ci > pb.ciMax {
   185  		return false
   186  	}
   187  	for _, sci := range pb.cStack {
   188  		if sci == ci {
   189  			return false
   190  		}
   191  	}
   192  	return true
   193  }
   194  
   195  func (pb *patternBuilder) getCharClass() (byteSet, error) {
   196  	b, err := pb.next()
   197  	if err != nil {
   198  		return byteSet{}, err
   199  	}
   200  	switch b {
   201  	case '.':
   202  		return fullSet, nil
   203  	case '%':
   204  		b, err := pb.next()
   205  		if err != nil {
   206  			return byteSet{}, err
   207  		}
   208  		return getCharRange(b)
   209  	case '[':
   210  		return pb.getUnion()
   211  	default:
   212  		s := byteSet{}
   213  		s.add(b)
   214  		return s, nil
   215  	}
   216  }
   217  
   218  func (pb *patternBuilder) getUnion() (s byteSet, err error) {
   219  	var b byte
   220  	b, err = pb.next()
   221  	neg := false
   222  	// Note: no need to check err if b is not 0
   223  	if b == '^' {
   224  		neg = true
   225  		b, err = pb.next()
   226  	}
   227  	if b == ']' {
   228  		s.add(b)
   229  		b, err = pb.next()
   230  	}
   231  	var r byteSet
   232  Loop:
   233  	for err == nil {
   234  		switch {
   235  		case b == ']':
   236  			if neg {
   237  				s.complement()
   238  			}
   239  			return
   240  		case b == '%':
   241  			b, err = pb.next()
   242  			if err != nil {
   243  				return
   244  			}
   245  			r, err = getCharRange(b)
   246  			if err != nil {
   247  				return
   248  			}
   249  			s.merge(r)
   250  		default:
   251  			c := b
   252  			b, err = pb.next()
   253  			if err != nil {
   254  				return
   255  			}
   256  			if b == '-' {
   257  				b, err = pb.next()
   258  				if err != nil {
   259  					return
   260  				}
   261  				if b == ']' {
   262  					s.add(c)
   263  					s.add('-')
   264  					continue Loop
   265  				}
   266  				s.merge(byteRange(c, b))
   267  			} else {
   268  				s.add(c)
   269  				continue Loop
   270  			}
   271  		}
   272  		b, err = pb.next()
   273  	}
   274  	return
   275  }
   276  
   277  func getCharRange(c byte) (byteSet, error) {
   278  	s, ok := namedByteSet[c]
   279  	if !ok {
   280  		switch {
   281  		case c == '0':
   282  			return s, ErrInvalidCaptureIdx(0)
   283  		case (c >= '1' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'):
   284  			return s, ErrInvalidPct
   285  		default:
   286  			s.add(c)
   287  		}
   288  	}
   289  	return s, nil
   290  }
   291  
   292  var ErrInvalidPct = errors.New("invalid use of '%'")
   293  
   294  func ErrInvalidCaptureIdx(i int) error {
   295  	return fmt.Errorf("invalid capture index %%%d", i)
   296  }