github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/lib/stringlib/pattern/matcher.go (about)

     1  package pattern
     2  
// patternMatcher holds the mutable state of matching the embedded Pattern
// against the string s.
type patternMatcher struct {
	Pattern
	s          string // string to match
	// captures[0] receives the bounds of the whole match (see matchToEnd); the
	// other slots are written by the capture items using the index they carry.
	captures   [10]Capture
	si         int // current index in s (string to match)
	// ci is zeroed by reset and saved / restored together with each trackback;
	// nothing else in this part of the file reads or changes it.
	ci         int
	pi         int // current index in pattern
	// trackbacks is a stack of saved states; trackback() resumes from the last
	// entry.
	trackbacks []trackback

	// budget limits the number of bytes the matcher may consume: 0 means no
	// limit, otherwise consumeBudget counts it down and panics with
	// budgetConsumed when it reaches 0.
	budget uint64
}
    14  
    15  type trackback struct {
    16  	si, ci, pi int
    17  	siMin      int
    18  }
    19  
    20  func (m *patternMatcher) reset(si int) {
    21  	m.trackbacks = nil
    22  	m.si = si
    23  	m.ci = 0
    24  	m.pi = 0
    25  }
    26  
    27  func (m *patternMatcher) find() []Capture {
    28  	for si := m.si; si <= len(m.s); si++ {
    29  		m.reset(si)
    30  		if captures := m.matchToEnd(); captures != nil {
    31  			return captures
    32  		}
    33  	}
    34  	return nil
    35  }
    36  
    37  func (m *patternMatcher) findFromStart() []Capture {
    38  	if m.startAnchor {
    39  		return m.matchToEnd()
    40  	}
    41  	return m.find()
    42  }
    43  
    44  func (m *patternMatcher) matchToEnd() []Capture {
    45  	m.captures[0].start = m.si
    46  	for {
    47  		m.match()
    48  		if m.si == -1 {
    49  			return nil
    50  		}
    51  		if !m.endAnchor || m.si == len(m.s) {
    52  			m.captures[0].end = m.si
    53  			return m.captures[:m.captureCount+1]
    54  		}
    55  		m.trackback()
    56  	}
    57  }
    58  
    59  func (m *patternMatcher) match() {
    60  	for m.pi < len(m.items) {
    61  		switch item := m.items[m.pi]; item.ptnType {
    62  		case ptnOnce:
    63  			if !m.matchNext(item.bytes) {
    64  				m.trackback()
    65  			} else {
    66  				m.pi++
    67  			}
    68  		case ptnGreedyRepeat:
    69  			si := m.si
    70  			for m.matchNext(item.bytes) {
    71  			}
    72  			m.pi++
    73  			if si < m.si {
    74  				m.addTrackback(si)
    75  			}
    76  		case ptnGreedyRepeatOnce:
    77  			if !m.matchNext(item.bytes) {
    78  				m.trackback()
    79  			} else {
    80  				si := m.si
    81  				for m.matchNext(item.bytes) {
    82  				}
    83  				m.pi++
    84  				if si < m.si {
    85  					m.addTrackback(si)
    86  				}
    87  			}
    88  		case ptnRepeat:
    89  			if m.matchNext(item.bytes) {
    90  				m.addTrackback(m.si)
    91  				m.si--
    92  			}
    93  			m.pi++
    94  		case ptnOptional:
    95  			si := m.si
    96  			m.pi++
    97  			if m.matchNext(item.bytes) {
    98  				m.addTrackback(si)
    99  			}
   100  		case ptnCapture:
   101  			c := m.captures[item.bytes[0]]
   102  			end := m.si + c.end - c.start
   103  			if end <= len(m.s) && m.s[c.start:c.end] == m.s[m.si:end] {
   104  				m.si = end
   105  				m.pi++
   106  			} else {
   107  				m.trackback()
   108  			}
   109  		case ptnBalanced:
   110  			op := byte(item.bytes[0])
   111  			if b, ok := m.getNext(); !ok || b != op {
   112  				m.trackback()
   113  			} else {
   114  				cl := byte(item.bytes[1])
   115  				depth := 1
   116  			BLoop:
   117  				for {
   118  					b, ok := m.getNext()
   119  					if !ok {
   120  						m.trackback()
   121  						break BLoop
   122  					}
   123  					switch b {
   124  					case cl:
   125  						depth--
   126  						if depth == 0 {
   127  							m.pi++
   128  							break BLoop
   129  						}
   130  					case op:
   131  						depth++
   132  					}
   133  				}
   134  			}
   135  		case ptnFrontier:
   136  			var p, n byte
   137  			if m.si > 0 {
   138  				p = m.s[m.si-1]
   139  			}
   140  			if m.si < len(m.s) {
   141  				n = m.s[m.si]
   142  			}
   143  			s := item.bytes
   144  			if s.contains(p) || !s.contains(n) {
   145  				m.trackback()
   146  			} else {
   147  				m.pi++
   148  			}
   149  		case ptnStartCapture:
   150  			// The end of the capture is set to -1.  If this is an empty
   151  			// capture, no ptnEndCapture item was emitted so the end will remain
   152  			// -1.
   153  			m.captures[item.bytes[0]] = Capture{m.si, -1}
   154  			m.pi++
   155  		case ptnEndCapture:
   156  			m.captures[item.bytes[0]].end = m.si
   157  			m.pi++
   158  		default:
   159  			panic("???")
   160  		}
   161  	}
   162  }
   163  
   164  func (m *patternMatcher) matchNext(s byteSet) bool {
   165  	match := m.si < len(m.s) && s.contains(m.s[m.si])
   166  	if match {
   167  		m.si++
   168  		m.consumeBudget()
   169  	}
   170  	return match
   171  }
   172  
   173  func (m *patternMatcher) getNext() (b byte, ok bool) {
   174  	ok = m.si < len(m.s)
   175  	if ok {
   176  		b = m.s[m.si]
   177  		m.si++
   178  		m.consumeBudget()
   179  	}
   180  	return
   181  }
   182  
   183  func (m *patternMatcher) trackback() {
   184  	i := len(m.trackbacks) - 1
   185  	if i < 0 {
   186  		m.pi = len(m.items)
   187  		m.si = -1
   188  	} else {
   189  		t := &m.trackbacks[i]
   190  		m.si = t.si
   191  		m.pi = t.pi
   192  		m.ci = t.ci
   193  		if t.si > t.siMin {
   194  			t.si--
   195  		} else {
   196  			m.trackbacks = m.trackbacks[:i]
   197  		}
   198  	}
   199  }
   200  
   201  func (m *patternMatcher) addTrackback(siMin int) {
   202  	m.trackbacks = append(m.trackbacks, trackback{m.si, m.ci, m.pi, siMin})
   203  }
   204  
   205  func (m *patternMatcher) consumeBudget() {
   206  	if m.budget == 0 {
   207  		return
   208  	}
   209  	m.budget--
   210  	if m.budget == 0 {
   211  		panic(budgetConsumed)
   212  	}
   213  }
   214  
   215  var budgetConsumed interface{} = "budget consumed"