github.com/arnodel/golua@v0.0.0-20230215163904-e0b5347eaaa1/lib/stringlib/pattern/matcher.go (about) 1 package pattern 2 3 type patternMatcher struct { 4 Pattern 5 s string // string to match 6 captures [10]Capture 7 si int // current index in s (string to match) 8 ci int 9 pi int // current index in pattern 10 trackbacks []trackback 11 12 budget uint64 13 } 14 15 type trackback struct { 16 si, ci, pi int 17 siMin int 18 } 19 20 func (m *patternMatcher) reset(si int) { 21 m.trackbacks = nil 22 m.si = si 23 m.ci = 0 24 m.pi = 0 25 } 26 27 func (m *patternMatcher) find() []Capture { 28 for si := m.si; si <= len(m.s); si++ { 29 m.reset(si) 30 if captures := m.matchToEnd(); captures != nil { 31 return captures 32 } 33 } 34 return nil 35 } 36 37 func (m *patternMatcher) findFromStart() []Capture { 38 if m.startAnchor { 39 return m.matchToEnd() 40 } 41 return m.find() 42 } 43 44 func (m *patternMatcher) matchToEnd() []Capture { 45 m.captures[0].start = m.si 46 for { 47 m.match() 48 if m.si == -1 { 49 return nil 50 } 51 if !m.endAnchor || m.si == len(m.s) { 52 m.captures[0].end = m.si 53 return m.captures[:m.captureCount+1] 54 } 55 m.trackback() 56 } 57 } 58 59 func (m *patternMatcher) match() { 60 for m.pi < len(m.items) { 61 switch item := m.items[m.pi]; item.ptnType { 62 case ptnOnce: 63 if !m.matchNext(item.bytes) { 64 m.trackback() 65 } else { 66 m.pi++ 67 } 68 case ptnGreedyRepeat: 69 si := m.si 70 for m.matchNext(item.bytes) { 71 } 72 m.pi++ 73 if si < m.si { 74 m.addTrackback(si) 75 } 76 case ptnGreedyRepeatOnce: 77 if !m.matchNext(item.bytes) { 78 m.trackback() 79 } else { 80 si := m.si 81 for m.matchNext(item.bytes) { 82 } 83 m.pi++ 84 if si < m.si { 85 m.addTrackback(si) 86 } 87 } 88 case ptnRepeat: 89 if m.matchNext(item.bytes) { 90 m.addTrackback(m.si) 91 m.si-- 92 } 93 m.pi++ 94 case ptnOptional: 95 si := m.si 96 m.pi++ 97 if m.matchNext(item.bytes) { 98 m.addTrackback(si) 99 } 100 case ptnCapture: 101 c := m.captures[item.bytes[0]] 102 end := m.si + c.end - c.start 103 if end <= len(m.s) && m.s[c.start:c.end] == m.s[m.si:end] { 104 m.si = end 105 m.pi++ 106 } else { 107 m.trackback() 108 } 109 case ptnBalanced: 110 op := byte(item.bytes[0]) 111 if b, ok := m.getNext(); !ok || b != op { 112 m.trackback() 113 } else { 114 cl := byte(item.bytes[1]) 115 depth := 1 116 BLoop: 117 for { 118 b, ok := m.getNext() 119 if !ok { 120 m.trackback() 121 break BLoop 122 } 123 switch b { 124 case cl: 125 depth-- 126 if depth == 0 { 127 m.pi++ 128 break BLoop 129 } 130 case op: 131 depth++ 132 } 133 } 134 } 135 case ptnFrontier: 136 var p, n byte 137 if m.si > 0 { 138 p = m.s[m.si-1] 139 } 140 if m.si < len(m.s) { 141 n = m.s[m.si] 142 } 143 s := item.bytes 144 if s.contains(p) || !s.contains(n) { 145 m.trackback() 146 } else { 147 m.pi++ 148 } 149 case ptnStartCapture: 150 // The end of the capture is set to -1. If this is an empty 151 // capture, no ptnEndCapture item was emitted so the end will remain 152 // -1. 153 m.captures[item.bytes[0]] = Capture{m.si, -1} 154 m.pi++ 155 case ptnEndCapture: 156 m.captures[item.bytes[0]].end = m.si 157 m.pi++ 158 default: 159 panic("???") 160 } 161 } 162 } 163 164 func (m *patternMatcher) matchNext(s byteSet) bool { 165 match := m.si < len(m.s) && s.contains(m.s[m.si]) 166 if match { 167 m.si++ 168 m.consumeBudget() 169 } 170 return match 171 } 172 173 func (m *patternMatcher) getNext() (b byte, ok bool) { 174 ok = m.si < len(m.s) 175 if ok { 176 b = m.s[m.si] 177 m.si++ 178 m.consumeBudget() 179 } 180 return 181 } 182 183 func (m *patternMatcher) trackback() { 184 i := len(m.trackbacks) - 1 185 if i < 0 { 186 m.pi = len(m.items) 187 m.si = -1 188 } else { 189 t := &m.trackbacks[i] 190 m.si = t.si 191 m.pi = t.pi 192 m.ci = t.ci 193 if t.si > t.siMin { 194 t.si-- 195 } else { 196 m.trackbacks = m.trackbacks[:i] 197 } 198 } 199 } 200 201 func (m *patternMatcher) addTrackback(siMin int) { 202 m.trackbacks = append(m.trackbacks, trackback{m.si, m.ci, m.pi, siMin}) 203 } 204 205 func (m *patternMatcher) consumeBudget() { 206 if m.budget == 0 { 207 return 208 } 209 m.budget-- 210 if m.budget == 0 { 211 panic(budgetConsumed) 212 } 213 } 214 215 var budgetConsumed interface{} = "budget consumed"