github.com/wbrown/gpt_bpe@v0.0.0-20250709161131-1571a6e8ad2d/runetree.go (about)

     1  package gpt_bpe
     2  
import (
	"regexp/syntax"
	"sort"
	"strings"
	"unicode"
)
     8  
// RuneNode is one node of a rune trie. Children are stored in a map and,
// while the fan-out is small, also in a slice that keeps insertion order.
type RuneNode struct {
	rune              rune               // The rune this node represents.
	runes             []rune             // The runes from the root up to and including this node.
	terminal          bool               // True when an inserted key ends exactly at this node.
	replacement       *[]rune            // The replacement runes for this node; nil when it has none.
	childs            map[rune]*RuneNode // The child nodes, keyed by their rune.
	childsArr         *[]*RuneNode       // The child nodes in insertion order; nil once there are more than 10 children.
	isPrefix          bool               // Whether this node is a valid prefix match. NOTE(review): not written in this file.
	isContractionTree bool               // Whether this node is treated as part of a contraction tree.
}
    19  
// RuneNodes is a list of trie positions that its evaluate method advances
// together, one rune at a time.
type RuneNodes []*RuneNode
    21  
    22  func runeIsIn(r rune, runes []rune) bool {
    23  	for _, rr := range runes {
    24  		if r == rr {
    25  			return true
    26  		}
    27  	}
    28  	return false
    29  }
    30  
    31  func (nodes *RuneNodes) evaluate(r rune) *RuneNode {
    32  	var idx int
    33  	var candidate *RuneNode
    34  	for idx, candidate = range *nodes {
    35  
    36  		var isContraction bool
    37  		if candidate.isContractionTree {
    38  			isContraction = true
    39  		}
    40  		candidate = candidate.evaluate(r)
    41  		// ' is not a contraction but 's is,
    42  		// so we don't care about nils if we're in a contraction tree
    43  		if candidate == nil && isContraction {
    44  			continue
    45  		}
    46  
    47  		(*nodes)[idx] = candidate
    48  
    49  		if candidate != nil && (candidate.terminal || candidate.
    50  			replacement != nil) {
    51  			break
    52  		}
    53  	}
    54  	// Clean out any nodes that are no longer valid.
    55  	for idx = 0; idx < len(*nodes); idx++ {
    56  		if idx >= len(*nodes) {
    57  			break
    58  		}
    59  		if (*nodes)[idx] == nil {
    60  			*nodes = append((*nodes)[:idx], (*nodes)[idx+1:]...)
    61  			idx--
    62  		}
    63  	}
    64  	return candidate
    65  }
    66  
    67  func (node *RuneNode) evaluate(r rune) *RuneNode {
    68  	// If the node has an array of children, use that. The array exists if the
    69  	// node has less than 10 children, and is used to speed up the evaluation
    70  	// of the node.
    71  	if node.childsArr != nil {
    72  		children := *node.childsArr
    73  		for _, child := range children {
    74  			if child.rune == r {
    75  				return child
    76  			}
    77  		}
    78  	} else {
    79  		child, ok := node.childs[r]
    80  		if ok {
    81  			return child
    82  		}
    83  	}
    84  	return nil
    85  }
    86  
    87  // Represent the tree as a string by traversing the tree, and using tree
    88  // characters to represent the tree structure.
    89  func (node *RuneNode) string(level int, sb *strings.Builder) {
    90  	if node == nil {
    91  		return
    92  	}
    93  	sb.WriteRune(node.rune)
    94  	idx := 0
    95  	if len(node.childs) == 1 {
    96  		// Get the only element from the map recursively until we find a node
    97  		// with more than one child.
    98  		for r := range node.childs {
    99  			node.childs[r].string(level, sb)
   100  		}
   101  		return
   102  	}
   103  	level += 1
   104  	if node.replacement != nil {
   105  		sb.WriteString(" -> ")
   106  		sb.WriteString(string(*node.replacement))
   107  	}
   108  	sb.WriteByte('\n')
   109  
   110  	for r := range node.childs {
   111  		sb.WriteString(strings.Repeat("| ", level-1))
   112  		// If we're the last child, then we prepend with a tree terminator.
   113  		if idx == len(node.childs)-1 {
   114  			sb.WriteString("└─")
   115  		} else {
   116  			sb.WriteString("├─")
   117  		}
   118  		node.childs[r].string(level, sb)
   119  		idx += 1
   120  	}
   121  }
   122  
   123  // Wrapper
   124  func (runeTree *RuneNode) String() string {
   125  	sb := strings.Builder{}
   126  	runeTree.string(0, &sb)
   127  	return sb.String()
   128  }
   129  
   130  func (runeTree *RuneNode) insertRunes(runes []rune) (node *RuneNode) {
   131  	node = runeTree
   132  	keyLen := len(runes)
   133  	for i := 0; i < keyLen; i++ {
   134  		r := runes[i]
   135  		childNode, ok := node.childs[r]
   136  		if !ok {
   137  			children := make([]*RuneNode, 0)
   138  			node.childs[r] = &RuneNode{
   139  				rune:              r,
   140  				runes:             runes[:i+1],
   141  				terminal:          i == keyLen-1,
   142  				childs:            make(map[rune]*RuneNode, 0),
   143  				childsArr:         &children,
   144  				isContractionTree: node.isContractionTree,
   145  			}
   146  		} else if i == keyLen-1 {
   147  			childNode.terminal = true
   148  		}
   149  		if len(node.childs) > 10 {
   150  			// If there are more than 10 children, we set the array pointer
   151  			// to nil, so that we can use the map instead.
   152  			node.childsArr = nil
   153  		} else {
   154  			if node.childsArr == nil {
   155  				children := make([]*RuneNode, 0)
   156  				node.childsArr = &children
   157  			}
   158  			if len(node.childs) != len(*node.childsArr) {
   159  				*node.childsArr = append(*node.childsArr, node.childs[r])
   160  			}
   161  		}
   162  		node = node.childs[r]
   163  	}
   164  	return node
   165  }
   166  
   167  func NewRuneTree() *RuneNode {
   168  	return &RuneNode{
   169  		runes:  []rune{},
   170  		childs: make(map[rune]*RuneNode, 0),
   171  	}
   172  }
   173  
   174  // ContractionsTree creates a specialized RuneTree for handling contractions
   175  func CreateContractionsTree() *RuneNode {
   176  	tree := NewRuneTree()
   177  	contractions := []string{
   178  		"'s", "'t", "'re", "'ve", "'m", "'ll", "'d",
   179  	}
   180  	// Insert each contraction into the tree
   181  	for _, c := range contractions {
   182  		tree.insertRunes([]rune(c))
   183  	}
   184  	tree.isContractionTree = true
   185  	return tree
   186  }
   187  
   188  func (runeTree *RuneNode) InsertReplacementsIntoRuneTree(
   189  	replacements map[string]string,
   190  ) {
   191  	for k, v := range replacements {
   192  		keyRunes := []rune(k)
   193  		valueRunes := []rune(v)
   194  		keyNode := runeTree.insertRunes(keyRunes)
   195  		keyNode.replacement = &valueRunes
   196  	}
   197  }
   198  
   199  func CreateReplacementsRuneTree(replacements map[string]string) *RuneNode {
   200  	runeTree := NewRuneTree()
   201  	runeTree.isContractionTree = false
   202  	runeTree.InsertReplacementsIntoRuneTree(replacements)
   203  	return runeTree
   204  }
   205  
   206  func (runeTree *RuneNode) InsertIntoRuneTree(s []string) {
   207  	for _, k := range s {
   208  		keyRunes := []rune(k)
   209  		runeTree.insertRunes(keyRunes)
   210  	}
   211  }
   212  
   213  // Create a new rune tree from an array of strings to match against.
   214  func CreateRuneTree(s []string) *RuneNode {
   215  	runeTree := NewRuneTree()
   216  	runeTree.isContractionTree = false
   217  	runeTree.InsertIntoRuneTree(s)
   218  	return runeTree
   219  }
   220  
// rangeTuple is one code point range taken from a character class. The
// lookup in populateCharRanges treats both bounds as inclusive.
type rangeTuple struct {
	start int // First code point of the range.
	end   int // Last code point of the range.
}
   225  
   226  // The AST is given as a []rune where every two runes are the start and end of a range
   227  // We want to convert this to a list of rangeTuples for easier handling
   228  func ArrayAsRanges(runes []rune) []rangeTuple {
   229  	// [65 90 97 122 170 170 181 181 186 186 192 214 216 246 248 705 ...
   230  	// All are pairs of 2, start and end of a range, print as X-Y
   231  	ranges := make([]rangeTuple, 0)
   232  	for i := 0; i < len(runes); i += 2 {
   233  		ranges = append(ranges, rangeTuple{start: int(runes[i]), end: int(runes[i+1])})
   234  	}
   235  	return ranges
   236  }
   237  
   238  // We will need to populate a lookup table for the ranges
   239  // Once per node. Use binary search to find the rune in the ranges
   240  func populateCharRanges(i int, ranges []rangeTuple) bool {
   241  	// Binary search
   242  	low, high := 0, len(ranges)-1
   243  	for low <= high {
   244  		mid := low + (high-low)/2
   245  		if ranges[mid].start <= i && i <= ranges[mid].end {
   246  			return true
   247  		}
   248  		if i < ranges[mid].start {
   249  			high = mid - 1
   250  		} else {
   251  			low = mid + 1
   252  		}
   253  	}
   254  	// If we didn't find the rune in the ranges, return false
   255  	return false
   256  }
   257  
// RangeLUT is a lookup table for character class membership, indexed by
// code point.
type RangeLUT struct {
	lookup []bool // lookup[c] is true when code point c belongs to the class.
}
   261  
   262  func newRangeLUT(ranges []rangeTuple) *RangeLUT {
   263  	maxLutSize := ranges[len(ranges)-1].end + 1
   264  	lut := &RangeLUT{
   265  		lookup: make([]bool, maxLutSize),
   266  	}
   267  	for i := 0; i < len(lut.lookup); i++ {
   268  		lut.lookup[i] = populateCharRanges(i, ranges)
   269  	}
   270  	return lut
   271  }
   272  
   273  // Once we have done it once, we can now use a lookup table to find the rune in the ranges
   274  func containsCharInRange(r rune, lut *RangeLUT) bool {
   275  	if lut != nil && int(r) < len(lut.lookup) {
   276  		return lut.lookup[int(r)]
   277  	} else {
   278  		return false
   279  	}
   280  }
   281  
// RegexNode is one node of the regex tree that CreateRegexTree builds from a
// parsed syntax.Regexp. The root node is its own parent.
type RegexNode struct {
	runeArray   []rune       // The runes this node represents, used in literals and char classes
	parent      *RegexNode   // The parent node; the root points at itself
	children    []*RegexNode // The child nodes
	min         int          // The min number of matches, copied from the syntax node; read for Repeat nodes
	max         int          // The max number of matches, copied from the syntax node; read for Repeat nodes
	flags       int          // The syntax.Flags of the source node; the Literal case checks syntax.FoldCase
	lastOp      string       // The operation of the node prior
	thisOp      string       // The operation of the node
	pathStrings []string     // The string representation of the path to this node
	rangeLUT    *RangeLUT    // The lookup table for char classes, built on first evaluation
}
   295  
   296  func CreateRegexTree(AST *syntax.Regexp) *RegexNode {
   297  	// Given a syntax.regexp assumed as the root, create a tree of RegexNodes
   298  	// We want the info nodes to inform the op nodes of their min/max, flags, and last op
   299  
   300  	// Create the root node
   301  	root := &RegexNode{
   302  		runeArray:   AST.Rune,
   303  		parent:      nil,
   304  		children:    make([]*RegexNode, 0),
   305  		min:         AST.Min,
   306  		max:         AST.Max,
   307  		flags:       int(AST.Flags),
   308  		lastOp:      AST.Op.String(),
   309  		thisOp:      AST.Op.String(),
   310  		pathStrings: make([]string, 0),
   311  	}
   312  	root.parent = root
   313  	root.pathStrings = append(root.pathStrings, "(root)")
   314  
   315  	// Create the tree
   316  	ASTPath := make([]string, 0)
   317  	ASTPath = append(ASTPath, "(root)")
   318  	root.createTree(AST, ASTPath)
   319  
   320  	return root
   321  }
   322  
   323  func (rn *RegexNode) createTree(AST *syntax.Regexp, ASTPath []string) {
   324  	// Create the tree
   325  	lastOp := AST.Op.String()
   326  	ASTPath = append(ASTPath, lastOp)
   327  
   328  	for _, sub := range AST.Sub {
   329  		// Create a new node
   330  		newNode := &RegexNode{
   331  			runeArray:   sub.Rune,
   332  			parent:      rn,
   333  			children:    make([]*RegexNode, 0),
   334  			min:         sub.Min,
   335  			max:         sub.Max,
   336  			flags:       int(sub.Flags),
   337  			lastOp:      lastOp,
   338  			thisOp:      sub.Op.String(),
   339  			pathStrings: ASTPath,
   340  		}
   341  		if len(sub.Sub) > 0 {
   342  			newNode.createTree(sub, ASTPath)
   343  		}
   344  		rn.children = append(rn.children, newNode)
   345  	}
   346  }
   347  
   348  // We need a path map to know where we are in the tree
   349  func (rn *RegexNode) GeneratePathMap() [][]int {
   350  	var pathMap [][]int
   351  	generatePathMap(rn, 0, []int{}, &pathMap)
   352  	return pathMap
   353  }
   354  
   355  func generatePathMap(
   356  	rn *RegexNode,
   357  	parentIndex int,
   358  	currentPath []int,
   359  	pathMap *[][]int,
   360  ) {
   361  	// Generate a map of the tree with dfs
   362  	currentPath = append(currentPath, parentIndex)
   363  
   364  	// If not already in the map, add the current path
   365  	pathCopy := make([]int, len(currentPath))
   366  	copy(pathCopy, currentPath)
   367  	*pathMap = append(*pathMap, pathCopy)
   368  	for idx, child := range rn.children {
   369  		generatePathMap(child, idx, currentPath, pathMap)
   370  	}
   371  
   372  }
   373  
   374  func (rn *RegexNode) String() string {
   375  	// Print the tree
   376  	sb := strings.Builder{}
   377  	rn.string(0, &sb)
   378  	return sb.String()
   379  }
   380  
   381  func (rn *RegexNode) string(level int, sb *strings.Builder) {
   382  	if rn == nil {
   383  		return
   384  	}
   385  	if len(rn.runeArray) > 50 {
   386  		sb.WriteString(string(rn.runeArray[:50]))
   387  	} else {
   388  		sb.WriteString(string(rn.runeArray))
   389  	}
   390  	idx := 0
   391  	if len(rn.children) == 1 {
   392  		// Get the only element from the map recursively until we find a node
   393  		// with more than one child.
   394  		for r := range rn.children {
   395  			rn.children[r].string(level, sb)
   396  		}
   397  		return
   398  	}
   399  	level += 1
   400  	sb.WriteString(" -> ")
   401  	sb.WriteString(rn.lastOp)
   402  	sb.WriteByte('\n')
   403  
   404  	for r := range rn.children {
   405  		sb.WriteString(strings.Repeat("| ", level-1))
   406  		// If we're the last child, then we prepend with a tree terminator.
   407  		if idx == len(rn.children)-1 {
   408  			sb.WriteString("└─")
   409  		} else {
   410  			sb.WriteString("├─")
   411  		}
   412  		rn.children[r].string(level, sb)
   413  		idx += 1
   414  	}
   415  }
   416  
// matchVariables is the mutable state shared by every traverseRegexTree call
// of one EvaluateRegexTree run. matchedWords and subjectRuneArrIndex persist
// across rounds; EvaluateRegexTree resets most of the other fields before
// each new round.
type matchVariables struct {
	matchedWords                []string   // The words that have been matched
	subjectRuneArrIndex         int        // The index of the first subject rune that has not been consumed by a recorded match
	subjectRuneCandidateIndices []int      // A stack of subject indices, one per tree depth, for the match in progress
	currentNodeIdx              int        // The index of the current node in the path map
	pathMap                     [][]int    // The path map of the tree
	ParentOp                    string     // The operation of the parent node from where we are. NOTE(review): not used in this file.
	minGroupSize                int        // The minimum number of runes that must be matched
	maxGroupSize                int        // The maximum number of runes that can be matched; -1 means unbounded
	candidateRunes              []rune     // The runes that are candidates for matching
	skipUntilNum                int        // The number of upcoming nodes to visit without evaluating them
	rootNode                    *RegexNode // The root node of the tree
	endEval                     bool       // Whether we should end the evaluation
	lastInfoOpLevel             int        // The level of the last info op, used for resetting group sizes
	parentMatched               bool       // The direct parent of the current node has at least one match
}
   434  
   435  // We want to take a string and use pre-order traversal to match the string to the tree, in a regex-like fashion
   436  // This is much faster than using the regex package.
   437  // The input is a pathmap generate from the regex tree, and the runes to match
   438  // The output is a list of strings that have been matched
   439  func (rn *RegexNode) EvaluateRegexTree(runes []rune, pathMap [][]int) []string {
   440  	// Init variables
   441  	var matchVars matchVariables
   442  	matchVars.matchedWords = make([]string, 0)
   443  	matchVars.subjectRuneArrIndex = 0
   444  	matchVars.currentNodeIdx = 0
   445  	matchVars.minGroupSize = 1
   446  	matchVars.maxGroupSize = -1
   447  	matchVars.candidateRunes = make([]rune, 0, 64)
   448  	matchVars.subjectRuneCandidateIndices = []int{0}
   449  	matchVars.pathMap = pathMap
   450  	matchVars.rootNode = rn
   451  	matchVars.endEval = false
   452  	matchVars.lastInfoOpLevel = 1
   453  
   454  	// Start the traversal
   455  	for {
   456  		rn.traverseRegexTree(runes, &matchVars, 0)
   457  		if matchVars.subjectRuneArrIndex >= len(runes) {
   458  			break
   459  		}
   460  		// Reset for next round
   461  		matchVars.currentNodeIdx = 0
   462  		matchVars.minGroupSize = 1
   463  		matchVars.maxGroupSize = -1
   464  		matchVars.candidateRunes = matchVars.candidateRunes[:0]
   465  		matchVars.subjectRuneCandidateIndices[0] = matchVars.subjectRuneArrIndex
   466  		matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:1]
   467  		matchVars.skipUntilNum = 0
   468  		matchVars.endEval = false
   469  		matchVars.lastInfoOpLevel = 1
   470  	}
   471  
   472  	return matchVars.matchedWords
   473  }
   474  
   475  // The recursive function that traverses the tree
   476  func (rn *RegexNode) traverseRegexTree(
   477  	runes []rune,
   478  	matchVars *matchVariables,
   479  	level int,
   480  ) {
   481  	// Pre-order traversal of the tree
   482  	if matchVars.endEval {
   483  		return
   484  	}
   485  	level += 1
   486  	thisNodeMap := matchVars.pathMap[matchVars.currentNodeIdx]
   487  	lastNodeMap := make([]int, 0)
   488  	if matchVars.currentNodeIdx > 0 {
   489  		lastNodeMap = matchVars.pathMap[matchVars.currentNodeIdx-1]
   490  	}
   491  	thisNodeRuneIdx := -1
   492  	thisNodeRuneParentIdx := 0
   493  
   494  	// Check if we are at the branch root and have a accumulated split
   495  	if len(thisNodeMap) == 2 && len(matchVars.candidateRunes) != 0 {
   496  		strMatched := string(matchVars.candidateRunes)
   497  		matchVars.matchedWords = append(matchVars.matchedWords, strMatched)
   498  		matchVars.subjectRuneArrIndex += len(matchVars.candidateRunes)
   499  
   500  		// Finish Round
   501  		matchVars.endEval = true
   502  		return
   503  	} else if len(thisNodeMap) == 2 {
   504  		// Reset candidate indices if we are bach at the branch root
   505  		matchVars.subjectRuneCandidateIndices[0] = matchVars.subjectRuneArrIndex
   506  		matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:1]
   507  	} else if len(thisNodeMap) != len(lastNodeMap) && len(lastNodeMap) != 0 {
   508  		// We have either traversed up or down the tree
   509  		// Reset parent match variable
   510  		matchVars.parentMatched = false
   511  	}
   512  
   513  	// Evaluate the current node
   514  	if matchVars.skipUntilNum == 0 {
   515  		// if the index isn't of the right length, we append the index to the candidate indices
   516  		if len(matchVars.subjectRuneCandidateIndices) < len(thisNodeMap) {
   517  			candidateRuneArray := matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1]
   518  			matchVars.subjectRuneCandidateIndices = append(
   519  				matchVars.subjectRuneCandidateIndices, candidateRuneArray,
   520  			)
   521  		} else {
   522  			// Trim to the right length
   523  			matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:len(thisNodeMap)]
   524  		}
   525  		thisNodeRuneIdx = matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1]
   526  		if len(matchVars.subjectRuneCandidateIndices) > 1 {
   527  			thisNodeRuneParentIdx = matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-2]
   528  		}
   529  
   530  		switch rn.thisOp {
   531  		case "Alternate":
   532  			// Nothing needs to happen if we have these nodes here
   533  		case "Concat":
   534  			// Nothing needs to happen if we have these nodes here
   535  		case "Quest":
   536  			// Set minmax for the next nodes
   537  			matchVars.minGroupSize = 0
   538  			matchVars.maxGroupSize = 1
   539  			matchVars.lastInfoOpLevel = level
   540  		case "Plus":
   541  			// Set minmax for the next nodes
   542  			matchVars.minGroupSize = 1
   543  			matchVars.maxGroupSize = -1
   544  			matchVars.lastInfoOpLevel = level
   545  		case "Repeat":
   546  			// Set minmax for the next nodes
   547  			matchVars.minGroupSize = rn.min
   548  			matchVars.maxGroupSize = rn.max
   549  			matchVars.lastInfoOpLevel = level
   550  		case "Star":
   551  			// Set minmax for the next nodes
   552  			matchVars.minGroupSize = 0
   553  			matchVars.maxGroupSize = -1
   554  			matchVars.lastInfoOpLevel = level
   555  		case "Literal":
   556  			// Evaluate the literal
   557  			caseInsensitiveFlag := false
   558  			if rn.flags&int(syntax.FoldCase) != 0 {
   559  				caseInsensitiveFlag = true
   560  			}
   561  			matches := 0
   562  			matchArr := make([]rune, 0)
   563  			for i := 0; i < len(rn.runeArray); i++ {
   564  				if thisNodeRuneIdx+i < len(runes) {
   565  					if rn.runeArray[i] == runes[thisNodeRuneIdx+i] {
   566  						matches += 1
   567  						matchArr = append(matchArr, runes[thisNodeRuneIdx+i])
   568  					} else {
   569  						if caseInsensitiveFlag && unicode.IsLetter(rn.runeArray[i]) && unicode.IsLetter(runes[thisNodeRuneIdx+i]) {
   570  							if rn.runeArray[i] == runes[thisNodeRuneIdx+i]+32 {
   571  								matches += 1
   572  								matchArr = append(matchArr, runes[thisNodeRuneIdx+i])
   573  							} else if rn.runeArray[i] == runes[thisNodeRuneIdx+i]-32 {
   574  								matches += 1
   575  								matchArr = append(matchArr, runes[thisNodeRuneIdx+i])
   576  							} else {
   577  								break
   578  							}
   579  						} else {
   580  							break
   581  
   582  						}
   583  					}
   584  				}
   585  			}
   586  
   587  			// If we are expecting a non-zero match, set the min group size
   588  			// to the length of the rune array (literal	length)
   589  			if matchVars.minGroupSize > 0 {
   590  				matchVars.minGroupSize = len(rn.runeArray)
   591  			}
   592  
   593  			// Matches must be at least min group but can exceed max, will be cut off.
   594  			if matchVars.minGroupSize == -1 || matches >= matchVars.minGroupSize {
   595  				if matchVars.maxGroupSize == -1 || matches <= matchVars.maxGroupSize {
   596  					// Matched
   597  					matchVars.parentMatched = true
   598  					if matches != 0 {
   599  						matchVars.candidateRunes = append(
   600  							matchVars.candidateRunes, matchArr...,
   601  						)
   602  						thisNodeRuneIdx += matches
   603  					}
   604  				} else if matches > matchVars.maxGroupSize {
   605  					// Matched, but exceeded max
   606  					// set matches to max
   607  					matches = matchVars.maxGroupSize
   608  					if len(matchArr) > matches {
   609  						matchArr = matchArr[:matches]
   610  					}
   611  					matchVars.candidateRunes = append(
   612  						matchVars.candidateRunes, matchArr...,
   613  					)
   614  					thisNodeRuneIdx += matches
   615  					matchVars.parentMatched = true
   616  				} else {
   617  					// Not matched
   618  					// If the parent is a concat, this is an AND statement, we should skip sibings
   619  					hasConcatParent := false
   620  					for _, path := range rn.pathStrings {
   621  						if path == "Concat" {
   622  							hasConcatParent = true
   623  							break
   624  						}
   625  					}
   626  
   627  					// Calculate skip length here
   628  					if hasConcatParent {
   629  						matchVars.skipUntilNum = calcSkipLength(
   630  							matchVars.pathMap, matchVars.currentNodeIdx, true,
   631  						)
   632  						matchVars.candidateRunes = matchVars.candidateRunes[:0]
   633  						// pop one idx
   634  						matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:len(matchVars.subjectRuneCandidateIndices)-1]
   635  					} else {
   636  						matchVars.skipUntilNum = calcSkipLength(
   637  							matchVars.pathMap, matchVars.currentNodeIdx, false,
   638  						)
   639  						// Reset one idx
   640  						matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1] = thisNodeRuneParentIdx
   641  						thisNodeRuneIdx = thisNodeRuneParentIdx
   642  					}
   643  				}
   644  			} else {
   645  				// Not matched
   646  				// If the parent is a concat, this is an AND statement, we should skip sibings
   647  				hasConcatParent := false
   648  				parentPtr := rn.parent
   649  				for {
   650  					if parentPtr == rn {
   651  						break
   652  					}
   653  					if parentPtr.thisOp == "Concat" {
   654  						hasConcatParent = true
   655  						break
   656  					} else if parentPtr.thisOp == "Alternate" {
   657  						break
   658  					} else {
   659  						parentPtr = parentPtr.parent
   660  					}
   661  
   662  				}
   663  				// If not matched, we don't care about evaluating the
   664  				// children of the current node (and potentially siblings)
   665  				if hasConcatParent {
   666  					matchVars.skipUntilNum = calcSkipLength(
   667  						matchVars.pathMap, matchVars.currentNodeIdx, true,
   668  					)
   669  					matchVars.candidateRunes = matchVars.candidateRunes[:0]
   670  					// pop one idx
   671  					matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:len(matchVars.subjectRuneCandidateIndices)-1]
   672  				} else {
   673  					matchVars.skipUntilNum = calcSkipLength(
   674  						matchVars.pathMap, matchVars.currentNodeIdx, false,
   675  					)
   676  					// Reset one idx
   677  					matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1] = thisNodeRuneParentIdx
   678  					thisNodeRuneIdx = thisNodeRuneParentIdx
   679  				}
   680  			}
   681  		case "CharClass":
   682  			// Evaluate the char class
   683  			// We generate and use a LUT for the char class as an optimization over directly
   684  			// checking the ranges.
   685  			var lut *RangeLUT
   686  			if rn.rangeLUT == nil {
   687  				rangesArray := ArrayAsRanges(rn.runeArray)
   688  				rn.rangeLUT = newRangeLUT(rangesArray)
   689  			} else {
   690  				lut = rn.rangeLUT
   691  			}
   692  
   693  			matches := 0
   694  			for {
   695  				if thisNodeRuneIdx+matches < len(runes) {
   696  					if containsCharInRange(runes[thisNodeRuneIdx+matches], lut) {
   697  						matches += 1
   698  					} else {
   699  						break
   700  					}
   701  				} else {
   702  					break
   703  				}
   704  			}
   705  
   706  			// Must be at least min group but can exceed max, will be cut off.
   707  			if matchVars.minGroupSize == -1 || matches >= matchVars.minGroupSize {
   708  				if matchVars.maxGroupSize == -1 || matches <= matchVars.maxGroupSize {
   709  					// Matched
   710  					matchVars.parentMatched = true
   711  					if matches != 0 {
   712  						matchVars.candidateRunes = append(
   713  							matchVars.candidateRunes,
   714  							runes[thisNodeRuneIdx:thisNodeRuneIdx+matches]...,
   715  						)
   716  						thisNodeRuneIdx += matches
   717  					}
   718  				} else if matches > matchVars.maxGroupSize {
   719  					// Matched, but exceeded max
   720  					// set matches to max
   721  					matches = matchVars.maxGroupSize
   722  					matchVars.candidateRunes = append(
   723  						matchVars.candidateRunes,
   724  						runes[thisNodeRuneIdx:thisNodeRuneIdx+matches]...,
   725  					)
   726  					thisNodeRuneIdx += matches
   727  					matchVars.parentMatched = true
   728  				} else {
   729  					// Not matched
   730  					// If the last alt/concat parent was a concat
   731  					hasConcatParent := false
   732  					parentPtr := rn.parent
   733  					for {
   734  						if parentPtr == rn {
   735  							break
   736  						}
   737  						if parentPtr.thisOp == "Concat" {
   738  							hasConcatParent = true
   739  							break
   740  						} else if parentPtr.thisOp == "Alternate" {
   741  							break
   742  						} else {
   743  							parentPtr = parentPtr.parent
   744  						}
   745  
   746  					}
   747  
   748  					// If not matched, we don't care about evaluating the
   749  					// children of the current node (and potentially siblings)
   750  					if hasConcatParent {
   751  						matchVars.skipUntilNum = calcSkipLength(
   752  							matchVars.pathMap, matchVars.currentNodeIdx, true,
   753  						)
   754  						matchVars.candidateRunes = matchVars.candidateRunes[:0]
   755  						// pop one idx
   756  						matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:len(matchVars.subjectRuneCandidateIndices)-1]
   757  					} else {
   758  						matchVars.skipUntilNum = calcSkipLength(
   759  							matchVars.pathMap, matchVars.currentNodeIdx, false,
   760  						)
   761  						// Reset one idx
   762  						matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1] = thisNodeRuneParentIdx
   763  						thisNodeRuneIdx = thisNodeRuneParentIdx
   764  					}
   765  				}
   766  			} else {
   767  				// Not matched
   768  				// If the parent is a concat, this is an AND statement, we should skip sibings
   769  				hasConcatParent := false
   770  				parentPtr := rn.parent
   771  				for {
   772  					if parentPtr == rn {
   773  						break
   774  					}
   775  					if parentPtr.thisOp == "Concat" {
   776  						hasConcatParent = true
   777  						break
   778  					} else if parentPtr.thisOp == "Alternate" {
   779  						break
   780  					} else {
   781  						parentPtr = parentPtr.parent
   782  					}
   783  
   784  				}
   785  
   786  				// Calculate skip length here
   787  				if hasConcatParent {
   788  					matchVars.skipUntilNum = calcSkipLength(
   789  						matchVars.pathMap, matchVars.currentNodeIdx, true,
   790  					)
   791  					matchVars.candidateRunes = matchVars.candidateRunes[:0]
   792  					// pop one idx
   793  					matchVars.subjectRuneCandidateIndices = matchVars.subjectRuneCandidateIndices[:len(matchVars.subjectRuneCandidateIndices)-1]
   794  				} else {
   795  					//fmt.Printf("Parent is not concat, skipping children\n")
   796  					matchVars.skipUntilNum = calcSkipLength(
   797  						matchVars.pathMap, matchVars.currentNodeIdx, false,
   798  					)
   799  					// Reset one idx
   800  					matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1] = thisNodeRuneParentIdx
   801  					thisNodeRuneIdx = thisNodeRuneParentIdx
   802  				}
   803  			}
   804  
   805  		default:
   806  			// Do nothing if we don't find the operation
   807  
   808  		}
   809  	} else {
   810  		// Decrement the skip until num
   811  		matchVars.skipUntilNum -= 1
   812  	}
   813  
   814  	// Reset min/max if there is no path to a min/max setting node
   815  	found := false
   816  	if level > matchVars.lastInfoOpLevel {
   817  		matchVars.lastInfoOpLevel = level
   818  	}
   819  
   820  	if matchVars.minGroupSize == 1 && matchVars.maxGroupSize == -1 {
   821  		found = true
   822  	} else if matchVars.lastInfoOpLevel != 1 {
   823  		found = true
   824  	}
   825  
   826  	if !found {
   827  		matchVars.minGroupSize = 1
   828  		matchVars.maxGroupSize = -1
   829  	}
   830  
   831  	// Update the rune candidate idx. If theres not a Alternate,we update the parent
   832  	if thisNodeRuneIdx != -1 {
   833  		parentOp := rn.parent.thisOp
   834  		if parentOp == "Quest" || parentOp == "Plus" || parentOp == "Repeat" || parentOp == "Star" {
   835  			if len(matchVars.subjectRuneCandidateIndices) > 1 {
   836  				matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-2] = thisNodeRuneIdx
   837  			}
   838  		}
   839  		matchVars.subjectRuneCandidateIndices[len(matchVars.subjectRuneCandidateIndices)-1] = thisNodeRuneIdx
   840  	}
   841  
   842  	// Load info from the current node
   843  	matchVars.currentNodeIdx += 1
   844  	// If next node is a branch root, and this node is a failed match, we want to actively clear the candidate runes
   845  	flagNextNodeIsBranchRoot := false
   846  	if matchVars.currentNodeIdx < len(matchVars.pathMap) && len(matchVars.pathMap[matchVars.currentNodeIdx]) == 2 {
   847  		flagNextNodeIsBranchRoot = true
   848  	}
   849  	if flagNextNodeIsBranchRoot && len(matchVars.candidateRunes) != 0 && !matchVars.parentMatched {
   850  		matchVars.candidateRunes = matchVars.candidateRunes[:0]
   851  	}
   852  	// Traverse the children
   853  	for _, child := range rn.children {
   854  		child.traverseRegexTree(runes, matchVars, level)
   855  	}
   856  
   857  }
   858  
   859  // Given current index, find the next index that isn't a child of the current index
   860  // If skipSiblings is true, we skip all siblings of the current node as well
   861  // Return the number of nodes between the current node and the next node that isn't a child of the current node
   862  func calcSkipLength(mapOfTree [][]int, currentPos int, skipSiblings bool) int {
   863  	// Get the current path
   864  	currentPath := mapOfTree[currentPos]
   865  	lenOfCurrentPath := len(currentPath)
   866  	skipLength := 0
   867  	for {
   868  		// Check if we are at end of map
   869  		if currentPos == len(mapOfTree)-1 {
   870  			break
   871  		}
   872  		// Check if we are at root
   873  		if len(mapOfTree[currentPos]) == 1 {
   874  			break
   875  		}
   876  
   877  		// Siblings are on the same length, if we want to skip siblings, we only check for lesser length
   878  		if skipSiblings {
   879  			if len(mapOfTree[currentPos+1]) < lenOfCurrentPath {
   880  				break
   881  			} else {
   882  				currentPos += 1
   883  			}
   884  		} else {
   885  			if len(mapOfTree[currentPos+1]) <= lenOfCurrentPath {
   886  				break
   887  			} else {
   888  				currentPos += 1
   889  			}
   890  		}
   891  
   892  		skipLength += 1
   893  	}
   894  	return skipLength
   895  }