github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/branch_control/expr_parser_node.go

github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/branch_control/expr_parser_node.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package branch_control
    16  
    17  import (
    18  	"math"
    19  	"sync"
    20  
    21  	"github.com/dolthub/go-mysql-server/sql"
    22  )
    23  
var (
	// aiciSorter is the rune sorter obtained from the utf8mb4_0900_ai_ci collation. It is shared by every column that
	// uses that collation so that Sorter() is only called once for it.
	aiciSorter = sql.Collation_utf8mb4_0900_ai_ci.Sorter()
	// sortFuncs holds one rune-to-sort-order function per expression column. parseExpression indexes it in the order
	// database (0), branch (1), user (2), host (3), so the user column is the only one using the utf8mb4_0900_bin
	// sorter while the remaining three columns use aiciSorter.
	sortFuncs  = []func(r rune) int32{aiciSorter, aiciSorter, sql.Collation_utf8mb4_0900_bin.Sorter(), aiciSorter}
)
    28  
// MatchNode contains a collection of sort orders that allow for an optimized level of traversal compared to
// MatchExpression due to the sharing of like sort orders, reducing the overall number of comparisons needed.
//
// Nodes form a prefix tree over the concatenated sort orders of the database, branch, user, and host expressions.
// Each node stores a run of sort orders shared by every expression beneath it. Add and Remove key each child by the
// first sort order of that child's own run, and split or merge nodes as expressions are inserted and deleted.
type MatchNode struct {
	SortOrders []int32              // These are the sort orders that will be compared against when matching a given rune.
	Children   map[int32]*MatchNode // These are the children of this node that each represent a different path in the sort orders.
	Data       *MatchNodeData       // This is the collection of data that the node holds. Will be nil if it's not a destination node.
}
    36  
// MatchNodeData is the data contained in a destination MatchNode, meaning a node at which a full set of expressions
// given to Add ends. Match returns a copy of it inside each MatchResult.
type MatchNodeData struct {
	// Permissions is the permission value stored for the set of expressions. The type is declared outside this file.
	Permissions Permissions
	// RowIndex is the value that Remove returns when the entry holding this data is deleted.
	// NOTE(review): presumably the index of the backing table row; confirm against the callers.
	RowIndex    uint32
}
    42  
// MatchResult contains the data and expression length of a successful match.
type MatchResult struct {
	// MatchNodeData is a copy of the data stored on the destination node that matched.
	MatchNodeData
	// Length counts the sort orders of the stored expressions that were consumed to reach the destination node. A
	// trailing any-match ('%') that is still pending when the input ends contributes one to this count.
	Length uint32
}
    48  
// matchNodeCounted is an intermediary node used while processing matches that records the length of the match so far.
// This may be used to distinguish between which matches are the longest.
type matchNodeCounted struct {
	// MatchNode is a by-value copy of a tree node. Match and processMatch re-slice SortOrders on the copy as sort
	// orders are consumed, which leaves the node stored in the tree untouched.
	MatchNode
	// Length is the number of expression sort orders consumed so far along this candidate path.
	Length uint32
}
    55  
    56  // matchNodeCountedPool is a pool for MatchNodeCounted.
    57  var matchNodeCountedPool = &sync.Pool{
    58  	New: func() any {
    59  		return make([]matchNodeCounted, 0, 16)
    60  	},
    61  }
    62  
    63  // concatenatedSortOrderPool is a pool for concatenated sort orders.
    64  var concatenatedSortOrderPool = &sync.Pool{
    65  	New: func() any {
    66  		return make([]int32, 0, 128)
    67  	},
    68  }
    69  
    70  // Match returns a collection of results based on the given strings or expressions. When the parameters represent
    71  // standard strings, then this simply matches those strings against the parsed expressions. However, if the parameters
    72  // represent expressions, then this matches against all parsed expressions that are either duplicates or supersets of
    73  // the given expressions. This allows the user to "match" against new expressions to see if they are already covered.
    74  func (mn *MatchNode) Match(database, branch, user, host string) []MatchResult {
    75  	allSortOrders := mn.parseExpression(database, branch, user, host)
    76  	defer func() {
    77  		concatenatedSortOrderPool.Put(allSortOrders)
    78  	}()
    79  
    80  	// This is the slice that we'll put matches into. This will also flip to become the match subset. This way we reuse
    81  	// the underlying arrays. We grab this from the pool. These are not pointers, as we modify the data inside to
    82  	// simplify the loop's logic.
    83  	matches := matchNodeCountedPool.Get().([]matchNodeCounted)[:0]
    84  	// This is the slice we'll iterate over. We also grab this from the pool.
    85  	matchSubset := matchNodeCountedPool.Get().([]matchNodeCounted)[:0]
    86  	matchSubset = append(matchSubset, matchNodeCounted{
    87  		MatchNode: *mn,
    88  		Length:    0,
    89  	})
    90  
    91  	// Loop over the entire set of sort orders
    92  	for _, sortOrder := range allSortOrders {
    93  		for _, node := range matchSubset {
    94  			if len(node.SortOrders) == 0 {
    95  				// At most we'll look at three children that may match, we can ignore all other children
    96  				if child, ok := node.Children[singleMatch]; ok {
    97  					matches = processMatch(matches, matchNodeCounted{
    98  						MatchNode: *child,
    99  						Length:    node.Length,
   100  					}, sortOrder)
   101  				}
   102  				if child, ok := node.Children[anyMatch]; ok {
   103  					matches = processMatch(matches, matchNodeCounted{
   104  						MatchNode: *child,
   105  						Length:    node.Length,
   106  					}, sortOrder)
   107  				}
   108  				if child, ok := node.Children[sortOrder]; ok {
   109  					matches = processMatch(matches, matchNodeCounted{
   110  						MatchNode: *child,
   111  						Length:    node.Length,
   112  					}, sortOrder)
   113  				}
   114  				continue
   115  			}
   116  			matches = processMatch(matches, node, sortOrder)
   117  		}
   118  		// Swap the two, and put the slice of matches to be at the beginning of the previous subset array to reuse it
   119  		matches, matchSubset = matchSubset[:0], matches
   120  	}
   121  	// We're done with the matches slice, so put it back in the pool
   122  	matchNodeCountedPool.Put(matches)
   123  
   124  	// The subset may contain partial matches (which do not count), so we filter for only complete matches
   125  	results := make([]MatchResult, 0, len(matchSubset))
   126  	for _, node := range matchSubset {
   127  		if node.Data != nil {
   128  			if len(node.SortOrders) == 0 {
   129  				results = append(results, MatchResult{
   130  					MatchNodeData: *node.Data,
   131  					Length:        node.Length,
   132  				})
   133  			} else if len(node.SortOrders) == 1 && node.SortOrders[0] == anyMatch {
   134  				results = append(results, MatchResult{
   135  					MatchNodeData: *node.Data,
   136  					Length:        node.Length + 1,
   137  				})
   138  			}
   139  		}
   140  	}
   141  	// Now we're done with the subset slice, so put it back in the pool
   142  	matchNodeCountedPool.Put(matchSubset)
   143  	return results
   144  }
   145  
   146  // processMatch handles the behavior of how to process a sort order against a node. Returns a new slice with any newly
   147  // appended nodes (which should overwrite the first parameter in the calling function).
   148  func processMatch(matches []matchNodeCounted, node matchNodeCounted, sortOrder int32) []matchNodeCounted {
   149  	switch node.SortOrders[0] {
   150  	case singleMatch:
   151  		if sortOrder < singleMatch {
   152  			return matches
   153  		}
   154  		node.SortOrders = node.SortOrders[1:]
   155  		node.Length += 1
   156  		matches = append(matches, node)
   157  	case anyMatch:
   158  		// Since any match can be a zero-length match, we need to check if we also match the next sort order
   159  		if len(node.SortOrders) > 1 && node.SortOrders[1] == sortOrder {
   160  			matches = append(matches, matchNodeCounted{
   161  				MatchNode: MatchNode{
   162  					SortOrders: node.SortOrders[2:],
   163  					Children:   node.Children,
   164  					Data:       node.Data,
   165  				},
   166  				Length: node.Length + 2,
   167  			})
   168  		}
   169  		// Any match cannot match a columnMarker as they represent column boundaries
   170  		if sortOrder != columnMarker {
   171  			matches = append(matches, node)
   172  		}
   173  	default:
   174  		// NOTE: it's worth mentioning that separators only match with themselves, so no need for special logic
   175  		if sortOrder == node.SortOrders[0] {
   176  			node.SortOrders = node.SortOrders[1:]
   177  			node.Length += 1
   178  			matches = append(matches, node)
   179  		}
   180  	}
   181  	return matches
   182  }
   183  
   184  // Add will add the given expressions to the node hierarchy. If the expressions already exists, then this overwrites
   185  // the pre-existing entry. Assumes that the given expressions have already been folded.
   186  func (mn *MatchNode) Add(databaseExpr, branchExpr, userExpr, hostExpr string, data MatchNodeData) {
   187  	root := mn
   188  	allSortOrders := mn.parseExpression(databaseExpr, branchExpr, userExpr, hostExpr)
   189  	defer func() {
   190  		concatenatedSortOrderPool.Put(allSortOrders)
   191  	}()
   192  
   193  	remainingRootSortOrders := root.SortOrders
   194  	allSortOrdersMaxIndex := len(allSortOrders) - 1
   195  ParentLoop:
   196  	for i, sortOrder := range allSortOrders {
   197  		if remainingRootSortOrders[0] == sortOrder {
   198  			if len(remainingRootSortOrders) > 1 && i < allSortOrdersMaxIndex {
   199  				// There are more sort orders on both sides, so we simply continue
   200  				remainingRootSortOrders = remainingRootSortOrders[1:]
   201  				continue
   202  			} else if len(remainingRootSortOrders) > 1 && i == allSortOrdersMaxIndex {
   203  				// We have more sort orders on the root, but no more in our expressions, so we put the remaining root
   204  				// sort orders as a child and set this as a destination node
   205  				root.Children = map[int32]*MatchNode{remainingRootSortOrders[1]: {
   206  					SortOrders: remainingRootSortOrders[1:],
   207  					Children:   root.Children,
   208  					Data:       root.Data,
   209  				}}
   210  				root.SortOrders = root.SortOrders[:len(root.SortOrders)-len(remainingRootSortOrders)+1]
   211  				root.Data = &data
   212  				break
   213  			} else if len(remainingRootSortOrders) == 1 && i < allSortOrdersMaxIndex {
   214  				// We've run out of sort orders on the root, but still have more from children, so check if there's a
   215  				// matching child
   216  				nextSortOrder := allSortOrders[i+1]
   217  				if child, ok := root.Children[nextSortOrder]; ok {
   218  					remainingRootSortOrders = child.SortOrders
   219  					root = root.Children[nextSortOrder]
   220  					continue ParentLoop
   221  				}
   222  				// None of the children matched, so we create a new one and add it. As we're using a pool, we need to
   223  				// create a new slice.
   224  				originalSortOrders := allSortOrders[i+1:]
   225  				newSortOrders := make([]int32, len(originalSortOrders))
   226  				copy(newSortOrders, originalSortOrders)
   227  				root.Children[newSortOrders[0]] = &MatchNode{
   228  					SortOrders: newSortOrders,
   229  					Children:   make(map[int32]*MatchNode),
   230  					Data:       &data,
   231  				}
   232  				break
   233  			} else {
   234  				// We have no more sort orders on either side so this is an exact match, therefore we update the data
   235  				root.Data = &data
   236  				break
   237  			}
   238  		} else {
   239  			// Since the sort orders do not match, we create a child here with the remaining expressions' sort orders,
   240  			// and move the root's remaining sort orders to its own child.
   241  			splitRoot := &MatchNode{
   242  				SortOrders: remainingRootSortOrders,
   243  				Children:   root.Children,
   244  				Data:       root.Data,
   245  			}
   246  			// As we're using a pool, we need to create a new slice
   247  			originalSortOrders := allSortOrders[i:]
   248  			newSortOrders := make([]int32, len(originalSortOrders))
   249  			copy(newSortOrders, originalSortOrders)
   250  			newChild := &MatchNode{
   251  				SortOrders: newSortOrders,
   252  				Children:   make(map[int32]*MatchNode),
   253  				Data:       &data,
   254  			}
   255  			root.SortOrders = root.SortOrders[:len(root.SortOrders)-len(remainingRootSortOrders)]
   256  			root.Children = map[int32]*MatchNode{splitRoot.SortOrders[0]: splitRoot, newChild.SortOrders[0]: newChild}
   257  			// As the root's data is now in the split, we set the data here to nil as it's no longer a destination node.
   258  			// If it wasn't a destination node, then nothing changes (we just set the split's data to nil as well).
   259  			root.Data = nil
   260  			break
   261  		}
   262  	}
   263  }
   264  
   265  // Remove will remove the given expressions to the node hierarchy. If the expressions do not exist, then nothing
   266  // happens. Assumes that the given expressions have already been folded.
   267  func (mn *MatchNode) Remove(databaseExpr, branchExpr, userExpr, hostExpr string) uint32 {
   268  	root := mn
   269  	allSortOrders := mn.parseExpression(databaseExpr, branchExpr, userExpr, hostExpr)
   270  	defer func() {
   271  		concatenatedSortOrderPool.Put(allSortOrders)
   272  	}()
   273  
   274  	// We track the parent of the root node so that we can delete its child if applicable
   275  	var rootParent *MatchNode = nil
   276  	childIndex := int32(0)
   277  
   278  	remainingRootSortOrders := root.SortOrders
   279  	allSortOrdersMaxIndex := len(allSortOrders) - 1
   280  	removedIndex := uint32(math.MaxUint32)
   281  ParentLoop:
   282  	for i, sortOrder := range allSortOrders {
   283  		if remainingRootSortOrders[0] == sortOrder {
   284  			if len(remainingRootSortOrders) > 1 && i < allSortOrdersMaxIndex {
   285  				// There are more sort orders on both sides, so we simply continue
   286  				remainingRootSortOrders = remainingRootSortOrders[1:]
   287  				continue
   288  			} else if len(remainingRootSortOrders) > 1 && i == allSortOrdersMaxIndex {
   289  				// We have more sort orders on the root, but no more in our expressions, so this set of expressions
   290  				// don't have a match
   291  				break
   292  			} else if len(remainingRootSortOrders) == 1 && i < allSortOrdersMaxIndex {
   293  				// We've run out of sort orders on the root, but still have more from the expressions, so check if a
   294  				// child will match the next sort order from the expressions
   295  				nextSortOrder := allSortOrders[i+1]
   296  				if child, ok := root.Children[nextSortOrder]; ok {
   297  					remainingRootSortOrders = child.SortOrders
   298  					rootParent = root
   299  					childIndex = nextSortOrder
   300  					root = child
   301  					continue ParentLoop
   302  				}
   303  				// None of the children matched, so this set of expressions don't have a match
   304  				break
   305  			} else {
   306  				// We have no more sort orders on either side so this is an exact match.
   307  				// If it's a destination node, then we mark it as no longer being one.
   308  				if root.Data != nil {
   309  					removedIndex = root.Data.RowIndex
   310  				}
   311  				root.Data = nil
   312  				if len(root.Children) == 1 {
   313  					// Since there is only a single child, we merge it with this node
   314  					for _, child := range root.Children {
   315  						// The fact that you gotta do a range + break to get a single map element is silly
   316  						root.SortOrders = append(root.SortOrders, child.SortOrders...)
   317  						root.Data = child.Data
   318  						root.Children = nil
   319  						break
   320  					}
   321  				} else if len(root.Children) == 0 {
   322  					if rootParent != nil {
   323  						// With no children, we can remove this node from the parent
   324  						delete(rootParent.Children, childIndex)
   325  						// If the parent only has a single child, and it's not a destination node, we can merge that child
   326  						// with the parent
   327  						if len(rootParent.Children) == 1 && rootParent.Data == nil {
   328  							// Since there is only a single child, we merge it with this node
   329  							for _, child := range rootParent.Children {
   330  								// It was silly a few lines ago, and it's still silly here
   331  								rootParent.SortOrders = append(rootParent.SortOrders, child.SortOrders...)
   332  								rootParent.Data = child.Data
   333  								rootParent.Children = child.Children
   334  							}
   335  						}
   336  					} else {
   337  						// This is the base root of the table, and it has no children (they may have been merged with
   338  						// the base root in a previous deletion), so we completely reset its sort orders to the base state
   339  						root.SortOrders = []int32{columnMarker}
   340  					}
   341  				}
   342  				// If this node has multiple children then we have nothing more to do
   343  				break
   344  			}
   345  		} else {
   346  			// Since the sort orders do not match, that means that this set of expressions don't have a match
   347  			break
   348  		}
   349  	}
   350  	return removedIndex
   351  }
   352  
   353  // parseExpression parses expressions into a concatenated collection of sort orders. The returned slice belongs to the
   354  // pool, which, if possible, should be returned once it is no longer needed. As this function doesn't distinguish
   355  // between strings and expressions, it assumes any given expressions have already been folded.
   356  func (mn *MatchNode) parseExpression(database, branch, user, host string) []int32 {
   357  	if len(database) > math.MaxUint16 {
   358  		database = database[:math.MaxUint16]
   359  	}
   360  	if len(branch) > math.MaxUint16 {
   361  		branch = branch[:math.MaxUint16]
   362  	}
   363  	if len(user) > math.MaxUint16 {
   364  		user = user[:math.MaxUint16]
   365  	}
   366  	if len(host) > math.MaxUint16 {
   367  		host = host[:math.MaxUint16]
   368  	}
   369  
   370  	allSortOrders := concatenatedSortOrderPool.Get().([]int32)[:0]
   371  	for i, str := range []string{database, branch, user, host} {
   372  		escaped := false
   373  		sortFunc := sortFuncs[i]
   374  		allSortOrders = append(allSortOrders, columnMarker)
   375  		for _, r := range str {
   376  			if escaped {
   377  				escaped = false
   378  				allSortOrders = append(allSortOrders, sortFunc(r))
   379  			} else {
   380  				switch r {
   381  				case '\\':
   382  					escaped = true
   383  				case '%':
   384  					allSortOrders = append(allSortOrders, anyMatch)
   385  				case '_':
   386  					allSortOrders = append(allSortOrders, singleMatch)
   387  				default:
   388  					allSortOrders = append(allSortOrders, sortFunc(r))
   389  				}
   390  			}
   391  		}
   392  	}
   393  	return allSortOrders
   394  }