get.pme.sh/pnats@v0.0.0-20240304004023-26bb5a137ed0/server/stree/stree.go (about)

     1  // Copyright 2023-2024 The NATS Authors
     2  // Licensed under the Apache License, Version 2.0 (the "License");
     3  // you may not use this file except in compliance with the License.
     4  // You may obtain a copy of the License at
     5  //
     6  // http://www.apache.org/licenses/LICENSE-2.0
     7  //
     8  // Unless required by applicable law or agreed to in writing, software
     9  // distributed under the License is distributed on an "AS IS" BASIS,
    10  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package stree
    15  
    16  import (
    17  	"bytes"
    18  	"sort"
    19  )
    20  
// SubjectTree is an adaptive radix trie (ART) for storing subject information on literal subjects.
// Will use dynamic nodes, path compression and lazy expansion.
// The reason this exists is to not only save some memory in our filestore but to greatly optimize matching
// a wildcard subject to certain members, e.g. consumer NumPending calculations.
type SubjectTree[T any] struct {
	// root is the top node of the trie. It is nil for a freshly created or emptied tree.
	root node
	// size is the number of stored entries; Insert increments it for new subjects
	// and Delete decrements it on a successful removal.
	size int
}
    29  
    30  // NewSubjectTree creates a new SubjectTree with values T.
    31  func NewSubjectTree[T any]() *SubjectTree[T] {
    32  	return &SubjectTree[T]{}
    33  }
    34  
    35  // Size returns the number of elements stored.
    36  func (t *SubjectTree[T]) Size() int {
    37  	if t == nil {
    38  		return 0
    39  	}
    40  	return t.size
    41  }
    42  
    43  // Will empty out the tree, or if tree is nil create a new one.
    44  func (t *SubjectTree[T]) Empty() *SubjectTree[T] {
    45  	if t == nil {
    46  		return NewSubjectTree[T]()
    47  	}
    48  	t.root, t.size = nil, 0
    49  	return t
    50  }
    51  
    52  // Insert a value into the tree. Will return if the value was updated and if so the old value.
    53  func (t *SubjectTree[T]) Insert(subject []byte, value T) (*T, bool) {
    54  	old, updated := t.insert(&t.root, subject, value, 0)
    55  	if !updated {
    56  		t.size++
    57  	}
    58  	return old, updated
    59  }
    60  
    61  // Find will find the value and return it or false if it was not found.
    62  func (t *SubjectTree[T]) Find(subject []byte) (*T, bool) {
    63  	var si uint16
    64  	for n := t.root; n != nil; {
    65  		if n.isLeaf() {
    66  			if ln := n.(*leaf[T]); ln.match(subject[si:]) {
    67  				return &ln.value, true
    68  			}
    69  			return nil, false
    70  		}
    71  		// We are a node type here, grab meta portion.
    72  		if bn := n.base(); bn.prefixLen > 0 {
    73  			end := min(int(si+bn.prefixLen), len(subject))
    74  			if !bytes.Equal(subject[si:end], bn.prefix[:bn.prefixLen]) {
    75  				return nil, false
    76  			}
    77  			// Increment our subject index.
    78  			si += bn.prefixLen
    79  		}
    80  		if an := n.findChild(pivot(subject, si)); an != nil {
    81  			n = *an
    82  		} else {
    83  			return nil, false
    84  		}
    85  	}
    86  	return nil, false
    87  }
    88  
    89  // Delete will delete the item and return its value, or not found if it did not exist.
    90  func (t *SubjectTree[T]) Delete(subject []byte) (*T, bool) {
    91  	val, deleted := t.delete(&t.root, subject, 0)
    92  	if deleted {
    93  		t.size--
    94  	}
    95  	return val, deleted
    96  }
    97  
    98  // Match will match against a subject that can have wildcards and invoke the callback func for each matched value.
    99  func (t *SubjectTree[T]) Match(filter []byte, cb func(subject []byte, val *T)) {
   100  	if len(filter) == 0 || cb == nil {
   101  		return
   102  	}
   103  	// We need to break this up into chunks based on wildcards, either pwc '*' or fwc '>'.
   104  	var raw [16][]byte
   105  	parts := genParts(filter, raw[:0])
   106  	var _pre [256]byte
   107  	t.match(t.root, parts, _pre[:0], cb)
   108  }
   109  
   110  // Iter will walk all entries in the SubjectTree lexographically. The callback can return false to terminate the walk.
   111  func (t *SubjectTree[T]) Iter(cb func(subject []byte, val *T) bool) {
   112  	if t == nil || t.root == nil {
   113  		return
   114  	}
   115  	var _pre [256]byte
   116  	t.iter(t.root, _pre[:0], cb)
   117  }
   118  
   119  // Internal methods
   120  
   121  // Internal call to insert that can be recursive.
   122  func (t *SubjectTree[T]) insert(np *node, subject []byte, value T, si int) (*T, bool) {
   123  	n := *np
   124  	if n == nil {
   125  		*np = newLeaf(subject, value)
   126  		return nil, false
   127  	}
   128  	if n.isLeaf() {
   129  		ln := n.(*leaf[T])
   130  		if ln.match(subject[si:]) {
   131  			// Replace with new value.
   132  			old := ln.value
   133  			ln.value = value
   134  			return &old, true
   135  		}
   136  		// Here we need to split this leaf.
   137  		cpi := commonPrefixLen(ln.suffix, subject[si:])
   138  		nn := newNode4(subject[si : si+cpi])
   139  		ln.setSuffix(ln.suffix[cpi:])
   140  		si += cpi
   141  		// Make sure we have different pivot, normally this will be the case unless we have overflowing prefixes.
   142  		if p := pivot(ln.suffix, 0); si < len(subject) && p == subject[si] {
   143  			// We need to split the original leaf. Recursively call into insert.
   144  			t.insert(np, subject, value, si)
   145  			// Now add the update version of *np as a child to the new node4.
   146  			nn.addChild(p, *np)
   147  		} else {
   148  			// Can just add this new leaf as a sibling.
   149  			nl := newLeaf(subject[si:], value)
   150  			nn.addChild(pivot(nl.suffix, 0), nl)
   151  			// Add back original.
   152  			nn.addChild(pivot(ln.suffix, 0), ln)
   153  		}
   154  		*np = nn
   155  		return nil, false
   156  	}
   157  
   158  	// Non-leaf nodes.
   159  	bn := n.base()
   160  	if bn.prefixLen > 0 {
   161  		cpi := commonPrefixLen(bn.prefix[:bn.prefixLen], subject[si:])
   162  		if pli := int(bn.prefixLen); cpi >= pli {
   163  			// Move past this node. We look for an existing child node to recurse into.
   164  			// If one does not exist we can create a new leaf node.
   165  			si += pli
   166  			if nn := n.findChild(pivot(subject, si)); nn != nil {
   167  				return t.insert(nn, subject, value, si)
   168  			}
   169  			if n.isFull() {
   170  				n = n.grow()
   171  				*np = n
   172  			}
   173  			n.addChild(pivot(subject, si), newLeaf(subject[si:], value))
   174  			return nil, false
   175  		} else {
   176  			// We did not match the prefix completely here.
   177  			// Calculate new prefix for this node.
   178  			prefix := subject[si : si+cpi]
   179  			si += len(prefix)
   180  			// We will insert a new node4 and attach our current node below after adjusting prefix.
   181  			nn := newNode4(prefix)
   182  			// Shift the prefix for our original node.
   183  			n.setPrefix(bn.prefix[cpi:bn.prefixLen])
   184  			nn.addChild(pivot(bn.prefix[:], 0), n)
   185  			// Add in our new leaf.
   186  			nn.addChild(pivot(subject[si:], 0), newLeaf(subject[si:], value))
   187  			// Update our node reference.
   188  			*np = nn
   189  		}
   190  	} else {
   191  		if nn := n.findChild(pivot(subject, si)); nn != nil {
   192  			return t.insert(nn, subject, value, si)
   193  		}
   194  		// No prefix and no matched child, so add in new leafnode as needed.
   195  		if n.isFull() {
   196  			n = n.grow()
   197  			*np = n
   198  		}
   199  		n.addChild(pivot(subject, si), newLeaf(subject[si:], value))
   200  	}
   201  
   202  	return nil, false
   203  }
   204  
   205  // internal function to recursively find the leaf to delete. Will do compaction if the item is found and removed.
   206  func (t *SubjectTree[T]) delete(np *node, subject []byte, si uint16) (*T, bool) {
   207  	if t == nil || np == nil || *np == nil || len(subject) == 0 {
   208  		return nil, false
   209  	}
   210  	n := *np
   211  	if n.isLeaf() {
   212  		ln := n.(*leaf[T])
   213  		if ln.match(subject[si:]) {
   214  			*np = nil
   215  			return &ln.value, true
   216  		}
   217  		return nil, false
   218  	}
   219  	// Not a leaf node.
   220  	if bn := n.base(); bn.prefixLen > 0 {
   221  		if !bytes.Equal(subject[si:si+bn.prefixLen], bn.prefix[:bn.prefixLen]) {
   222  			return nil, false
   223  		}
   224  		// Increment our subject index.
   225  		si += bn.prefixLen
   226  	}
   227  	p := pivot(subject, si)
   228  	nna := n.findChild(p)
   229  	if nna == nil {
   230  		return nil, false
   231  	}
   232  	nn := *nna
   233  	if nn.isLeaf() {
   234  		ln := nn.(*leaf[T])
   235  		if ln.match(subject[si:]) {
   236  			n.deleteChild(p)
   237  
   238  			if sn := n.shrink(); sn != nil {
   239  				bn := n.base()
   240  				// Make sure to set cap so we force an append to copy below.
   241  				pre := bn.prefix[:bn.prefixLen:bn.prefixLen]
   242  				// Need to fix up prefixes/suffixes.
   243  				if sn.isLeaf() {
   244  					ln := sn.(*leaf[T])
   245  					// Make sure to set cap so we force an append to copy.
   246  					ln.suffix = append(pre, ln.suffix...)
   247  				} else {
   248  					// We are a node here, we need to add in the old prefix.
   249  					if len(pre) > 0 {
   250  						bsn := sn.base()
   251  						sn.setPrefix(append(pre, bsn.prefix[:bsn.prefixLen]...))
   252  					}
   253  				}
   254  				*np = sn
   255  			}
   256  
   257  			return &ln.value, true
   258  		}
   259  		return nil, false
   260  	}
   261  	return t.delete(nna, subject, si)
   262  }
   263  
   264  // Internal function which can be called recursively to match all leaf nodes to a given filter subject which
   265  // once here has been decomposed to parts. These parts only care about wildcards, both pwc and fwc.
   266  func (t *SubjectTree[T]) match(n node, parts [][]byte, pre []byte, cb func(subject []byte, val *T)) {
   267  	// Capture if we are sitting on a terminal fwc.
   268  	var hasFWC bool
   269  	if lp := len(parts); lp > 0 && parts[lp-1][0] == fwc {
   270  		hasFWC = true
   271  	}
   272  
   273  	for n != nil {
   274  		nparts, matched := n.matchParts(parts)
   275  		// Check if we did not match.
   276  		if !matched {
   277  			return
   278  		}
   279  		// We have matched here. If we are a leaf and have exhausted all parts or he have a FWC fire callback.
   280  		if n.isLeaf() {
   281  			if len(nparts) == 0 || (hasFWC && len(nparts) == 1) {
   282  				ln := n.(*leaf[T])
   283  				cb(append(pre, ln.suffix...), &ln.value)
   284  			}
   285  			return
   286  		}
   287  		// We have normal nodes here.
   288  		// We need to append our prefix
   289  		bn := n.base()
   290  		if bn.prefixLen > 0 {
   291  			// Note that this append may reallocate, but it doesn't modify "pre" at the "match" callsite.
   292  			pre = append(pre, bn.prefix[:bn.prefixLen]...)
   293  		}
   294  
   295  		// Check our remaining parts.
   296  		if len(nparts) == 0 && !hasFWC {
   297  			// We are a node with no parts left and we are not looking at a fwc.
   298  			// We could have a leafnode with no suffix which would be a match.
   299  			// We could also have a terminal pwc. Check for those here.
   300  			var hasTermPWC bool
   301  			if lp := len(parts); lp > 0 && len(parts[lp-1]) == 1 && parts[lp-1][0] == pwc {
   302  				// If we are sitting on a terminal pwc, put the pwc back and continue.
   303  				nparts = parts[len(parts)-1:]
   304  				hasTermPWC = true
   305  			}
   306  			for _, cn := range n.children() {
   307  				if cn == nil {
   308  					continue
   309  				}
   310  				if cn.isLeaf() {
   311  					ln := cn.(*leaf[T])
   312  					if len(ln.suffix) == 0 {
   313  						cb(append(pre, ln.suffix...), &ln.value)
   314  					} else if hasTermPWC && bytes.IndexByte(ln.suffix, tsep) < 0 {
   315  						cb(append(pre, ln.suffix...), &ln.value)
   316  					}
   317  				} else if hasTermPWC {
   318  					// We have terminal pwc so call into match again with the child node.
   319  					t.match(cn, nparts, pre, cb)
   320  				}
   321  			}
   322  			// Return regardless.
   323  			return
   324  		}
   325  		// If we are sitting on a terminal fwc, put back and continue.
   326  		if hasFWC && len(nparts) == 0 {
   327  			nparts = parts[len(parts)-1:]
   328  		}
   329  
   330  		// Here we are a node type with a partial match.
   331  		// Check if the first part is a wildcard.
   332  		fp := nparts[0]
   333  		p := pivot(fp, 0)
   334  		// Check if we have a pwc/fwc part here. This will cause us to iterate.
   335  		if len(fp) == 1 && (p == pwc || p == fwc) {
   336  			// We need to iterate over all children here for the current node
   337  			// to see if we match further down.
   338  			for _, cn := range n.children() {
   339  				if cn != nil {
   340  					t.match(cn, nparts, pre, cb)
   341  				}
   342  			}
   343  		}
   344  		// Here we have normal traversal, so find the next child.
   345  		nn := n.findChild(p)
   346  		if nn == nil {
   347  			return
   348  		}
   349  		n, parts = *nn, nparts
   350  	}
   351  }
   352  
   353  // Interal iter function to walk nodes in lexigraphical order.
   354  func (t *SubjectTree[T]) iter(n node, pre []byte, cb func(subject []byte, val *T) bool) bool {
   355  	if n.isLeaf() {
   356  		ln := n.(*leaf[T])
   357  		return cb(append(pre, ln.suffix...), &ln.value)
   358  	}
   359  	// We are normal node here.
   360  	bn := n.base()
   361  	// Note that this append may reallocate, but it doesn't modify "pre" at the "iter" callsite.
   362  	pre = append(pre, bn.prefix[:bn.prefixLen]...)
   363  	// Collect nodes since unsorted.
   364  	var _nodes [256]node
   365  	nodes := _nodes[:0]
   366  	for _, cn := range n.children() {
   367  		if cn != nil {
   368  			nodes = append(nodes, cn)
   369  		}
   370  	}
   371  	// Now sort.
   372  	sort.SliceStable(nodes, func(i, j int) bool { return bytes.Compare(nodes[i].path(), nodes[j].path()) < 0 })
   373  	// Now walk the nodes in order and call into next iter.
   374  	for i := range nodes {
   375  		if !t.iter(nodes[i], pre, cb) {
   376  			return false
   377  		}
   378  	}
   379  	return true
   380  }