src.elv.sh@v0.21.0-dev.0.20240515223629-06979efb9a2a/pkg/persistent/vector/vector.go (about)

     1  // Package vector implements persistent vector.
     2  //
     3  // This is a Go clone of Clojure's PersistentVector type
     4  // (https://github.com/clojure/clojure/blob/master/src/jvm/clojure/lang/PersistentVector.java).
     5  // For an introduction to the internals, see
     6  // https://hypirion.com/musings/understanding-persistent-vector-pt-1.
     7  package vector
     8  
     9  import (
    10  	"bytes"
    11  	"encoding/json"
    12  	"fmt"
    13  )
    14  
    15  const (
    16  	chunkBits  = 5 // number of index bits consumed at each level of the tree
    17  	nodeSize   = 1 << chunkBits // number of slots in every node (32)
    18  	tailMaxLen = nodeSize // the tail holds at most one node's worth of elements
    19  	chunkMask  = nodeSize - 1 // masks an index down to a slot number within a node
    20  )
    21  
    22  // Vector is a persistent sequential container for arbitrary values. It supports
    23  // effectively O(1) lookup by index, modification by index, and insertion and
    24  // removal at the end (the real cost grows with the height of a 32-way tree).
    25  // Being a persistent data structure, it is immutable: every modifying operation
    26  // returns a new vector that shares structure with the old one, making it
    27  // suitable for concurrent access. Start from Empty; a nil Vector is not usable.
    28  type Vector interface {
    29  	json.Marshaler
    30  	// Len returns the length of the vector.
    31  	Len() int
    32  	// Index returns the i-th element of the vector, if it exists. The second
    33  	// return value indicates whether the element exists.
    34  	Index(i int) (any, bool)
    35  	// Assoc returns an almost identical Vector, with the i-th element
    36  	// replaced. If the index is smaller than 0 or greater than the length of
    37  	// the vector, it returns nil. If the index is equal to the length of the
    38  	// vector, it is equivalent to Conj.
    39  	Assoc(i int, val any) Vector
    40  	// Conj returns an almost identical Vector, with an additional element
    41  	// appended to the end.
    42  	Conj(val any) Vector
    43  	// Pop returns an almost identical Vector, with the last element removed. It
    44  	// returns nil if the vector is already empty.
    45  	Pop() Vector
    46  	// SubVector returns a subvector containing the elements from i up to but
    47  	// not including j. Both indices are relative to the receiver.
    48  	SubVector(i, j int) Vector
    49  	// Iterator returns an iterator over the vector.
    50  	Iterator() Iterator
    51  }
    52  
    53  // Iterator is an iterator over vector elements. It can be used like this:
    54  //
    55  //	for it := v.Iterator(); it.HasElem(); it.Next() {
    56  //	    elem := it.Elem()
    57  //	    // do something with elem...
    58  //	}
    59  type Iterator interface {
    60  	// Elem returns the element at the current position. Call it only while
        	// HasElem returns true.
    61  	Elem() any
    62  	// HasElem returns whether the iterator is pointing to an element.
    63  	HasElem() bool
    64  	// Next moves the iterator to the next position.
    65  	Next()
    66  }
    67  
    // vector is the tree-backed implementation of Vector. Elements are split
    // between a tree of fixed-size nodes and a tail slice holding the most
    // recently appended elements.
    68  type vector struct {
    69  	count int // total number of elements, tree and tail combined
    70  	// height of the tree structure, defined to be 0 when root is a leaf.
    71  	height uint
    72  	root   node // root of the tree holding the first treeSize() elements; nil in Empty
    73  	tail   []any // trailing elements not yet pushed into the tree, at most tailMaxLen
    74  }
    75  
    76  // Empty is an empty Vector. It is the zero value of the tree-backed
        // implementation, and is what Pop returns when the last element is removed.
    77  var Empty Vector = &vector{}
    78  
    79  // node is a node in the vector tree. It is always of the size nodeSize.
        // Slots of an interior node hold child nodes, slots of a leaf hold elements;
        // unused slots are nil.
    80  type node *[nodeSize]any
    81  
    82  func newNode() node {
    83  	return node(&[nodeSize]any{})
    84  }
    85  
    86  func clone(n node) node {
    87  	a := *n
    88  	return node(&a)
    89  }
    90  
    91  func nodeFromSlice(s []any) node {
    92  	var n [nodeSize]any
    93  	copy(n[:], s)
    94  	return &n
    95  }
    96  
    97  // Len returns the number of elements in the vector.
    98  func (v *vector) Len() int {
    99  	return v.count
   100  }
   101  
   102  // treeSize returns the number of elements stored in the tree (as opposed to the
   103  // tail).
   104  func (v *vector) treeSize() int {
   105  	if v.count < tailMaxLen {
   106  		return 0
   107  	}
   108  	return ((v.count - 1) >> chunkBits) << chunkBits
   109  }
   110  
   111  func (v *vector) Index(i int) (any, bool) {
   112  	if i < 0 || i >= v.count {
   113  		return nil, false
   114  	}
   115  
   116  	// The following is very similar to sliceFor, but is implemented separately
   117  	// to avoid unnecessary copying.
   118  	if i >= v.treeSize() {
   119  		return v.tail[i&chunkMask], true
   120  	}
   121  	n := v.root
   122  	for shift := v.height * chunkBits; shift > 0; shift -= chunkBits {
   123  		n = n[(i>>shift)&chunkMask].(node)
   124  	}
   125  	return n[i&chunkMask], true
   126  }
   127  
   128  // sliceFor returns the slice where the i-th element is stored. The index must
   129  // be in bound.
   130  func (v *vector) sliceFor(i int) []any {
   131  	if i >= v.treeSize() {
   132  		return v.tail
   133  	}
   134  	n := v.root
   135  	for shift := v.height * chunkBits; shift > 0; shift -= chunkBits {
   136  		n = n[(i>>shift)&chunkMask].(node)
   137  	}
   138  	return n[:]
   139  }
   140  
   141  func (v *vector) Assoc(i int, val any) Vector {
   142  	if i < 0 || i > v.count {
   143  		return nil
   144  	} else if i == v.count {
   145  		return v.Conj(val)
   146  	}
   147  	if i >= v.treeSize() {
   148  		newTail := append([]any(nil), v.tail...)
   149  		newTail[i&chunkMask] = val
   150  		return &vector{v.count, v.height, v.root, newTail}
   151  	}
   152  	return &vector{v.count, v.height, doAssoc(v.height, v.root, i, val), v.tail}
   153  }
   154  
   155  // doAssoc returns an almost identical tree, with the i-th element replaced by
   156  // val.
   157  func doAssoc(height uint, n node, i int, val any) node {
   158  	m := clone(n)
   159  	if height == 0 {
   160  		m[i&chunkMask] = val
   161  	} else {
   162  		sub := (i >> (height * chunkBits)) & chunkMask
   163  		m[sub] = doAssoc(height-1, m[sub].(node), i, val)
   164  	}
   165  	return m
   166  }
   167  
   168  func (v *vector) Conj(val any) Vector {
   169  	// Room in tail?
   170  	if v.count-v.treeSize() < tailMaxLen {
   171  		newTail := make([]any, len(v.tail)+1)
   172  		copy(newTail, v.tail)
   173  		newTail[len(v.tail)] = val
   174  		return &vector{v.count + 1, v.height, v.root, newTail}
   175  	}
   176  	// Full tail; push into tree.
   177  	tailNode := nodeFromSlice(v.tail)
   178  	newHeight := v.height
   179  	var newRoot node
   180  	// Overflow root?
   181  	if (v.count >> chunkBits) > (1 << (v.height * chunkBits)) {
   182  		newRoot = newNode()
   183  		newRoot[0] = v.root
   184  		newRoot[1] = newPath(v.height, tailNode)
   185  		newHeight++
   186  	} else {
   187  		newRoot = v.pushTail(v.height, v.root, tailNode)
   188  	}
   189  	return &vector{v.count + 1, newHeight, newRoot, []any{val}}
   190  }
   191  
   192  // pushTail returns a tree with tail appended.
   193  func (v *vector) pushTail(height uint, n node, tail node) node {
   194  	if height == 0 {
   195  		return tail
   196  	}
   197  	idx := ((v.count - 1) >> (height * chunkBits)) & chunkMask
   198  	m := clone(n)
   199  	child := n[idx]
   200  	if child == nil {
   201  		m[idx] = newPath(height-1, tail)
   202  	} else {
   203  		m[idx] = v.pushTail(height-1, child.(node), tail)
   204  	}
   205  	return m
   206  }
   207  
   208  // newPath creates a left-branching tree of specified height and leaf.
   209  func newPath(height uint, leaf node) node {
   210  	if height == 0 {
   211  		return leaf
   212  	}
   213  	ret := newNode()
   214  	ret[0] = newPath(height-1, leaf)
   215  	return ret
   216  }
   217  
   218  func (v *vector) Pop() Vector {
   219  	switch v.count {
   220  	case 0:
   221  		return nil
   222  	case 1:
   223  		return Empty
   224  	}
   225  	if v.count-v.treeSize() > 1 {
   226  		newTail := make([]any, len(v.tail)-1)
   227  		copy(newTail, v.tail)
   228  		return &vector{v.count - 1, v.height, v.root, newTail}
   229  	}
   230  	newTail := v.sliceFor(v.count - 2)
   231  	newRoot := v.popTail(v.height, v.root)
   232  	newHeight := v.height
   233  	if v.height > 0 && newRoot[1] == nil {
   234  		newRoot = newRoot[0].(node)
   235  		newHeight--
   236  	}
   237  	return &vector{v.count - 1, newHeight, newRoot, newTail}
   238  }
   239  
   240  // popTail returns a new tree with the last leaf removed. It returns nil when
        // removing that leaf leaves the subtree rooted at n without any children.
        // The position of the last leaf is derived from the receiver's count.
   241  func (v *vector) popTail(level uint, n node) node {
        	// count-1 is the single tail element, so count-2 is the last element
        	// stored in the tree; idx is the slot of n on the path to it.
   242  	idx := ((v.count - 2) >> (level * chunkBits)) & chunkMask
   243  	if level > 1 {
        		// The children of n are interior nodes: remove the leaf from the
        		// child on the path first.
   244  		newChild := v.popTail(level-1, n[idx].(node))
   245  		if newChild == nil && idx == 0 {
        			// The first child became empty, so n is empty too.
   246  			return nil
   247  		}
   248  		m := clone(n)
   249  		if newChild == nil {
   250  			// This is needed since `m[idx] = newChild` would store an
   251  			// interface{} with a non-nil type part, which is non-nil
   252  			m[idx] = nil
   253  		} else {
   254  			m[idx] = newChild
   255  		}
   256  		return m
   257  	} else if idx == 0 {
        		// The slot being removed is the first one, so nothing would remain.
   258  		return nil
   259  	} else {
        		// Clear the last occupied slot in a copy of n.
   260  		m := clone(n)
   261  		m[idx] = nil
   262  		return m
   263  	}
   264  }
   265  
   266  func (v *vector) SubVector(begin, end int) Vector {
   267  	if begin < 0 || begin > end || end > v.count {
   268  		return nil
   269  	}
   270  	return &subVector{v, begin, end}
   271  }
   272  
   // Iterator returns an iterator positioned at the first element of v.
   273  func (v *vector) Iterator() Iterator {
   274  	return newIterator(v)
   275  }
   276  
   // MarshalJSON implements json.Marshaler by encoding the elements of v, in
   // order, as a JSON array.
   277  func (v *vector) MarshalJSON() ([]byte, error) {
   278  	return marshalJSON(v.Iterator())
   279  }
   280  
   // subVector is a Vector that presents the elements [begin, end) of an
   // underlying vector.
   281  type subVector struct {
   282  	v     *vector // underlying vector
   283  	begin int // index in v of the first element of the view
   284  	end   int // index in v one past the last element of the view
   285  }
   286  
   // Len returns the number of elements in the subvector.
   287  func (s *subVector) Len() int {
   288  	return s.end - s.begin
   289  }
   290  
   291  func (s *subVector) Index(i int) (any, bool) {
   292  	if i < 0 || s.begin+i >= s.end {
   293  		return nil, false
   294  	}
   295  	return s.v.Index(s.begin + i)
   296  }
   297  
   298  func (s *subVector) Assoc(i int, val any) Vector {
   299  	if i < 0 || s.begin+i > s.end {
   300  		return nil
   301  	} else if s.begin+i == s.end {
   302  		return s.Conj(val)
   303  	}
   304  	return s.v.Assoc(s.begin+i, val).SubVector(s.begin, s.end)
   305  }
   306  
   // Conj returns a subvector with val appended. It stores val at index end in a
   // modified copy of the underlying vector (replacing whatever the underlying
   // vector holds there, or appending when end is its length) and returns the
   // range [begin, end+1) of that copy.
   307  func (s *subVector) Conj(val any) Vector {
   308  	return s.v.Assoc(s.end, val).SubVector(s.begin, s.end+1)
   309  }
   310  
   311  func (s *subVector) Pop() Vector {
   312  	switch s.Len() {
   313  	case 0:
   314  		return nil
   315  	case 1:
   316  		return Empty
   317  	default:
   318  		return s.v.SubVector(s.begin, s.end-1)
   319  	}
   320  }
   321  
   322  func (s *subVector) SubVector(i, j int) Vector {
   323  	return s.v.SubVector(s.begin+i, s.begin+j)
   324  }
   325  
   // Iterator returns an iterator over the underlying vector restricted to the
   // range [begin, end).
   326  func (s *subVector) Iterator() Iterator {
   327  	return newIteratorWithRange(s.v, s.begin, s.end)
   328  }
   329  
   // MarshalJSON implements json.Marshaler by encoding the elements of the
   // subvector, in order, as a JSON array.
   330  func (s *subVector) MarshalJSON() ([]byte, error) {
   331  	return marshalJSON(s.Iterator())
   332  }
   333  
   // iterator walks the elements of a vector whose indices lie in [index, end).
   334  type iterator struct {
   335  	v        *vector // vector being iterated
   336  	treeSize int // v.treeSize(), computed once when the iterator is created
   337  	index    int // index in v of the current element
   338  	end      int // index in v at which iteration stops (exclusive)
   339  	path     []pathEntry // nodes from the root down to the current leaf; unset when starting in the tail
   340  }
   341  
   // pathEntry records one step of an iterator's path through the tree: a node
   // and the slot currently selected in it.
   342  type pathEntry struct {
   343  	node  node
   344  	index int
   345  }
   346  
        // current returns the content of the selected slot: a child node for an
        // interior node, an element for a leaf.
   347  func (e pathEntry) current() any {
   348  	return e.node[e.index]
   349  }
   350  
   // newIterator returns an iterator over all elements of v.
   351  func newIterator(v *vector) *iterator {
   352  	return newIteratorWithRange(v, 0, v.Len())
   353  }
   354  
   355  func newIteratorWithRange(v *vector, begin, end int) *iterator {
   356  	it := &iterator{v, v.treeSize(), begin, end, nil}
   357  	if it.index >= it.treeSize {
   358  		return it
   359  	}
   360  	// Find the node for begin, remembering all nodes along the path.
   361  	n := v.root
   362  	for shift := v.height * chunkBits; shift > 0; shift -= chunkBits {
   363  		idx := (begin >> shift) & chunkMask
   364  		it.path = append(it.path, pathEntry{n, idx})
   365  		n = n[idx].(node)
   366  	}
   367  	it.path = append(it.path, pathEntry{n, begin & chunkMask})
   368  	return it
   369  }
   370  
   371  func (it *iterator) Elem() any {
   372  	if it.index >= it.treeSize {
   373  		return it.v.tail[it.index-it.treeSize]
   374  	}
   375  	return it.path[len(it.path)-1].current()
   376  }
   377  
   // HasElem reports whether the current position is still before the end of
   // the iterated range.
   378  func (it *iterator) HasElem() bool {
   379  	return it.index < it.end
   380  }
   381  
   382  func (it *iterator) Next() {
   383  	if it.index+1 >= it.treeSize {
   384  		// Next element is in tail. Just increment the index.
   385  		it.index++
   386  		return
   387  	}
   388  	// Find the deepest level that can be advanced.
   389  	var i int
   390  	for i = len(it.path) - 1; i >= 0; i-- {
   391  		e := it.path[i]
   392  		if e.index+1 < len(e.node) {
   393  			break
   394  		}
   395  	}
   396  	if i == -1 {
   397  		panic("cannot advance; vector iterator bug")
   398  	}
   399  	// Advance on this node, and re-populate all deeper levels.
   400  	it.path[i].index++
   401  	for i++; i < len(it.path); i++ {
   402  		it.path[i] = pathEntry{it.path[i-1].current().(node), 0}
   403  	}
   404  	it.index++
   405  }
   406  
   // marshalError reports a failure to marshal one element of a vector.
   type marshalError struct {
   	index int   // position of the offending element in the iteration
   	cause error // error returned when marshalling that element
   }

   // Error formats the error as "element <index>: <cause>".
   func (err *marshalError) Error() string {
   	return fmt.Sprintf("element %d: %s", err.index, err.cause)
   }

   // Unwrap returns the underlying cause, so that errors.Is and errors.As can
   // inspect it.
   func (err *marshalError) Unwrap() error {
   	return err.cause
   }
   415  
   416  func marshalJSON(it Iterator) ([]byte, error) {
   417  	var buf bytes.Buffer
   418  	buf.WriteByte('[')
   419  	index := 0
   420  	for ; it.HasElem(); it.Next() {
   421  		if index > 0 {
   422  			buf.WriteByte(',')
   423  		}
   424  		elemBytes, err := json.Marshal(it.Elem())
   425  		if err != nil {
   426  			return nil, &marshalError{index, err}
   427  		}
   428  		buf.Write(elemBytes)
   429  		index++
   430  	}
   431  	buf.WriteByte(']')
   432  	return buf.Bytes(), nil
   433  }