github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/types/list.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  package types
     6  
     7  import (
     8  	"sync/atomic"
     9  
    10  	"github.com/attic-labs/noms/go/d"
    11  )
    12  
// List represents a list or an array of Noms values. A list can contain zero or more values of zero
// or more types. The type of the list will reflect the type of the elements in the list. For
// example (vrw being the ValueReadWriter the list is created with):
//
//  l := NewList(vrw, Number(1), Bool(true))
//  fmt.Println(l.Type().Describe())
//  // outputs List<Bool | Number>
//
// Lists, like all Noms values, are immutable so the "mutation" methods return a new list.
type List struct {
	// sequence is embedded, so its methods (for example Len, used by Get) are
	// available directly on List.
	sequence
}
    25  
    26  func newList(seq sequence) List {
    27  	return List{seq}
    28  }
    29  
    30  // NewList creates a new List where the type is computed from the elements in the list, populated
    31  // with values, chunking if and when needed.
    32  func NewList(vrw ValueReadWriter, values ...Value) List {
    33  	ch := newEmptyListSequenceChunker(vrw)
    34  	for _, v := range values {
    35  		ch.Append(v)
    36  	}
    37  	return newList(ch.Done())
    38  }
    39  
    40  // NewStreamingList creates a new List, populated with values, chunking if and when needed. As
    41  // chunks are created, they're written to vrw -- including the root chunk of the list. Once the
    42  // caller has closed values, the caller can read the completed List from the returned channel.
    43  func NewStreamingList(vrw ValueReadWriter, values <-chan Value) <-chan List {
    44  	out := make(chan List, 1)
    45  	go func() {
    46  		defer close(out)
    47  		ch := newEmptyListSequenceChunker(vrw)
    48  		for v := range values {
    49  			ch.Append(v)
    50  		}
    51  		out <- newList(ch.Done())
    52  	}()
    53  	return out
    54  }
    55  
// Edit returns a ListEditor created from l by NewListEditor.
func (l List) Edit() *ListEditor {
	return NewListEditor(l)
}
    59  
// Collection interface

// asSequence returns the sequence embedded in l.
func (l List) asSequence() sequence {
	return l.sequence
}
    65  
// Value interface

// Value returns the list itself.
func (l List) Value() Value {
	return l
}
    70  
    71  func (l List) WalkValues(cb ValueCallback) {
    72  	iterAll(l, func(v Value, idx uint64) {
    73  		cb(v)
    74  	})
    75  }
    76  
    77  // Get returns the value at the given index. If this list has been chunked then this will have to
    78  // descend into the prolly-tree which leads to Get being O(depth).
    79  func (l List) Get(idx uint64) Value {
    80  	d.PanicIfFalse(idx < l.Len())
    81  	cur := newCursorAtIndex(l.sequence, idx)
    82  	return cur.current().(Value)
    83  }
    84  
    85  // Concat returns a new List comprised of this joined with other. It only needs
    86  // to visit the rightmost prolly tree chunks of this List, and the leftmost
    87  // prolly tree chunks of other, so it's efficient.
    88  func (l List) Concat(other List) List {
    89  	seq := concat(l.sequence, other.sequence, func(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker {
    90  		return l.newChunker(cur, vrw)
    91  	})
    92  	return newList(seq)
    93  }
    94  
    95  // Iter iterates over the list and calls f for every element in the list. If f returns true then the
    96  // iteration stops.
    97  func (l List) Iter(f func(v Value, index uint64) (stop bool)) {
    98  	idx := uint64(0)
    99  	cur := newCursorAtIndex(l.sequence, idx)
   100  	cur.iter(func(v interface{}) bool {
   101  		if f(v.(Value), uint64(idx)) {
   102  			return true
   103  		}
   104  		idx++
   105  		return false
   106  	})
   107  }
   108  
   109  func (l List) IterRange(startIdx, endIdx uint64, f func(v Value, idx uint64)) {
   110  	idx := uint64(startIdx)
   111  	cb := func(v Value) {
   112  		f(v, idx)
   113  		idx++
   114  	}
   115  	iterRange(l, startIdx, endIdx, cb)
   116  }
   117  
// IterAll iterates over the list and calls f for every element in the list, passing the element
// and its index. Unlike Iter there is no way to stop the iteration and all elements are visited.
// The work is delegated to iterAll.
func (l List) IterAll(f func(v Value, index uint64)) {
	iterAll(l, f)
}
   123  
   124  func iterAll(col Collection, f func(v Value, index uint64)) {
   125  	concurrency := 6
   126  	vcChan := make(chan chan Value, concurrency)
   127  
   128  	// Target reading data in |targetBatchBytes| per thread. We don't know how
   129  	// many bytes each value is, so update |estimatedNumValues| as data is read.
   130  	targetBatchBytes := 1 << 23 // 8MB
   131  	estimatedNumValues := uint64(1000)
   132  
   133  	go func() {
   134  		for idx, l := uint64(0), col.Len(); idx < l; {
   135  			numValues := atomic.LoadUint64(&estimatedNumValues)
   136  
   137  			start := idx
   138  			blockLength := l - start
   139  			if blockLength > numValues {
   140  				blockLength = numValues
   141  			}
   142  			idx += blockLength
   143  
   144  			vc := make(chan Value)
   145  			vcChan <- vc
   146  
   147  			go func() {
   148  				numBytes := iterRange(col, start, start+blockLength, func(v Value) {
   149  					vc <- v
   150  				})
   151  				close(vc)
   152  
   153  				// Adjust the estimated number of values to try to read
   154  				// |targetBatchBytes| next time.
   155  				if numValues == blockLength {
   156  					scale := float64(targetBatchBytes) / float64(numBytes)
   157  					atomic.StoreUint64(&estimatedNumValues, uint64(float64(numValues)*scale))
   158  				}
   159  			}()
   160  		}
   161  		close(vcChan)
   162  	}()
   163  
   164  	// Ensure read-ahead goroutines can exit, because the `range` below might not
   165  	// finish if an |f| callback panics.
   166  	defer func() {
   167  		for vc := range vcChan {
   168  			close(vc)
   169  		}
   170  	}()
   171  
   172  	i := uint64(0)
   173  	for vc := range vcChan {
   174  		for v := range vc {
   175  			f(v, i)
   176  			i++
   177  		}
   178  	}
   179  }
   180  
   181  func iterRange(col Collection, startIdx, endIdx uint64, cb func(v Value)) (numBytes uint64) {
   182  	l := col.Len()
   183  	d.PanicIfTrue(startIdx > endIdx || endIdx > l)
   184  	if startIdx == endIdx {
   185  		return
   186  	}
   187  
   188  	leaves, localStart := LoadLeafNodes([]Collection{col}, startIdx, endIdx)
   189  	endIdx = localStart + endIdx - startIdx
   190  	startIdx = localStart
   191  	numValues := 0
   192  	valuesPerIdx := uint64(getValuesPerIdx(col.Kind()))
   193  
   194  	for _, leaf := range leaves {
   195  		seq := leaf.asSequence()
   196  		values := seq.valuesSlice(startIdx, endIdx)
   197  		numValues += len(values)
   198  
   199  		for _, v := range values {
   200  			cb(v)
   201  		}
   202  
   203  		endIdx = endIdx - uint64(len(values))/valuesPerIdx - startIdx
   204  		startIdx = 0
   205  		numBytes += uint64(len(seq.valueBytes())) // note: should really only include |values|
   206  	}
   207  	return
   208  }
   209  
// Iterator returns a ListIterator which can be used to iterate efficiently over a list. It is
// equivalent to l.IteratorAt(0).
func (l List) Iterator() ListIterator {
	return l.IteratorAt(0)
}
   214  
   215  // IteratorAt returns a ListIterator starting at index. If index is out of bound the iterator will
   216  // have reached its end on creation.
   217  func (l List) IteratorAt(index uint64) ListIterator {
   218  	return ListIterator{
   219  		newCursorAtIndex(l.sequence, index),
   220  	}
   221  }
   222  
// Diff streams the diff from last to the current list to the changes channel. Caller can close
// closeChan to cancel the diff operation. It is DiffWithLimit called with
// DEFAULT_MAX_SPLICE_MATRIX_SIZE as the matrix size limit.
func (l List) Diff(last List, changes chan<- Splice, closeChan <-chan struct{}) {
	l.DiffWithLimit(last, changes, closeChan, DEFAULT_MAX_SPLICE_MATRIX_SIZE)
}
   228  
   229  // DiffWithLimit streams the diff from last to the current list to the changes channel. Caller can
   230  // close closeChan to cancel the diff operation.
   231  // The maxSpliceMatrixSize determines the how big of an edit distance matrix we are willing to
   232  // compute versus just saying the thing changed.
   233  func (l List) DiffWithLimit(last List, changes chan<- Splice, closeChan <-chan struct{}, maxSpliceMatrixSize uint64) {
   234  	if l.Equals(last) {
   235  		return
   236  	}
   237  	lLen, lastLen := l.Len(), last.Len()
   238  	if lLen == 0 {
   239  		changes <- Splice{0, lastLen, 0, 0} // everything removed
   240  		return
   241  	}
   242  	if lastLen == 0 {
   243  		changes <- Splice{0, 0, lLen, 0} // everything added
   244  		return
   245  	}
   246  
   247  	indexedSequenceDiff(last.sequence, 0, l.sequence, 0, changes, closeChan, maxSpliceMatrixSize)
   248  }
   249  
// newChunker returns a sequenceChunker for lists, created by newSequenceChunker from cur and vrw.
// It is configured with the list leaf chunk function, the indexed meta-sequence chunk function
// for ListKind, and hashValueBytes.
// NOTE(review): the meaning of the literal 0 passed as the second argument is not visible from
// this file -- confirm against newSequenceChunker.
func (l List) newChunker(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker {
	return newSequenceChunker(cur, 0, vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes)
}
   253  
   254  func makeListLeafChunkFn(vrw ValueReadWriter) makeChunkFn {
   255  	return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) {
   256  		d.PanicIfFalse(level == 0)
   257  		values := make([]Value, len(items))
   258  
   259  		for i, v := range items {
   260  			values[i] = v.(Value)
   261  		}
   262  
   263  		list := newList(newListLeafSequence(vrw, values...))
   264  		return list, orderedKeyFromInt(len(values)), uint64(len(values))
   265  	}
   266  }
   267  
// newEmptyListSequenceChunker returns a sequenceChunker for building a list from scratch, created
// by newEmptySequenceChunker for vrw. It is configured with the list leaf chunk function, the
// indexed meta-sequence chunk function for ListKind, and hashValueBytes.
func newEmptyListSequenceChunker(vrw ValueReadWriter) *sequenceChunker {
	return newEmptySequenceChunker(vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes)
}