github.com/jbendotnet/noms@v0.0.0-20190904222105-c43e4293ea92/go/types/list.go (about) 1 // Copyright 2016 Attic Labs, Inc. All rights reserved. 2 // Licensed under the Apache License, version 2.0: 3 // http://www.apache.org/licenses/LICENSE-2.0 4 5 package types 6 7 import ( 8 "sync/atomic" 9 10 "github.com/attic-labs/noms/go/d" 11 ) 12 13 // List represents a list or an array of Noms values. A list can contain zero or more values of zero 14 // or more types. The type of the list will reflect the type of the elements in the list. For 15 // example: 16 // 17 // l := NewList(Number(1), Bool(true)) 18 // fmt.Println(l.Type().Describe()) 19 // // outputs List<Bool | Number> 20 // 21 // Lists, like all Noms values are immutable so the "mutation" methods return a new list. 22 type List struct { 23 sequence 24 } 25 26 func newList(seq sequence) List { 27 return List{seq} 28 } 29 30 // NewList creates a new List where the type is computed from the elements in the list, populated 31 // with values, chunking if and when needed. 32 func NewList(vrw ValueReadWriter, values ...Value) List { 33 ch := newEmptyListSequenceChunker(vrw) 34 for _, v := range values { 35 ch.Append(v) 36 } 37 return newList(ch.Done()) 38 } 39 40 // NewStreamingList creates a new List, populated with values, chunking if and when needed. As 41 // chunks are created, they're written to vrw -- including the root chunk of the list. Once the 42 // caller has closed values, the caller can read the completed List from the returned channel. 43 func NewStreamingList(vrw ValueReadWriter, values <-chan Value) <-chan List { 44 out := make(chan List, 1) 45 go func() { 46 defer close(out) 47 ch := newEmptyListSequenceChunker(vrw) 48 for v := range values { 49 ch.Append(v) 50 } 51 out <- newList(ch.Done()) 52 }() 53 return out 54 } 55 56 func (l List) Edit() *ListEditor { 57 return NewListEditor(l) 58 } 59 60 // Collection interface 61 62 func (l List) asSequence() sequence { 63 return l.sequence 64 } 65 66 // Value interface 67 func (l List) Value() Value { 68 return l 69 } 70 71 func (l List) WalkValues(cb ValueCallback) { 72 iterAll(l, func(v Value, idx uint64) { 73 cb(v) 74 }) 75 } 76 77 // Get returns the value at the given index. If this list has been chunked then this will have to 78 // descend into the prolly-tree which leads to Get being O(depth). 79 func (l List) Get(idx uint64) Value { 80 d.PanicIfFalse(idx < l.Len()) 81 cur := newCursorAtIndex(l.sequence, idx) 82 return cur.current().(Value) 83 } 84 85 // Concat returns a new List comprised of this joined with other. It only needs 86 // to visit the rightmost prolly tree chunks of this List, and the leftmost 87 // prolly tree chunks of other, so it's efficient. 88 func (l List) Concat(other List) List { 89 seq := concat(l.sequence, other.sequence, func(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { 90 return l.newChunker(cur, vrw) 91 }) 92 return newList(seq) 93 } 94 95 // Iter iterates over the list and calls f for every element in the list. If f returns true then the 96 // iteration stops. 97 func (l List) Iter(f func(v Value, index uint64) (stop bool)) { 98 idx := uint64(0) 99 cur := newCursorAtIndex(l.sequence, idx) 100 cur.iter(func(v interface{}) bool { 101 if f(v.(Value), uint64(idx)) { 102 return true 103 } 104 idx++ 105 return false 106 }) 107 } 108 109 func (l List) IterRange(startIdx, endIdx uint64, f func(v Value, idx uint64)) { 110 idx := uint64(startIdx) 111 cb := func(v Value) { 112 f(v, idx) 113 idx++ 114 } 115 iterRange(l, startIdx, endIdx, cb) 116 } 117 118 // IterAll iterates over the list and calls f for every element in the list. Unlike Iter there is no 119 // way to stop the iteration and all elements are visited. 120 func (l List) IterAll(f func(v Value, index uint64)) { 121 iterAll(l, f) 122 } 123 124 func iterAll(col Collection, f func(v Value, index uint64)) { 125 concurrency := 6 126 vcChan := make(chan chan Value, concurrency) 127 128 // Target reading data in |targetBatchBytes| per thread. We don't know how 129 // many bytes each value is, so update |estimatedNumValues| as data is read. 130 targetBatchBytes := 1 << 23 // 8MB 131 estimatedNumValues := uint64(1000) 132 133 go func() { 134 for idx, l := uint64(0), col.Len(); idx < l; { 135 numValues := atomic.LoadUint64(&estimatedNumValues) 136 137 start := idx 138 blockLength := l - start 139 if blockLength > numValues { 140 blockLength = numValues 141 } 142 idx += blockLength 143 144 vc := make(chan Value) 145 vcChan <- vc 146 147 go func() { 148 numBytes := iterRange(col, start, start+blockLength, func(v Value) { 149 vc <- v 150 }) 151 close(vc) 152 153 // Adjust the estimated number of values to try to read 154 // |targetBatchBytes| next time. 155 if numValues == blockLength { 156 scale := float64(targetBatchBytes) / float64(numBytes) 157 atomic.StoreUint64(&estimatedNumValues, uint64(float64(numValues)*scale)) 158 } 159 }() 160 } 161 close(vcChan) 162 }() 163 164 // Ensure read-ahead goroutines can exit, because the `range` below might not 165 // finish if an |f| callback panics. 166 defer func() { 167 for vc := range vcChan { 168 close(vc) 169 } 170 }() 171 172 i := uint64(0) 173 for vc := range vcChan { 174 for v := range vc { 175 f(v, i) 176 i++ 177 } 178 } 179 } 180 181 func iterRange(col Collection, startIdx, endIdx uint64, cb func(v Value)) (numBytes uint64) { 182 l := col.Len() 183 d.PanicIfTrue(startIdx > endIdx || endIdx > l) 184 if startIdx == endIdx { 185 return 186 } 187 188 leaves, localStart := LoadLeafNodes([]Collection{col}, startIdx, endIdx) 189 endIdx = localStart + endIdx - startIdx 190 startIdx = localStart 191 numValues := 0 192 valuesPerIdx := uint64(getValuesPerIdx(col.Kind())) 193 194 for _, leaf := range leaves { 195 seq := leaf.asSequence() 196 values := seq.valuesSlice(startIdx, endIdx) 197 numValues += len(values) 198 199 for _, v := range values { 200 cb(v) 201 } 202 203 endIdx = endIdx - uint64(len(values))/valuesPerIdx - startIdx 204 startIdx = 0 205 numBytes += uint64(len(seq.valueBytes())) // note: should really only include |values| 206 } 207 return 208 } 209 210 // Iterator returns a ListIterator which can be used to iterate efficiently over a list. 211 func (l List) Iterator() ListIterator { 212 return l.IteratorAt(0) 213 } 214 215 // IteratorAt returns a ListIterator starting at index. If index is out of bound the iterator will 216 // have reached its end on creation. 217 func (l List) IteratorAt(index uint64) ListIterator { 218 return ListIterator{ 219 newCursorAtIndex(l.sequence, index), 220 } 221 } 222 223 // Diff streams the diff from last to the current list to the changes channel. Caller can close 224 // closeChan to cancel the diff operation. 225 func (l List) Diff(last List, changes chan<- Splice, closeChan <-chan struct{}) { 226 l.DiffWithLimit(last, changes, closeChan, DEFAULT_MAX_SPLICE_MATRIX_SIZE) 227 } 228 229 // DiffWithLimit streams the diff from last to the current list to the changes channel. Caller can 230 // close closeChan to cancel the diff operation. 231 // The maxSpliceMatrixSize determines the how big of an edit distance matrix we are willing to 232 // compute versus just saying the thing changed. 233 func (l List) DiffWithLimit(last List, changes chan<- Splice, closeChan <-chan struct{}, maxSpliceMatrixSize uint64) { 234 if l.Equals(last) { 235 return 236 } 237 lLen, lastLen := l.Len(), last.Len() 238 if lLen == 0 { 239 changes <- Splice{0, lastLen, 0, 0} // everything removed 240 return 241 } 242 if lastLen == 0 { 243 changes <- Splice{0, 0, lLen, 0} // everything added 244 return 245 } 246 247 indexedSequenceDiff(last.sequence, 0, l.sequence, 0, changes, closeChan, maxSpliceMatrixSize) 248 } 249 250 func (l List) newChunker(cur *sequenceCursor, vrw ValueReadWriter) *sequenceChunker { 251 return newSequenceChunker(cur, 0, vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes) 252 } 253 254 func makeListLeafChunkFn(vrw ValueReadWriter) makeChunkFn { 255 return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64) { 256 d.PanicIfFalse(level == 0) 257 values := make([]Value, len(items)) 258 259 for i, v := range items { 260 values[i] = v.(Value) 261 } 262 263 list := newList(newListLeafSequence(vrw, values...)) 264 return list, orderedKeyFromInt(len(values)), uint64(len(values)) 265 } 266 } 267 268 func newEmptyListSequenceChunker(vrw ValueReadWriter) *sequenceChunker { 269 return newEmptySequenceChunker(vrw, makeListLeafChunkFn(vrw), newIndexedMetaSequenceChunkFn(ListKind, vrw), hashValueBytes) 270 }