github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/types/map.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  package types
    23  
    24  import (
    25  	"context"
    26  	"errors"
    27  	"fmt"
    28  	"io"
    29  
    30  	"golang.org/x/sync/errgroup"
    31  
    32  	"github.com/dolthub/dolt/go/store/d"
    33  )
    34  
// ValueInRange is a predicate over a Value that can also fail with an error.
// In this file it is accepted by Map.DiffLeftRightInRange, which forwards it
// unchanged to orderedSequenceDiffLeftRightInRange.
// NOTE(review): presumably a true result means the value lies inside the
// range being diffed — confirm against orderedSequenceDiffLeftRightInRange.
type ValueInRange func(Value) (bool, error)
    36  
// ErrKeysNotOrdered is returned while building a streaming map when a key
// received from the input channel is not strictly greater (per Value.Less)
// than the key that preceded it.
var ErrKeysNotOrdered = errors.New("streaming map keys not ordered")
    38  
// EmptyMap is the zero-value Map. The constructors in this file return it
// alongside a non-nil error.
var EmptyMap Map
    40  
// Map is a collection of key/value entries backed by an orderedSequence.
// The sequence is embedded, so its methods are promoted onto Map.
type Map struct {
	orderedSequence
}
    44  
    45  func newMap(seq orderedSequence) Map {
    46  	return Map{seq}
    47  }
    48  
    49  func mapHashValueBytes(item sequenceItem, rv *rollingValueHasher) error {
    50  	entry := item.(mapEntry)
    51  	err := hashValueBytes(entry.key, rv)
    52  
    53  	if err != nil {
    54  		return err
    55  	}
    56  
    57  	err = hashValueBytes(entry.value, rv)
    58  
    59  	if err != nil {
    60  		return err
    61  	}
    62  
    63  	return nil
    64  }
    65  
    66  func NewMap(ctx context.Context, vrw ValueReadWriter, kv ...Value) (Map, error) {
    67  	entries, err := buildMapData(vrw.Format(), kv)
    68  
    69  	if err != nil {
    70  		return EmptyMap, err
    71  	}
    72  
    73  	ch, err := newEmptyMapSequenceChunker(ctx, vrw)
    74  
    75  	if err != nil {
    76  		return EmptyMap, err
    77  	}
    78  
    79  	for _, entry := range entries.entries {
    80  		_, err := ch.Append(ctx, entry)
    81  
    82  		if err != nil {
    83  			return EmptyMap, err
    84  		}
    85  	}
    86  
    87  	seq, err := ch.Done(ctx)
    88  
    89  	if err != nil {
    90  		return EmptyMap, err
    91  	}
    92  
    93  	return newMap(seq.(orderedSequence)), nil
    94  }
    95  
    96  // NewStreamingMap takes an input channel of values and returns a value that
    97  // will produce a finished Map when |.Wait()| is called.  Values sent to the
    98  // input channel must be alternating keys and values. (e.g.  k1, v1, k2,
    99  // v2...). Moreover keys need to be added to the channel in Noms sortorder,
   100  // adding key values to the input channel out of order will result in an error.
   101  // Once the input channel is closed by the caller, a finished Map will be
   102  // available from the |Wait| call.
   103  //
   104  // See graph_builder.go for building collections with values that are not in
   105  // order.
   106  func NewStreamingMap(ctx context.Context, vrw ValueReadWriter, kvs <-chan Value) *StreamingMap {
   107  	d.PanicIfTrue(vrw == nil)
   108  	sm := &StreamingMap{}
   109  	sm.eg, sm.egCtx = errgroup.WithContext(context.TODO())
   110  	sm.eg.Go(func() error {
   111  		m, err := readMapInput(sm.egCtx, vrw, kvs)
   112  		sm.m = m
   113  		return err
   114  	})
   115  	return sm
   116  }
   117  
// StreamingMap is the handle returned by NewStreamingMap. It owns the
// goroutine that reads key/value pairs and the Map that goroutine produces.
type StreamingMap struct {
	eg    *errgroup.Group // group running the reader goroutine; awaited by Wait
	egCtx context.Context // context created together with eg; exposed through Done
	m     Map             // result written by the reader goroutine, returned by Wait
}
   123  
   124  func (sm *StreamingMap) Wait() (Map, error) {
   125  	err := sm.eg.Wait()
   126  	return sm.m, err
   127  }
   128  
// Done returns a signal channel which is closed once the StreamingMap is no
// longer reading from the key/values channel. A send to the key/value channel
// should be in a select with a read from this channel to ensure that the send
// does not deadlock.
//
// The channel returned is the Done channel of the errgroup context held by
// the StreamingMap.
func (sm *StreamingMap) Done() <-chan struct{} {
	return sm.egCtx.Done()
}
   136  
   137  func readMapInput(ctx context.Context, vrw ValueReadWriter, kvs <-chan Value) (Map, error) {
   138  	ch, err := newEmptyMapSequenceChunker(ctx, vrw)
   139  	if err != nil {
   140  		return EmptyMap, err
   141  	}
   142  
   143  	var lastK Value
   144  	nextIsKey := true
   145  	var k Value
   146  LOOP:
   147  	for {
   148  		select {
   149  		case v, ok := <-kvs:
   150  			if !ok {
   151  				break LOOP
   152  			}
   153  			if nextIsKey {
   154  				k = v
   155  
   156  				if lastK != nil {
   157  					isLess, err := lastK.Less(vrw.Format(), k)
   158  					if err != nil {
   159  						return EmptyMap, err
   160  					}
   161  					if !isLess {
   162  						return EmptyMap, ErrKeysNotOrdered
   163  					}
   164  				}
   165  				lastK = k
   166  				nextIsKey = false
   167  			} else {
   168  				_, err := ch.Append(ctx, mapEntry{key: k, value: v})
   169  				if err != nil {
   170  					return EmptyMap, err
   171  				}
   172  
   173  				nextIsKey = true
   174  			}
   175  		case <-ctx.Done():
   176  			return EmptyMap, ctx.Err()
   177  		}
   178  	}
   179  
   180  	seq, err := ch.Done(ctx)
   181  	if err != nil {
   182  		return EmptyMap, err
   183  	}
   184  
   185  	return newMap(seq.(orderedSequence)), nil
   186  }
   187  
   188  // Diff computes the diff from |last| to |m| using the top-down algorithm,
   189  // which completes as fast as possible while taking longer to return early
   190  // results than left-to-right.
   191  func (m Map) Diff(ctx context.Context, last Map, changes chan<- ValueChanged) error {
   192  	if m.Equals(last) {
   193  		return nil
   194  	}
   195  	return orderedSequenceDiffLeftRight(ctx, last.orderedSequence, m.orderedSequence, changes)
   196  }
   197  
   198  // DiffLeftRight computes the diff from |last| to |m| using a left-to-right
   199  // streaming approach, optimised for returning results early, but not
   200  // completing quickly.
   201  func (m Map) DiffLeftRight(ctx context.Context, last Map, changes chan<- ValueChanged) error {
   202  	trueFunc := func(Value) (bool, error) {
   203  		return true, nil
   204  	}
   205  	return m.DiffLeftRightInRange(ctx, last, nil, trueFunc, changes)
   206  }
   207  
   208  func (m Map) DiffLeftRightInRange(ctx context.Context, last Map, start Value, inRange ValueInRange, changes chan<- ValueChanged) error {
   209  	if m.Equals(last) {
   210  		return nil
   211  	}
   212  
   213  	startKey := emptyKey
   214  	if !IsNull(start) {
   215  		var err error
   216  		startKey, err = newOrderedKey(start, m.Format())
   217  
   218  		if err != nil {
   219  			return err
   220  		}
   221  	}
   222  
   223  	return orderedSequenceDiffLeftRightInRange(ctx, last.orderedSequence, m.orderedSequence, startKey, inRange, changes)
   224  }
   225  
   226  // Collection interface
   227  
// asSequence returns the map's embedded orderedSequence as a sequence.
func (m Map) asSequence() sequence {
	return m.orderedSequence
}
   231  
// Value interface

// Value returns m itself and a nil error; ctx is not used.
func (m Map) Value(ctx context.Context) (Value, error) {
	return m, nil
}
   236  
   237  func (m Map) WalkValues(ctx context.Context, cb ValueCallback) error {
   238  	err := iterAll(ctx, m, func(v Value, idx uint64) error {
   239  		return cb(v)
   240  	})
   241  
   242  	return err
   243  }
   244  
   245  func (m Map) firstOrLast(ctx context.Context, last bool) (Value, Value, error) {
   246  	cur, err := newCursorAt(ctx, m.orderedSequence, emptyKey, false, last)
   247  
   248  	if err != nil {
   249  		return nil, nil, err
   250  	}
   251  
   252  	if !cur.valid() {
   253  		return nil, nil, nil
   254  	}
   255  
   256  	currItem, err := cur.current()
   257  
   258  	if err != nil {
   259  		return nil, nil, err
   260  	}
   261  
   262  	entry := currItem.(mapEntry)
   263  	return entry.key, entry.value, nil
   264  }
   265  
// Format returns the NomsBinFormat reported by m.format().
func (m Map) Format() *NomsBinFormat {
	return m.format()
}
   269  
// First returns the key and value found by firstOrLast with last=false.
// Both values are nil (with a nil error) when firstOrLast finds no valid
// cursor position.
func (m Map) First(ctx context.Context) (Value, Value, error) {
	return m.firstOrLast(ctx, false)
}
   273  
// Last returns the key and value found by firstOrLast with last=true.
// Both values are nil (with a nil error) when firstOrLast finds no valid
// cursor position.
func (m Map) Last(ctx context.Context) (Value, Value, error) {
	return m.firstOrLast(ctx, true)
}
   277  
   278  func (m Map) At(ctx context.Context, idx uint64) (key, value Value, err error) {
   279  	if idx >= m.Len() {
   280  		panic(fmt.Errorf("out of bounds: %d >= %d", idx, m.Len()))
   281  	}
   282  
   283  	cur, err := newSequenceIteratorAtIndex(ctx, m.orderedSequence, idx)
   284  
   285  	if err != nil {
   286  		return nil, nil, err
   287  	}
   288  
   289  	item, err := cur.current()
   290  
   291  	if err != nil {
   292  		return nil, nil, err
   293  	}
   294  
   295  	entry := item.(mapEntry)
   296  	return entry.key, entry.value, nil
   297  }
   298  
   299  func (m Map) MaybeGet(ctx context.Context, key Value) (v Value, ok bool, err error) {
   300  	cur, err := newCursorAtValue(ctx, m.orderedSequence, key, false, false)
   301  
   302  	if err != nil {
   303  		return nil, false, err
   304  	}
   305  
   306  	if !cur.valid() {
   307  		return nil, false, nil
   308  	}
   309  
   310  	item, err := cur.current()
   311  
   312  	if err != nil {
   313  		return nil, false, err
   314  	}
   315  
   316  	entry := item.(mapEntry)
   317  
   318  	if !entry.key.Equals(key) {
   319  		return nil, false, nil
   320  	}
   321  
   322  	return entry.value, true, nil
   323  }
   324  
   325  func (m Map) MaybeGetTuple(ctx context.Context, key Tuple) (v Tuple, ok bool, err error) {
   326  	var val Value
   327  	val, ok, err = m.MaybeGet(ctx, key)
   328  
   329  	if val != nil {
   330  		return val.(Tuple), ok, err
   331  	}
   332  
   333  	return Tuple{}, ok, err
   334  }
   335  
   336  func (m Map) Has(ctx context.Context, key Value) (bool, error) {
   337  	cur, err := newCursorAtValue(ctx, m.orderedSequence, key, false, false)
   338  
   339  	if err != nil {
   340  		return false, err
   341  	}
   342  
   343  	if !cur.valid() {
   344  		return false, nil
   345  	}
   346  
   347  	item, err := cur.current()
   348  
   349  	if err != nil {
   350  		return false, err
   351  	}
   352  
   353  	entry := item.(mapEntry)
   354  	return entry.key.Equals(key), nil
   355  }
   356  
// mapIterCallback is the per-entry callback used by Map.Iter and
// Map.IterFrom. Its results are handed straight back to the cursor's iter
// method.
// NOTE(review): presumably stop=true ends the iteration early — confirm
// against the cursor implementation.
type mapIterCallback func(key, value Value) (stop bool, err error)
   358  
   359  func (m Map) Iter(ctx context.Context, cb mapIterCallback) error {
   360  	cur, err := newCursorAt(ctx, m.orderedSequence, emptyKey, false, false)
   361  
   362  	if err != nil {
   363  		return err
   364  	}
   365  
   366  	return cur.iter(ctx, func(v interface{}) (bool, error) {
   367  		entry := v.(mapEntry)
   368  		return cb(entry.key, entry.value)
   369  	})
   370  }
   371  
   372  // Any returns true if cb() return true for any of the items in the map.
   373  func (m Map) Any(ctx context.Context, cb func(k, v Value) bool) (yep bool, err error) {
   374  	err = m.Iter(ctx, func(k, v Value) (bool, error) {
   375  		if cb(k, v) {
   376  			yep = true
   377  			return true, nil
   378  		}
   379  		return false, nil
   380  	})
   381  
   382  	return yep, err
   383  }
   384  
// isPrimitive always reports false for a Map.
func (m Map) isPrimitive() bool {
	return false
}
   388  
// Iterator returns a MapIterator positioned at index 0; it is shorthand for
// IteratorAt(ctx, 0).
func (m Map) Iterator(ctx context.Context) (MapIterator, error) {
	return m.IteratorAt(ctx, 0)
}
   392  
   393  func (m Map) IteratorAt(ctx context.Context, pos uint64) (MapIterator, error) {
   394  	cur, err := newSequenceIteratorAtIndex(ctx, m.orderedSequence, pos)
   395  
   396  	if err != nil {
   397  		return nil, err
   398  	}
   399  
   400  	return &mapIterator{
   401  		sequenceIter: cur,
   402  	}, nil
   403  }
   404  
// BufferedIterator returns a buffered MapIterator positioned at index 0; it
// is shorthand for BufferedIteratorAt(ctx, 0).
func (m Map) BufferedIterator(ctx context.Context) (MapIterator, error) {
	return m.BufferedIteratorAt(ctx, 0)
}
   408  
   409  func (m Map) BufferedIteratorAt(ctx context.Context, pos uint64) (MapIterator, error) {
   410  	bufCur, err := newBufferedIteratorAtIndex(ctx, m.orderedSequence, pos)
   411  
   412  	if err != nil {
   413  		return nil, err
   414  	}
   415  
   416  	return &mapIterator{
   417  		sequenceIter: bufCur,
   418  	}, nil
   419  }
   420  
   421  func (m Map) IteratorFrom(ctx context.Context, key Value) (MapIterator, error) {
   422  	cur, err := newCursorAtValue(ctx, m.orderedSequence, key, false, false)
   423  
   424  	if err != nil {
   425  		return nil, err
   426  	}
   427  
   428  	return &mapIterator{sequenceIter: cur}, nil
   429  }
   430  
   431  func (m Map) IteratorBackFrom(ctx context.Context, key Value) (MapIterator, error) {
   432  	cur, err := newCursorBackFromValue(ctx, m.orderedSequence, key)
   433  
   434  	if err != nil {
   435  		return nil, err
   436  	}
   437  
   438  	return &mapIterator{sequenceIter: cur}, nil
   439  }
   440  
// mapIterAllCallback is the per-entry callback used by Map.IterAll and
// Map.IterRange; a non-nil error returned from it is propagated to the
// underlying iteration helper.
type mapIterAllCallback func(key, value Value) error
   442  
   443  func (m Map) IterAll(ctx context.Context, cb mapIterAllCallback) error {
   444  	var k Value
   445  	err := iterAll(ctx, m, func(v Value, _ uint64) error {
   446  		if k != nil {
   447  			err := cb(k, v)
   448  
   449  			if err != nil {
   450  				return err
   451  			}
   452  
   453  			k = nil
   454  		} else {
   455  			k = v
   456  		}
   457  
   458  		return nil
   459  	})
   460  
   461  	if err != nil {
   462  		return err
   463  	}
   464  
   465  	d.PanicIfFalse(k == nil)
   466  	return nil
   467  }
   468  
   469  func (m Map) IterRange(ctx context.Context, startIdx, endIdx uint64, cb mapIterAllCallback) error {
   470  	var k Value
   471  	_, err := iterRange(ctx, m, startIdx, endIdx, func(v Value) error {
   472  		if k != nil {
   473  			err := cb(k, v)
   474  
   475  			if err != nil {
   476  				return err
   477  			}
   478  
   479  			k = nil
   480  		} else {
   481  			k = v
   482  		}
   483  
   484  		return nil
   485  	})
   486  
   487  	if err != nil {
   488  		return err
   489  	}
   490  
   491  	d.PanicIfFalse(k == nil)
   492  	return nil
   493  }
   494  
   495  func (m Map) IterFrom(ctx context.Context, start Value, cb mapIterCallback) error {
   496  	cur, err := newCursorAtValue(ctx, m.orderedSequence, start, false, false)
   497  
   498  	if err != nil {
   499  		return err
   500  	}
   501  
   502  	return cur.iter(ctx, func(v interface{}) (bool, error) {
   503  		entry := v.(mapEntry)
   504  		return cb(entry.key, entry.value)
   505  	})
   506  }
   507  
// Edit returns the MapEditor that NewMapEditor creates for m.
func (m Map) Edit() *MapEditor {
	return NewMapEditor(m)
}
   511  
   512  func buildMapData(nbf *NomsBinFormat, values []Value) (mapEntrySlice, error) {
   513  	if len(values) == 0 {
   514  		return mapEntrySlice{}, nil
   515  	}
   516  
   517  	if len(values)%2 != 0 {
   518  		d.Panic("Must specify even number of key/value pairs")
   519  	}
   520  	kvs := mapEntrySlice{
   521  		make([]mapEntry, len(values)/2),
   522  		nbf,
   523  	}
   524  
   525  	for i := 0; i < len(values); i += 2 {
   526  		d.PanicIfTrue(values[i] == nil)
   527  		d.PanicIfTrue(values[i+1] == nil)
   528  		entry := mapEntry{values[i], values[i+1]}
   529  		kvs.entries[i/2] = entry
   530  	}
   531  
   532  	uniqueSorted := mapEntrySlice{
   533  		make([]mapEntry, 0, len(kvs.entries)),
   534  		nbf,
   535  	}
   536  
   537  	err := SortWithErroringLess(kvs)
   538  
   539  	if err != nil {
   540  		return mapEntrySlice{}, err
   541  	}
   542  
   543  	last := kvs.entries[0]
   544  	for i := 1; i < kvs.Len(); i++ {
   545  		kv := kvs.entries[i]
   546  		if !kv.key.Equals(last.key) {
   547  			uniqueSorted.entries = append(uniqueSorted.entries, last)
   548  		}
   549  
   550  		last = kv
   551  	}
   552  
   553  	return mapEntrySlice{
   554  		append(uniqueSorted.entries, last),
   555  		uniqueSorted.nbf,
   556  	}, nil
   557  }
   558  
   559  func makeMapLeafChunkFn(vrw ValueReadWriter) makeChunkFn {
   560  	return func(level uint64, items []sequenceItem) (Collection, orderedKey, uint64, error) {
   561  		d.PanicIfFalse(level == 0)
   562  		mapData := make([]mapEntry, len(items))
   563  
   564  		var lastKey Value
   565  		for i, v := range items {
   566  			entry := v.(mapEntry)
   567  
   568  			if lastKey != nil {
   569  				isLess, err := lastKey.Less(vrw.Format(), entry.key)
   570  
   571  				if err != nil {
   572  					return nil, orderedKey{}, 0, err
   573  				}
   574  
   575  				d.PanicIfFalse(isLess)
   576  			}
   577  
   578  			lastKey = entry.key
   579  			mapData[i] = entry
   580  		}
   581  
   582  		seq, err := newMapLeafSequence(vrw, mapData...)
   583  
   584  		if err != nil {
   585  			return nil, orderedKey{}, 0, err
   586  		}
   587  
   588  		m := newMap(seq)
   589  		var key orderedKey
   590  		if len(mapData) > 0 {
   591  			key, err = newOrderedKey(mapData[len(mapData)-1].key, vrw.Format())
   592  
   593  			if err != nil {
   594  				return nil, orderedKey{}, 0, err
   595  			}
   596  		}
   597  
   598  		return m, key, uint64(len(items)), nil
   599  	}
   600  }
   601  
   602  func newEmptyMapSequenceChunker(ctx context.Context, vrw ValueReadWriter) (*sequenceChunker, error) {
   603  	return newEmptySequenceChunker(ctx, vrw, makeMapLeafChunkFn(vrw), newOrderedMetaSequenceChunkFn(MapKind, vrw), mapHashValueBytes)
   604  }
   605  
// readFrom always panics with "unreachable".
// NOTE(review): presumably Maps are decoded through a different code path,
// so this method only exists to satisfy an interface — confirm.
func (m Map) readFrom(nbf *NomsBinFormat, b *binaryNomsReader) (Value, error) {
	panic("unreachable")
}
   609  
// skip always panics with "unreachable".
// NOTE(review): presumably Maps are skipped through a different code path,
// so this method only exists to satisfy an interface — confirm.
func (m Map) skip(nbf *NomsBinFormat, b *binaryNomsReader) {
	panic("unreachable")
}
   613  
// String always panics with "unreachable"; do not print a Map with
// formatting verbs that invoke String().
func (m Map) String() string {
	panic("unreachable")
}
   617  
// HumanReadableString always panics with "unreachable".
func (m Map) HumanReadableString() string {
	panic("unreachable")
}
   621  
   622  // VisitMapLevelOrder writes hashes of internal node chunks to a writer
   623  // delimited with a newline character and returns the number or chunks written and the total number of
   624  // bytes written or an error if encountered
   625  func VisitMapLevelOrder(w io.Writer, m Map) (int64, int64, error) {
   626  	chunkCount := int64(0)
   627  	byteCount := int64(0)
   628  
   629  	curLevel := []Map{m}
   630  	for len(curLevel) > 0 {
   631  		nextLevel := []Map{}
   632  		for _, m := range curLevel {
   633  			if metaSeq, ok := m.orderedSequence.(metaSequence); ok {
   634  				ts, err := metaSeq.tuples()
   635  				if err != nil {
   636  					return 0, 0, err
   637  				}
   638  				for _, t := range ts {
   639  					r, err := t.ref()
   640  					if err != nil {
   641  						return 0, 0, err
   642  					}
   643  
   644  					p := []byte(r.TargetHash().String() + "\n")
   645  
   646  					n, err := w.Write(p)
   647  					if err != nil {
   648  						return 0, 0, err
   649  					}
   650  
   651  					chunkCount++
   652  					byteCount += int64(n)
   653  
   654  					v, err := r.TargetValue(context.Background(), m.valueReadWriter())
   655  					if err != nil {
   656  						return 0, 0, err
   657  					}
   658  
   659  					nextLevel = append(nextLevel, v.(Map))
   660  				}
   661  			} else if _, ok := m.orderedSequence.(mapLeafSequence); ok {
   662  
   663  			}
   664  		}
   665  		curLevel = nextLevel
   666  	}
   667  
   668  	return chunkCount, byteCount, nil
   669  }