github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/stats.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package tree
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  
    21  	"github.com/dolthub/dolt/go/store/hash"
    22  )
    23  
    24  type chunkDiff struct {
    25  	from []hash.Hash
    26  	to   []hash.Hash
    27  }
    28  
    29  // DiffChunksAtLevel returns a list of chunk diffs between two maps at a
    30  // specific level.
    31  func DiffChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, level uint16, from, to StaticMap[K, V, O]) ([]chunkDiff, error) {
    32  	if from.Root.level < level || to.Root.level < level {
    33  		// |to| < level is a valid state, but  should have happened before calling
    34  		return nil, fmt.Errorf("level %d invalid for from height: %d, %d", level, from.Root.level, to.Root.level)
    35  	}
    36  	fromNode := from.Root
    37  	var err error
    38  	for fromNode.level > level {
    39  		fromNode, err = fetchChild(ctx, from.NodeStore, fromNode.getAddress(0))
    40  		if err != nil {
    41  			return nil, err
    42  		}
    43  	}
    44  
    45  	toNode := to.Root
    46  	for toNode.level > level {
    47  		toNode, err = fetchChild(ctx, to.NodeStore, toNode.getAddress(0))
    48  		if err != nil {
    49  			return nil, err
    50  		}
    51  	}
    52  
    53  	var diffs []chunkDiff
    54  	i := 0
    55  	j := 0
    56  	for i < fromNode.Count() && j < toNode.Count() {
    57  		fromAddr := fromNode.getAddress(i)
    58  		toAddr := toNode.getAddress(j)
    59  		if toAddr == fromAddr {
    60  			// same
    61  			i++
    62  			j++
    63  			continue
    64  		}
    65  
    66  		f := fromNode.GetKey(i)
    67  		t := toNode.GetKey(j)
    68  		cmp := from.Order.Compare(K(f), K(t))
    69  		if cmp == 0 {
    70  			// replace from->to
    71  			diffs = append(diffs, chunkDiff{from: []hash.Hash{fromAddr}, to: []hash.Hash{toAddr}})
    72  			i++
    73  			j++
    74  			continue
    75  		} else {
    76  			startI := i
    77  			startJ := j
    78  			for fromAddr != toAddr && cmp != 0 {
    79  				if cmp < 0 {
    80  					i++
    81  					fromAddr = fromNode.getAddress(i)
    82  				} else {
    83  					j++
    84  					toAddr = toNode.getAddress(j)
    85  				}
    86  				f = fromNode.GetKey(i)
    87  				t = toNode.GetKey(j)
    88  				cmp = from.Order.Compare(K(f), K(t))
    89  			}
    90  			// either addrs equal, or keys synced
    91  			var newChunkDiff chunkDiff
    92  			for k := startI; k < i; k++ {
    93  				newChunkDiff.from = append(newChunkDiff.from, fromNode.getAddress(k))
    94  			}
    95  			for k := startJ; k < j; k++ {
    96  				newChunkDiff.to = append(newChunkDiff.to, toNode.getAddress(k))
    97  			}
    98  			diffs = append(diffs, newChunkDiff)
    99  		}
   100  	}
   101  
   102  	if i == fromNode.Count() && j < toNode.Count() {
   103  		return diffs, nil
   104  	}
   105  
   106  	var newChunkDiff chunkDiff
   107  	for i < fromNode.Count() {
   108  		// deleted nodes
   109  		newChunkDiff.from = append(newChunkDiff.from, fromNode.getAddress(i))
   110  		i++
   111  
   112  	}
   113  	for j < toNode.Count() {
   114  		// added nodes
   115  		newChunkDiff.to = append(newChunkDiff.to, toNode.getAddress(i))
   116  		j++
   117  	}
   118  	diffs = append(diffs, newChunkDiff)
   119  	return diffs, nil
   120  }
   121  
   122  func GetChunksAtLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], level int) ([]hash.Hash, error) {
   123  	n := m.Root
   124  	var err error
   125  	for n.Level() > level {
   126  		n, err = fetchChild(ctx, m.NodeStore, n.getAddress(0))
   127  		if err != nil {
   128  			return nil, err
   129  		}
   130  	}
   131  
   132  	// get chunks at this level
   133  	var ret []hash.Hash
   134  	i := 0
   135  	for i < n.Count() {
   136  		ret = append(ret, n.getAddress(i))
   137  	}
   138  	return ret, nil
   139  }
   140  
   141  // GetHistogramLevel returns the highest internal level of the tree that has
   142  // more than |low| addresses.
   143  func GetHistogramLevel[K, V ~[]byte, O Ordering[K]](ctx context.Context, m StaticMap[K, V, O], low int) ([]Node, error) {
   144  	currentLevel := []Node{m.Root}
   145  	level := m.Root.Level()
   146  	for len(currentLevel) < low && level > 0 {
   147  		var nextLevel []Node
   148  		for _, node := range currentLevel {
   149  			for i := 0; i < node.Count(); i++ {
   150  				child, err := fetchChild(ctx, m.NodeStore, node.getAddress(i))
   151  				if err != nil {
   152  					return nil, err
   153  				}
   154  				nextLevel = append(nextLevel, child)
   155  			}
   156  		}
   157  		currentLevel = nextLevel
   158  		level--
   159  	}
   160  	return currentLevel, nil
   161  }