github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_net_count_additions.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"bytes"
    16  	"encoding/binary"
    17  	"errors"
    18  	"fmt"
    19  	"os"
    20  	"path/filepath"
    21  	"strings"
    22  
    23  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    24  )
    25  
// ErrInvalidChecksum indicates that the read file should not be trusted. For
// any pre-computed data this is a recoverable issue, as the data can simply be
// re-computed at read-time.
//
// Callers should test for it with errors.Is; initCountNetAdditions does so
// and falls back to recalculating the counter when it is returned.
var ErrInvalidChecksum = errors.New("invalid checksum")
    30  
// existsOnLowerSegmentsFn is a simple function that can be passed at segment
// initialization time to check whether a key is truly new or was previously
// seen. It returns true if the key was seen before, or an error if that could
// not be determined. The result is used to build up the net count additions.
//
// NOTE(review): the original comment ended with "The reason this is
// abstract:" and no explanation followed. Presumably the lookup is injected
// so that a segment does not need direct access to the other segments —
// confirm with the caller that supplies it.
type existsOnLowerSegmentsFn func(key []byte) (bool, error)
    36  
    37  func (s *segment) countNetPath() string {
    38  	return countNetPathFromSegmentPath(s.path)
    39  }
    40  
    41  func countNetPathFromSegmentPath(segPath string) string {
    42  	extless := strings.TrimSuffix(segPath, filepath.Ext(segPath))
    43  	return fmt.Sprintf("%s.cna", extless)
    44  }
    45  
    46  func (s *segment) initCountNetAdditions(exists existsOnLowerSegmentsFn, overwrite bool) error {
    47  	if s.strategy != segmentindex.StrategyReplace {
    48  		// replace is the only strategy that supports counting
    49  		return nil
    50  	}
    51  
    52  	path := s.countNetPath()
    53  
    54  	ok, err := fileExists(path)
    55  	if err != nil {
    56  		return err
    57  	}
    58  
    59  	if ok {
    60  		if overwrite {
    61  			err := os.Remove(path)
    62  			if err != nil {
    63  				return fmt.Errorf("delete existing net additions counter %s: %w", path, err)
    64  			}
    65  		} else {
    66  			err = s.loadCountNetFromDisk()
    67  			if err == nil {
    68  				return nil
    69  			}
    70  
    71  			if !errors.Is(err, ErrInvalidChecksum) {
    72  				// not a recoverable error
    73  				return err
    74  			}
    75  
    76  			// now continue re-calculating
    77  		}
    78  	}
    79  
    80  	var lastErr error
    81  	countNet := 0
    82  	cb := func(key []byte, tombstone bool) {
    83  		existedOnPrior, err := exists(key)
    84  		if err != nil {
    85  			lastErr = err
    86  		}
    87  
    88  		if tombstone && existedOnPrior {
    89  			countNet--
    90  		}
    91  
    92  		if !tombstone && !existedOnPrior {
    93  			countNet++
    94  		}
    95  	}
    96  
    97  	extr := newBufferedKeyAndTombstoneExtractor(s.contents, s.dataStartPos,
    98  		s.dataEndPos, 10e6, s.secondaryIndexCount, cb)
    99  
   100  	extr.do()
   101  
   102  	s.countNetAdditions = countNet
   103  
   104  	if lastErr != nil {
   105  		return lastErr
   106  	}
   107  
   108  	if err := s.storeCountNetOnDisk(); err != nil {
   109  		return fmt.Errorf("store count net additions on disk: %w", err)
   110  	}
   111  
   112  	return nil
   113  }
   114  
   115  func (s *segment) storeCountNetOnDisk() error {
   116  	return storeCountNetOnDisk(s.countNetPath(), s.countNetAdditions)
   117  }
   118  
   119  func storeCountNetOnDisk(path string, value int) error {
   120  	buf := new(bytes.Buffer)
   121  
   122  	if err := binary.Write(buf, binary.LittleEndian, uint64(value)); err != nil {
   123  		return fmt.Errorf("write cna to buf: %w", err)
   124  	}
   125  
   126  	return writeWithChecksum(buf.Bytes(), path)
   127  }
   128  
   129  func (s *segment) loadCountNetFromDisk() error {
   130  	data, err := loadWithChecksum(s.countNetPath(), 12)
   131  	if err != nil {
   132  		return err
   133  	}
   134  
   135  	s.countNetAdditions = int(binary.LittleEndian.Uint64(data[0:8]))
   136  
   137  	return nil
   138  }
   139  
   140  func (s *segment) precomputeCountNetAdditions(updatedCountNetAdditions int) ([]string, error) {
   141  	if s.strategy != segmentindex.StrategyReplace {
   142  		// only "replace" has count net additions, so we are done
   143  		return []string{}, nil
   144  	}
   145  
   146  	cnaPath := fmt.Sprintf("%s.tmp", s.countNetPath())
   147  	if err := storeCountNetOnDisk(cnaPath, updatedCountNetAdditions); err != nil {
   148  		return nil, err
   149  	}
   150  	return []string{cnaPath}, nil
   151  }