github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/segment_collection_strategy.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package lsmkv
    13  
    14  import (
    15  	"encoding/binary"
    16  	"fmt"
    17  
    18  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv/segmentindex"
    19  	"github.com/weaviate/weaviate/entities/lsmkv"
    20  )
    21  
    22  func (s *segment) getCollection(key []byte) ([]value, error) {
    23  	if s.strategy != segmentindex.StrategySetCollection &&
    24  		s.strategy != segmentindex.StrategyMapCollection {
    25  		return nil, fmt.Errorf("get only possible for strategies %q, %q",
    26  			StrategySetCollection, StrategyMapCollection)
    27  	}
    28  
    29  	if s.useBloomFilter && !s.bloomFilter.Test(key) {
    30  		return nil, lsmkv.NotFound
    31  	}
    32  
    33  	node, err := s.index.Get(key)
    34  	if err != nil {
    35  		return nil, err
    36  	}
    37  
    38  	// We need to copy the data we read from the segment exactly once in this
    39  	// place. This means that future processing can share this memory as much as
    40  	// it wants to, as it can now be considered immutable. If we didn't copy in
    41  	// this place it would only be safe to hold this data while still under the
    42  	// protection of the segmentGroup.maintenanceLock. This lock makes sure that
    43  	// no compaction is started during an ongoing read. However, as we could show
    44  	// as part of https://github.com/weaviate/weaviate/issues/1837
    45  	// further processing, such as map-decoding and eventually map-merging would
    46  	// happen inside the bucket.MapList() method. This scope has its own lock,
    47  	// but that lock can only protecting against flushing (i.e. changing the
    48  	// active/flushing memtable), not against removing the disk segment. If a
    49  	// compaction completes and the old segment is removed, we would be accessing
    50  	// invalid memory without the copy, thus leading to a SEGFAULT.
    51  	contentsCopy := make([]byte, node.End-node.Start)
    52  	if err = s.copyNode(contentsCopy, nodeOffset{node.Start, node.End}); err != nil {
    53  		return nil, err
    54  	}
    55  
    56  	return s.collectionStratParseData(contentsCopy)
    57  }
    58  
    59  func (s *segment) collectionStratParseData(in []byte) ([]value, error) {
    60  	if len(in) == 0 {
    61  		return nil, lsmkv.NotFound
    62  	}
    63  
    64  	offset := 0
    65  
    66  	valuesLen := binary.LittleEndian.Uint64(in[offset : offset+8])
    67  	offset += 8
    68  
    69  	values := make([]value, valuesLen)
    70  	valueIndex := 0
    71  	for valueIndex < int(valuesLen) {
    72  		values[valueIndex].tombstone = in[offset] == 0x01
    73  		offset += 1
    74  
    75  		valueLen := binary.LittleEndian.Uint64(in[offset : offset+8])
    76  		offset += 8
    77  
    78  		values[valueIndex].value = in[offset : offset+int(valueLen)]
    79  		offset += int(valueLen)
    80  
    81  		valueIndex++
    82  	}
    83  
    84  	return values, nil
    85  }