github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/docid/scan.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package docid
    13  
    14  import (
    15  	"encoding/binary"
    16  
    17  	"github.com/weaviate/weaviate/entities/storobj"
    18  
    19  	"github.com/weaviate/weaviate/entities/models"
    20  
    21  	"github.com/pkg/errors"
    22  	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
    23  	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
    24  )
    25  
// ObjectScanFn is the callback invoked once for every object the scanner
// resolves. It receives a pointer to the object's properties and the docID
// that was looked up. Returning false or a non-nil error stops the scan.
//
// The value behind prop is reused by the scanner between invocations, so an
// implementation that needs the properties after it returns should copy them.
// When the scan was started without any property names, the value behind prop
// is left at its zero value.
type ObjectScanFn func(prop *models.PropertySchema, docID uint64) (bool, error)
    29  
    30  // ScanObjectsLSM calls the provided scanFn on each object for the
    31  // specified pointer. If a pointer does not resolve to an object-id, the item
    32  // will be skipped. The number of times scanFn is called can therefore be
    33  // smaller than the input length of pointers.
    34  func ScanObjectsLSM(store *lsmkv.Store, pointers []uint64, scan ObjectScanFn, properties []string) error {
    35  	return newObjectScannerLSM(store, pointers, scan, properties).Do()
    36  }
    37  
// objectScannerLSM holds the state of a single scan over a list of docIDs.
type objectScannerLSM struct {
	// store is the LSM store in which the objects bucket is looked up.
	store         *lsmkv.Store
	// pointers are the docIDs to resolve, visited in slice order.
	pointers      []uint64
	// scanFn is the callback invoked for every object that was resolved.
	scanFn        ObjectScanFn
	// objectsBucket is populated by init; it is unset until init succeeds.
	objectsBucket *lsmkv.Bucket
	// properties are the names of the properties to unmarshal per object.
	// When empty, no unmarshalling takes place.
	properties    []string
}
    45  
    46  func newObjectScannerLSM(store *lsmkv.Store, pointers []uint64,
    47  	scan ObjectScanFn, properties []string,
    48  ) *objectScannerLSM {
    49  	return &objectScannerLSM{
    50  		store:      store,
    51  		pointers:   pointers,
    52  		scanFn:     scan,
    53  		properties: properties,
    54  	}
    55  }
    56  
    57  func (os *objectScannerLSM) Do() error {
    58  	if err := os.init(); err != nil {
    59  		return errors.Wrap(err, "init object scanner")
    60  	}
    61  
    62  	if err := os.scan(); err != nil {
    63  		return errors.Wrap(err, "scan")
    64  	}
    65  
    66  	return nil
    67  }
    68  
    69  func (os *objectScannerLSM) init() error {
    70  	bucket := os.store.Bucket(helpers.ObjectsBucketLSM)
    71  	if bucket == nil {
    72  		return errors.Errorf("objects bucket not found")
    73  	}
    74  	os.objectsBucket = bucket
    75  
    76  	return nil
    77  }
    78  
    79  func (os *objectScannerLSM) scan() error {
    80  	// each object is scanned one after the other, so we can reuse the same memory allocations for all objects
    81  	docIDBytes := make([]byte, 8)
    82  
    83  	// Preallocate strings needed for json unmarshalling
    84  	propStrings := make([][]string, len(os.properties))
    85  	for i := range os.properties {
    86  		propStrings[i] = []string{os.properties[i]}
    87  	}
    88  
    89  	// The typed properties are needed for extraction from json
    90  	var properties models.PropertySchema
    91  	propertiesTyped := map[string]interface{}{}
    92  
    93  	for _, prop := range os.properties {
    94  		propertiesTyped[prop] = nil
    95  	}
    96  
    97  	for _, id := range os.pointers {
    98  		binary.LittleEndian.PutUint64(docIDBytes, id)
    99  		res, err := os.objectsBucket.GetBySecondary(0, docIDBytes)
   100  		if err != nil {
   101  			return err
   102  		}
   103  
   104  		if res == nil {
   105  			continue
   106  		}
   107  
   108  		if len(os.properties) > 0 {
   109  			err = storobj.UnmarshalPropertiesFromObject(res, &propertiesTyped, os.properties, propStrings)
   110  			if err != nil {
   111  				return errors.Wrapf(err, "unmarshal data object")
   112  			}
   113  			properties = propertiesTyped
   114  		}
   115  
   116  		continueScan, err := os.scanFn(&properties, id)
   117  		if err != nil {
   118  			return errors.Wrapf(err, "scan")
   119  		}
   120  
   121  		if !continueScan {
   122  			break
   123  		}
   124  	}
   125  
   126  	return nil
   127  }