//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package docid

import (
	"encoding/binary"

	"github.com/weaviate/weaviate/entities/storobj"

	"github.com/weaviate/weaviate/entities/models"

	"github.com/pkg/errors"
	"github.com/weaviate/weaviate/adapters/repos/db/helpers"
	"github.com/weaviate/weaviate/adapters/repos/db/lsmkv"
)

// ObjectScanFn is called once per object that a scan resolves. It receives a
// pointer to the object's properties and the doc id the object was looked up
// with. Returning false or a non-nil error stops the scan.
//
// The scanner passes the same prop pointer on every call and overwrites the
// value it points to for each object, so an implementation that needs the
// properties after it returns must copy them.
type ObjectScanFn func(prop *models.PropertySchema, docID uint64) (bool, error)

// ScanObjectsLSM calls the provided scan function for each of the specified
// pointers (doc ids). If a pointer does not resolve to an object, it is
// skipped, so the number of times scan is called can be smaller than
// len(pointers).
//
// Properties are only unmarshalled from the stored object when properties is
// non-empty; otherwise scan receives a pointer to a nil PropertySchema.
34 func ScanObjectsLSM(store *lsmkv.Store, pointers []uint64, scan ObjectScanFn, properties []string) error { 35 return newObjectScannerLSM(store, pointers, scan, properties).Do() 36 } 37 38 type objectScannerLSM struct { 39 store *lsmkv.Store 40 pointers []uint64 41 scanFn ObjectScanFn 42 objectsBucket *lsmkv.Bucket 43 properties []string 44 } 45 46 func newObjectScannerLSM(store *lsmkv.Store, pointers []uint64, 47 scan ObjectScanFn, properties []string, 48 ) *objectScannerLSM { 49 return &objectScannerLSM{ 50 store: store, 51 pointers: pointers, 52 scanFn: scan, 53 properties: properties, 54 } 55 } 56 57 func (os *objectScannerLSM) Do() error { 58 if err := os.init(); err != nil { 59 return errors.Wrap(err, "init object scanner") 60 } 61 62 if err := os.scan(); err != nil { 63 return errors.Wrap(err, "scan") 64 } 65 66 return nil 67 } 68 69 func (os *objectScannerLSM) init() error { 70 bucket := os.store.Bucket(helpers.ObjectsBucketLSM) 71 if bucket == nil { 72 return errors.Errorf("objects bucket not found") 73 } 74 os.objectsBucket = bucket 75 76 return nil 77 } 78 79 func (os *objectScannerLSM) scan() error { 80 // each object is scanned one after the other, so we can reuse the same memory allocations for all objects 81 docIDBytes := make([]byte, 8) 82 83 // Preallocate strings needed for json unmarshalling 84 propStrings := make([][]string, len(os.properties)) 85 for i := range os.properties { 86 propStrings[i] = []string{os.properties[i]} 87 } 88 89 // The typed properties are needed for extraction from json 90 var properties models.PropertySchema 91 propertiesTyped := map[string]interface{}{} 92 93 for _, prop := range os.properties { 94 propertiesTyped[prop] = nil 95 } 96 97 for _, id := range os.pointers { 98 binary.LittleEndian.PutUint64(docIDBytes, id) 99 res, err := os.objectsBucket.GetBySecondary(0, docIDBytes) 100 if err != nil { 101 return err 102 } 103 104 if res == nil { 105 continue 106 } 107 108 if len(os.properties) > 0 { 109 err = 
storobj.UnmarshalPropertiesFromObject(res, &propertiesTyped, os.properties, propStrings) 110 if err != nil { 111 return errors.Wrapf(err, "unmarshal data object") 112 } 113 properties = propertiesTyped 114 } 115 116 continueScan, err := os.scanFn(&properties, id) 117 if err != nil { 118 return errors.Wrapf(err, "scan") 119 } 120 121 if !continueScan { 122 break 123 } 124 } 125 126 return nil 127 }