github.com/weaviate/weaviate@v1.24.6/entities/schema/crossref/bulk_builder.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package crossref 13 14 import ( 15 "fmt" 16 17 "github.com/go-openapi/strfmt" 18 "github.com/weaviate/weaviate/usecases/byteops" 19 ) 20 21 // BulkBuilder is a low-alloc tool to build many beacon strings (as []byte). It 22 // is optimized to allocate just once as opposed to once per ID. This makes it 23 // considerably faster when generating hundreds of thousands of beacon strings. The 24 // main intended use case for this is building propValuePairs in ref-filters. 25 // 26 // The BulkBuilder makes some estimations for how much memory will be necessary 27 // based on expected input params. If those requirements get exceeded, it will 28 // still be safe to use, but will fall back to allocating dynamically. 
29 type BulkBuilder struct { 30 byteops.ReadWriter 31 prefix []byte 32 } 33 34 func NewBulkBuilderWithEstimates(expectedCount int, exampleClassName string, 35 overheadRatio float64, 36 ) *BulkBuilder { 37 prefix := []byte("weaviate://localhost/") 38 39 lenOfTypicalClassName := int(float64(len(exampleClassName)) * overheadRatio) 40 predictedSize := expectedCount * (len(prefix) + 1 + lenOfTypicalClassName + 36) 41 42 bb := &BulkBuilder{ 43 prefix: prefix, 44 ReadWriter: byteops.NewReadWriter(make([]byte, predictedSize)), 45 } 46 47 return bb 48 } 49 50 func (bb *BulkBuilder) ClassAndID(className string, 51 id strfmt.UUID, 52 ) []byte { 53 requiredSpace := len(bb.prefix) + len(id) 54 if int(bb.Position)+requiredSpace >= len(bb.Buffer) { 55 return bb.fallbackWithClassName(className, id) 56 } 57 58 // copy the start pos, we will need this at the end to know what to return to 59 // the caller 60 start := bb.Position 61 bb.CopyBytesToBuffer(bb.prefix) 62 63 // This is a safe way, in case a class-name ever contains non-ASCII 64 // characters. If we could be 100% sure that a class is ASCII-only, we could 65 // remove this allocation and instead use the same copy-by-rune approach that 66 // we use later on for the ID. 67 bb.CopyBytesToBuffer([]byte(className)) 68 bb.WriteByte('/') // The separating slash between class and ID 69 for _, runeValue := range id { 70 // We know that the UUID-string never contains non-ASCII characters. This 71 // means it safe to convert the uint32-rune into a uint8. 
This allows us to 72 // copy char by char without any additional allocs 73 bb.WriteByte(uint8(runeValue)) 74 } 75 76 return bb.Buffer[start:bb.Position] 77 } 78 79 func (bb *BulkBuilder) LegacyIDOnly(id strfmt.UUID) []byte { 80 requiredSpace := len(bb.prefix) + len(id) 81 if int(bb.Position)+requiredSpace >= len(bb.Buffer) { 82 return bb.fallbackWithoutClassName(id) 83 } 84 85 // copy the start pos, we will need this at the end to know what to return to 86 // the caller 87 start := bb.Position 88 bb.CopyBytesToBuffer(bb.prefix) 89 for _, runeValue := range id { 90 // We know that the UUID-string never contains non-ASCII characters. This 91 // means it safe to convert the uint32-rune into a uint8. This allows us to 92 // copy char by char without any additional allocs 93 bb.WriteByte(uint8(runeValue)) 94 } 95 96 return bb.Buffer[start:bb.Position] 97 } 98 99 func (bb *BulkBuilder) fallbackWithClassName( 100 className string, id strfmt.UUID, 101 ) []byte { 102 return []byte(fmt.Sprintf("%s%s/%s", bb.prefix, className, id)) 103 } 104 105 func (bb *BulkBuilder) fallbackWithoutClassName(id strfmt.UUID) []byte { 106 return []byte(fmt.Sprintf("%s%s", bb.prefix, id)) 107 }