github.com/weaviate/weaviate@v1.24.6/entities/schema/crossref/bulk_builder.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package crossref
    13  
    14  import (
    15  	"fmt"
    16  
    17  	"github.com/go-openapi/strfmt"
    18  	"github.com/weaviate/weaviate/usecases/byteops"
    19  )
    20  
// BulkBuilder is a low-alloc tool to build many beacon strings (as []byte). It
// is optimized to allocate just once as opposed to once per ID. This makes it
// considerably faster when generating hundreds of thousands of beacon strings.
// The main intended use case for this is building propValuePairs in
// ref-filters.
//
// The BulkBuilder makes some estimations for how much memory will be necessary
// based on expected input params. If those requirements get exceeded, it will
// still be safe to use, but will fall back to allocating dynamically.
type BulkBuilder struct {
	// ReadWriter is embedded; the builder's methods write beacons through its
	// Buffer, Position, CopyBytesToBuffer and WriteByte members.
	byteops.ReadWriter
	// prefix is the byte sequence written at the start of every beacon.
	prefix []byte
}
    33  
    34  func NewBulkBuilderWithEstimates(expectedCount int, exampleClassName string,
    35  	overheadRatio float64,
    36  ) *BulkBuilder {
    37  	prefix := []byte("weaviate://localhost/")
    38  
    39  	lenOfTypicalClassName := int(float64(len(exampleClassName)) * overheadRatio)
    40  	predictedSize := expectedCount * (len(prefix) + 1 + lenOfTypicalClassName + 36)
    41  
    42  	bb := &BulkBuilder{
    43  		prefix:     prefix,
    44  		ReadWriter: byteops.NewReadWriter(make([]byte, predictedSize)),
    45  	}
    46  
    47  	return bb
    48  }
    49  
    50  func (bb *BulkBuilder) ClassAndID(className string,
    51  	id strfmt.UUID,
    52  ) []byte {
    53  	requiredSpace := len(bb.prefix) + len(id)
    54  	if int(bb.Position)+requiredSpace >= len(bb.Buffer) {
    55  		return bb.fallbackWithClassName(className, id)
    56  	}
    57  
    58  	// copy the start pos, we will need this at the end to know what to return to
    59  	// the caller
    60  	start := bb.Position
    61  	bb.CopyBytesToBuffer(bb.prefix)
    62  
    63  	// This is a safe way, in case a class-name ever contains non-ASCII
    64  	// characters. If we could be 100% sure that a class is ASCII-only, we could
    65  	// remove this allocation and instead use the same copy-by-rune approach that
    66  	// we use later on for the ID.
    67  	bb.CopyBytesToBuffer([]byte(className))
    68  	bb.WriteByte('/') // The separating slash between class and ID
    69  	for _, runeValue := range id {
    70  		// We know that the UUID-string never contains non-ASCII characters. This
    71  		// means it safe to convert the uint32-rune into a uint8. This allows us to
    72  		// copy char by char without any additional allocs
    73  		bb.WriteByte(uint8(runeValue))
    74  	}
    75  
    76  	return bb.Buffer[start:bb.Position]
    77  }
    78  
    79  func (bb *BulkBuilder) LegacyIDOnly(id strfmt.UUID) []byte {
    80  	requiredSpace := len(bb.prefix) + len(id)
    81  	if int(bb.Position)+requiredSpace >= len(bb.Buffer) {
    82  		return bb.fallbackWithoutClassName(id)
    83  	}
    84  
    85  	// copy the start pos, we will need this at the end to know what to return to
    86  	// the caller
    87  	start := bb.Position
    88  	bb.CopyBytesToBuffer(bb.prefix)
    89  	for _, runeValue := range id {
    90  		// We know that the UUID-string never contains non-ASCII characters. This
    91  		// means it safe to convert the uint32-rune into a uint8. This allows us to
    92  		// copy char by char without any additional allocs
    93  		bb.WriteByte(uint8(runeValue))
    94  	}
    95  
    96  	return bb.Buffer[start:bb.Position]
    97  }
    98  
    99  func (bb *BulkBuilder) fallbackWithClassName(
   100  	className string, id strfmt.UUID,
   101  ) []byte {
   102  	return []byte(fmt.Sprintf("%s%s/%s", bb.prefix, className, id))
   103  }
   104  
   105  func (bb *BulkBuilder) fallbackWithoutClassName(id strfmt.UUID) []byte {
   106  	return []byte(fmt.Sprintf("%s%s", bb.prefix, id))
   107  }