Spaces:
Running
Running
File size: 3,630 Bytes
b110593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 |
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package crossref
import (
"fmt"
"github.com/go-openapi/strfmt"
"github.com/weaviate/weaviate/usecases/byteops"
)
// BulkBuilder is a low-alloc tool to build many beacon strings (as []byte). It
// is optimized to allocate just once as opposed to once per ID. This makes it
// considerably faster when generating hundreds of thousands of beacon strings.
// The main intended use case for this is building propValuePairs in
// ref-filters.
//
// The BulkBuilder makes some estimations for how much memory will be necessary
// based on expected input params. If those requirements get exceeded, it will
// still be safe to use, but will fall back to allocating dynamically.
type BulkBuilder struct {
// ReadWriter supplies the pre-allocated Buffer and the write Position that
// the builder methods use when emitting beacons.
byteops.ReadWriter
// prefix is the fixed beacon prefix that is written in front of every
// generated beacon.
prefix []byte
}
// NewBulkBuilderWithEstimates creates a BulkBuilder whose buffer is allocated
// once, up front, based on the caller's expectations.
//
// expectedCount is the number of beacons the caller expects to build.
// exampleClassName is a representative class name; its length, scaled by
// overheadRatio, is used as the per-beacon estimate for the class-name part.
//
// If the estimate turns out to be zero or negative (for example because of a
// negative expectedCount or overheadRatio), an empty buffer is used instead of
// panicking; the builder methods then take their dynamically-allocating
// fallback path on every call.
func NewBulkBuilderWithEstimates(expectedCount int, exampleClassName string,
	overheadRatio float64,
) *BulkBuilder {
	prefix := []byte("weaviate://localhost/")

	lenOfTypicalClassName := int(float64(len(exampleClassName)) * overheadRatio)

	// Per beacon: prefix + class name + 1 byte for the separating slash + 36
	// bytes for the ID (the length of a canonical UUID string).
	predictedSize := expectedCount * (len(prefix) + 1 + lenOfTypicalClassName + 36)
	if predictedSize < 0 {
		// make() panics on a negative length. An empty buffer is always safe
		// because the builder methods check the remaining space before writing.
		predictedSize = 0
	}

	bb := &BulkBuilder{
		prefix:     prefix,
		ReadWriter: byteops.NewReadWriter(make([]byte, predictedSize)),
	}

	return bb
}
// ClassAndID builds the beacon "<prefix><className>/<id>" and returns it as a
// byte slice.
//
// When the pre-allocated buffer has enough room left, the beacon is written
// into it and the returned slice is a view into that shared buffer, so no
// per-beacon allocation for the result is needed. Otherwise the beacon is
// built in a freshly allocated slice via the fallback.
func (bb *BulkBuilder) ClassAndID(className string,
	id strfmt.UUID,
) []byte {
	// Everything written below must be accounted for: prefix, class name, the
	// separating slash and the ID. Leaving out the class name (and slash) would
	// let a longer-than-estimated class name run past the end of the buffer
	// instead of triggering the fallback.
	requiredSpace := len(bb.prefix) + len(className) + 1 + len(id)
	if int(bb.Position)+requiredSpace >= len(bb.Buffer) {
		return bb.fallbackWithClassName(className, id)
	}

	// copy the start pos, we will need this at the end to know what to return to
	// the caller
	start := bb.Position
	bb.CopyBytesToBuffer(bb.prefix)

	// This is a safe way, in case a class-name ever contains non-ASCII
	// characters. If we could be 100% sure that a class is ASCII-only, we could
	// remove this allocation and instead use the same copy-by-rune approach that
	// we use later on for the ID.
	bb.CopyBytesToBuffer([]byte(className))
	bb.WriteByte('/') // The separating slash between class and ID

	for _, runeValue := range id {
		// We know that the UUID-string never contains non-ASCII characters. This
		// means it is safe to convert the uint32-rune into a uint8. This allows
		// us to copy char by char without any additional allocs
		bb.WriteByte(uint8(runeValue))
	}

	return bb.Buffer[start:bb.Position]
}
// LegacyIDOnly builds the class-less beacon "<prefix><id>" and returns it as a
// byte slice.
//
// If the pre-allocated buffer still has room, the beacon is written into it
// and the result is a view into that buffer. If not, a freshly allocated slice
// from the fallback is returned instead.
func (bb *BulkBuilder) LegacyIDOnly(id strfmt.UUID) []byte {
	needed := len(bb.prefix) + len(id)
	if end := int(bb.Position) + needed; end >= len(bb.Buffer) {
		// Not enough room left in the shared buffer, allocate dynamically.
		return bb.fallbackWithoutClassName(id)
	}

	// Remember where this beacon begins, so that only the newly written part
	// is handed back to the caller.
	begin := bb.Position

	bb.CopyBytesToBuffer(bb.prefix)

	// The UUID string is expected to be ASCII-only, so every rune fits into a
	// single byte and can be written one by one without extra allocations.
	for _, r := range id {
		bb.WriteByte(uint8(r))
	}

	return bb.Buffer[begin:bb.Position]
}
// fallbackWithClassName builds the beacon "<prefix><className>/<id>" in a
// newly allocated slice. It does not touch the shared buffer and is used when
// that buffer has no room left for the beacon.
func (bb *BulkBuilder) fallbackWithClassName(
	className string, id strfmt.UUID,
) []byte {
	beacon := fmt.Sprintf("%s%s/%s", bb.prefix, className, id)
	return []byte(beacon)
}
// fallbackWithoutClassName builds the class-less beacon "<prefix><id>" in a
// newly allocated slice. It does not touch the shared buffer and is used when
// that buffer has no room left for the beacon.
func (bb *BulkBuilder) fallbackWithoutClassName(id strfmt.UUID) []byte {
	beacon := fmt.Sprintf("%s%s", bb.prefix, id)
	return []byte(beacon)
}
|