KevinStephenson
Adding in weaviate code
b110593
raw
history blame
2.85 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package hnsw
import (
"context"
"errors"
"fmt"
"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"
"github.com/weaviate/weaviate/entities/storobj"
ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
)
// calculateOptimalSegments derives a default segment count from the vector
// dimensionality. It is used by compress when the PQ configuration does not
// specify a positive number of segments.
//
// The rules are tried from the largest dimensionality threshold downwards and
// the first one whose divisor evenly divides dims wins:
//
//	dims >= 2048 and divisible by 8 -> dims / 8
//	dims >=  768 and divisible by 6 -> dims / 6
//	dims >=  256 and divisible by 4 -> dims / 4
//	dims divisible by 2             -> dims / 2
//
// If none of the rules match, dims itself is returned.
func (h *hnsw) calculateOptimalSegments(dims int) int {
	switch {
	case dims >= 2048 && dims%8 == 0:
		return dims / 8
	case dims >= 768 && dims%6 == 0:
		return dims / 6
	case dims >= 256 && dims%4 == 0:
		return dims / 4
	case dims%2 == 0:
		return dims / 2
	default:
		return dims
	}
}
// compress builds a vector compressor according to cfg and feeds the
// currently cached vectors into it.
//
// Nothing happens when neither PQ nor BQ is enabled. If PQ is enabled it is
// used (regardless of the BQ flag); otherwise a BQ compressor is created. The
// whole operation runs while holding compressActionLock.
//
// PQ path: an error is returned if the index is empty. When cfg.PQ.Segments
// is not positive, a segment count is derived from the index dimensionality
// and also written to h.pqConfig. The vectors used for fitting are fetched
// one by one through the cache; entries that are not found or nil are
// skipped, any other lookup error aborts. The fitted compressor's fields are
// added to the commit log.
//
// On success every non-nil vector from the cache dump is preloaded into the
// compressor, the index is flagged as compressed and the cache is dropped.
func (h *hnsw) compress(cfg ent.UserConfig) error {
	if !(cfg.PQ.Enabled || cfg.BQ.Enabled) {
		return nil
	}

	h.compressActionLock.Lock()
	defer h.compressActionLock.Unlock()

	cached := h.cache.All()

	var err error
	switch {
	case cfg.PQ.Enabled:
		if h.isEmpty() {
			return errors.New("Compress command cannot be executed before inserting some data. Please, insert your data first.")
		}

		dimensions := int(h.dims)
		if cfg.PQ.Segments <= 0 {
			segments := h.calculateOptimalSegments(dimensions)
			cfg.PQ.Segments = segments
			h.pqConfig.Segments = segments
		}

		// Rather than just taking the cache dump at face value, explicitly
		// request each vector. Otherwise any vector that is currently not in
		// the cache would be missed, for example because the cache is not hot
		// yet after a restart.
		trainingSet := make([][]float32, 0, len(cached))
		for id := 0; id < len(cached); id++ {
			vec, getErr := h.cache.Get(context.Background(), uint64(id))
			if getErr != nil {
				var notFound storobj.ErrNotFound
				if !errors.As(getErr, &notFound) {
					return fmt.Errorf("unexpected error obtaining vectors for fitting: %w", getErr)
				}
				// already deleted, ignore
				continue
			}
			if vec == nil {
				// already deleted, ignore
				continue
			}
			trainingSet = append(trainingSet, vec)
		}

		h.compressor, err = compressionhelpers.NewPQCompressor(cfg.PQ, h.distancerProvider, dimensions, 1e12, h.logger, trainingSet, h.store)
		if err != nil {
			return fmt.Errorf("Compressing vectors: %w", err)
		}
		h.commitLog.AddPQ(h.compressor.ExposeFields())

	default:
		h.compressor, err = compressionhelpers.NewBQCompressor(h.distancerProvider, 1e12, h.logger, h.store)
		if err != nil {
			return err
		}
	}

	// Hand the vectors from the cache dump over to the compressor; slots
	// without a vector are skipped.
	compressionhelpers.Concurrently(uint64(len(cached)), func(id uint64) {
		if vec := cached[id]; vec != nil {
			h.compressor.Preload(id, vec)
		}
	})

	h.compressed.Store(true)
	h.cache.Drop()
	return nil
}