Spaces:
Running
Running
// _ _ | |
// __ _____ __ ___ ___ __ _| |_ ___ | |
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ | |
// \ V V / __/ (_| |\ V /| | (_| | || __/ | |
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| | |
// | |
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. | |
// | |
// CONTACT: [email protected] | |
// | |
package hnsw | |
import ( | |
"context" | |
"fmt" | |
"sort" | |
"testing" | |
"github.com/stretchr/testify/assert" | |
"github.com/stretchr/testify/require" | |
"github.com/weaviate/weaviate/adapters/repos/db/helpers" | |
"github.com/weaviate/weaviate/adapters/repos/db/lsmkv" | |
"github.com/weaviate/weaviate/adapters/repos/db/vector/common" | |
"github.com/weaviate/weaviate/adapters/repos/db/vector/hnsw/distancer" | |
"github.com/weaviate/weaviate/adapters/repos/db/vector/testinghelpers" | |
"github.com/weaviate/weaviate/entities/cyclemanager" | |
"github.com/weaviate/weaviate/entities/storobj" | |
ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw" | |
) | |
func TempVectorForIDThunk(vectors [][]float32) func(context.Context, uint64, *common.VectorSlice) ([]float32, error) { | |
return func(ctx context.Context, id uint64, container *common.VectorSlice) ([]float32, error) { | |
copy(container.Slice, vectors[int(id)]) | |
return vectors[int(id)], nil | |
} | |
} | |
func TestDelete_WithoutCleaningUpTombstones(t *testing.T) { | |
vectors := vectorsForDeleteTest() | |
var vectorIndex *hnsw | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
}) | |
var control []uint64 | |
t.Run("vectors are cached correctly", func(t *testing.T) { | |
assert.Equal(t, len(vectors), int(vectorIndex.cache.CountVectors())) | |
}) | |
t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { | |
allowList := helpers.NewAllowList() | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
allowList.Insert(uint64(i)) | |
} | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
control = res | |
}) | |
t.Run("deleting every even element", func(t *testing.T) { | |
for i := range vectors { | |
if i%2 != 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
}) | |
t.Run("vector cache holds half the original vectors", func(t *testing.T) { | |
vectorIndex.CleanUpTombstonedNodes(neverStop) | |
assert.Equal(t, len(vectors)/2, int(vectorIndex.cache.CountVectors())) | |
}) | |
t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
assert.Equal(t, control, res) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, vectorIndex.Drop(context.Background())) | |
}) | |
t.Run("vector cache holds no vectors", func(t *testing.T) { | |
assert.Equal(t, 0, int(vectorIndex.cache.CountVectors())) | |
}) | |
} | |
func TestDelete_WithCleaningUpTombstonesOnce(t *testing.T) { | |
// there is a single bulk clean event after all the deletes | |
vectors := vectorsForDeleteTest() | |
var vectorIndex *hnsw | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
}) | |
var control []uint64 | |
var bfControl []uint64 | |
t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { | |
allowList := helpers.NewAllowList() | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
allowList.Insert(uint64(i)) | |
} | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
require.Len(t, res, 20) | |
control = res | |
}) | |
t.Run("brute force control", func(t *testing.T) { | |
bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100) | |
bfControl = make([]uint64, len(bf)) | |
i := 0 | |
for _, elem := range bf { | |
if elem%2 == 0 { | |
continue | |
} | |
bfControl[i] = elem | |
i++ | |
} | |
if i > 20 { | |
i = 20 | |
} | |
bfControl = bfControl[:i] | |
assert.Equal(t, bfControl, control, "control should match bf control") | |
}) | |
fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID) | |
t.Run("deleting every even element", func(t *testing.T) { | |
for i := range vectors { | |
if i%2 != 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
}) | |
t.Run("running the cleanup", func(t *testing.T) { | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
assert.Equal(t, control, res) | |
}) | |
t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { | |
assert.Len(t, vectorIndex.tombstones, 0) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, vectorIndex.Drop(context.Background())) | |
}) | |
} | |
func TestDelete_WithCleaningUpTombstonesInBetween(t *testing.T) { | |
// there is a single bulk clean event after all the deletes | |
vectors := vectorsForDeleteTest() | |
var vectorIndex *hnsw | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
// makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179 | |
index.randFunc = func() float64 { return 0.1 } | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
}) | |
var control []uint64 | |
t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { | |
allowList := helpers.NewAllowList() | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
allowList.Insert(uint64(i)) | |
} | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
control = res | |
}) | |
t.Run("deleting every even element", func(t *testing.T) { | |
for i := range vectors { | |
if i%10 == 0 { | |
// occasionally run clean up | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
} | |
if i%2 != 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
// finally run one final cleanup | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
assert.Equal(t, control, res) | |
}) | |
t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { | |
assert.Len(t, vectorIndex.tombstones, 0) | |
}) | |
t.Run("delete the remaining elements", func(t *testing.T) { | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
t.Run("try to insert again and search", func(t *testing.T) { | |
for i := 0; i < 5; i++ { | |
err := vectorIndex.Add(uint64(i), vectors[i]) | |
require.Nil(t, err) | |
} | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
assert.ElementsMatch(t, []uint64{0, 1, 2, 3, 4}, res) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, vectorIndex.Drop(context.Background())) | |
}) | |
} | |
func createIndexImportAllVectorsAndDeleteEven(t *testing.T, vectors [][]float32, store *lsmkv.Store) (index *hnsw, remainingResult []uint64) { | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
// makes sure index is build only with level 0. To be removed after fixing WEAVIATE-179 | |
index.randFunc = func() float64 { return 0.1 } | |
// to speed up test execution, size of nodes array is decreased | |
// from default 25k to little over number of vectors | |
index.nodes = make([]*vertex, int(1.2*float64(len(vectors)))) | |
for i, vec := range vectors { | |
err := index.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
for i := range vectors { | |
if i%2 != 0 { | |
continue | |
} | |
err := index.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
return index, res | |
} | |
func genStopAtFunc(i int) func() bool { | |
counter := 0 | |
return func() bool { | |
if counter < i { | |
counter++ | |
return false | |
} | |
return true | |
} | |
} | |
func TestDelete_WithCleaningUpTombstonesStopped(t *testing.T) { | |
vectors := vectorsForDeleteTest() | |
var index *hnsw | |
var possibleStopsCount int | |
// due to not yet resolved bug (https://semi-technology.atlassian.net/browse/WEAVIATE-179) | |
// db can return less vectors than are actually stored after tombstones cleanup | |
// controlRemainingResult contains all odd vectors (before cleanup was performed) | |
// controlRemainingResultAfterCleanup contains most of odd vectors (after cleanup was performed) | |
// | |
// this test verifies if partial cleanup will not change search output, therefore depending on | |
// where cleanup method was stopped, subset of controlRemainingResult is expected, though all | |
// vectors from controlRemainingResultAfterCleanup should be returned | |
// TODO to be simplified after fixing WEAVIATE-179, all results should be the same | |
var controlRemainingResult []uint64 | |
var controlRemainingResultAfterCleanup []uint64 | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("create control index", func(t *testing.T) { | |
index, controlRemainingResult = createIndexImportAllVectorsAndDeleteEven(t, vectors, store) | |
}) | |
t.Run("count all cleanup tombstones stops", func(t *testing.T) { | |
counter := 0 | |
countingStopFunc := func() bool { | |
counter++ | |
return false | |
} | |
err := index.CleanUpTombstonedNodes(countingStopFunc) | |
require.Nil(t, err) | |
possibleStopsCount = counter | |
}) | |
t.Run("search remaining elements after cleanup", func(t *testing.T) { | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
controlRemainingResultAfterCleanup = res | |
}) | |
t.Run("destroy the control index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
for i := 0; i < possibleStopsCount; i++ { | |
index, _ = createIndexImportAllVectorsAndDeleteEven(t, vectors, store) | |
t.Run("stop cleanup at place", func(t *testing.T) { | |
require.Nil(t, index.CleanUpTombstonedNodes(genStopAtFunc(i))) | |
}) | |
t.Run("search remaining elements after partial cleanup", func(t *testing.T) { | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) | |
require.Nil(t, err) | |
require.Subset(t, controlRemainingResult, res) | |
require.Subset(t, res, controlRemainingResultAfterCleanup) | |
}) | |
t.Run("run complete cleanup", func(t *testing.T) { | |
require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) | |
}) | |
t.Run("search remaining elements after complete cleanup", func(t *testing.T) { | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, len(vectors), nil) | |
require.Nil(t, err) | |
require.Subset(t, controlRemainingResult, res) | |
require.Subset(t, res, controlRemainingResultAfterCleanup) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
} | |
} | |
func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce(t *testing.T) { | |
var ( | |
vectorIndex *hnsw | |
// there is a single bulk clean event after all the deletes | |
vectors = vectorsForDeleteTest() | |
rootPath = t.TempDir() | |
userConfig = ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
PQ: ent.PQConfig{ | |
Enabled: true, | |
Encoder: ent.PQEncoder{ | |
Type: ent.PQEncoderTypeTile, | |
Distribution: ent.PQEncoderDistributionNormal, | |
}, | |
}, | |
} | |
) | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: rootPath, | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
if int(id) >= len(vectors) { | |
return nil, storobj.NewErrNotFoundf(id, "out of range") | |
} | |
return vectors[int(id)], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
cfg := ent.PQConfig{ | |
Enabled: true, | |
Encoder: ent.PQEncoder{ | |
Type: ent.PQEncoderTypeTile, | |
Distribution: ent.PQEncoderDistributionLogNormal, | |
}, | |
BitCompression: false, | |
Segments: 3, | |
Centroids: 256, | |
} | |
userConfig.PQ = cfg | |
index.compress(userConfig) | |
}) | |
var control []uint64 | |
var bfControl []uint64 | |
t.Run("doing a control search before delete with the respective allow list", func(t *testing.T) { | |
allowList := helpers.NewAllowList() | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
allowList.Insert(uint64(i)) | |
} | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
require.Len(t, res, 20) | |
control = res | |
}) | |
t.Run("brute force control", func(t *testing.T) { | |
bf := bruteForceCosine(vectors, []float32{0.1, 0.1, 0.1}, 100) | |
bfControl = make([]uint64, len(bf)) | |
i := 0 | |
for _, elem := range bf { | |
if elem%2 == 0 { | |
continue | |
} | |
bfControl[i] = elem | |
i++ | |
} | |
if i > 20 { | |
i = 20 | |
} | |
bfControl = bfControl[:i] | |
recall := float32(testinghelpers.MatchesInLists(bfControl, control)) / float32(len(bfControl)) | |
fmt.Println(recall) | |
assert.True(t, recall > 0.6, "control should match bf control") | |
}) | |
fmt.Printf("entrypoint before %d\n", vectorIndex.entryPointID) | |
t.Run("deleting every even element", func(t *testing.T) { | |
for i := range vectors { | |
if i%2 != 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
}) | |
t.Run("running the cleanup", func(t *testing.T) { | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
t.Run("start a search that should only contain the remaining elements", func(t *testing.T) { | |
res, _, err := vectorIndex.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
for _, elem := range res { | |
if elem%2 == 0 { | |
t.Errorf("search result contained an even element: %d", elem) | |
} | |
} | |
recall := float32(testinghelpers.MatchesInLists(res, control)) / float32(len(control)) | |
assert.True(t, recall > 0.6) | |
}) | |
t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { | |
assert.Len(t, vectorIndex.tombstones, 0) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, vectorIndex.Drop(context.Background())) | |
}) | |
} | |
func TestDelete_InCompressedIndex_WithCleaningUpTombstonesOnce_DoesNotCrash(t *testing.T) { | |
var ( | |
vectorIndex *hnsw | |
// there is a single bulk clean event after all the deletes | |
vectors = vectorsForDeleteTest() | |
rootPath = t.TempDir() | |
userConfig = ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
PQ: ent.PQConfig{Enabled: true, Encoder: ent.PQEncoder{Type: "tile", Distribution: "normal"}}, | |
} | |
) | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: rootPath, | |
ID: "delete-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id%uint64(len(vectors)))], nil | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, userConfig, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
cfg := ent.PQConfig{ | |
Enabled: true, | |
Encoder: ent.PQEncoder{ | |
Type: ent.PQEncoderTypeTile, | |
Distribution: ent.PQEncoderDistributionLogNormal, | |
}, | |
BitCompression: false, | |
Segments: 3, | |
Centroids: 256, | |
} | |
userConfig.PQ = cfg | |
index.compress(userConfig) | |
for i := len(vectors); i < 1000; i++ { | |
err := vectorIndex.Add(uint64(i), vectors[i%len(vectors)]) | |
require.Nil(t, err) | |
} | |
}) | |
t.Run("deleting every even element", func(t *testing.T) { | |
for i := range vectors { | |
if i%2 != 0 { | |
continue | |
} | |
err := vectorIndex.Delete(uint64(i)) | |
require.Nil(t, err) | |
} | |
}) | |
t.Run("running the cleanup", func(t *testing.T) { | |
err := vectorIndex.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
t.Run("verify the graph no longer has any tombstones", func(t *testing.T) { | |
assert.Len(t, vectorIndex.tombstones, 0) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, vectorIndex.Drop(context.Background())) | |
}) | |
} | |
// we need a certain number of elements so that we can make sure that nodes | |
// from all layers will eventually be deleted, otherwise our test only tests | |
// edge cases which aren't very common in real life, but ignore the most common | |
// deletes | |
func vectorsForDeleteTest() [][]float32 { | |
return [][]float32{ | |
{0.27335858, 0.42670676, 0.12599982}, | |
{0.34369454, 0.78510034, 0.78000546}, | |
{0.2342731, 0.076864816, 0.6405078}, | |
{0.07597838, 0.7752282, 0.87022865}, | |
{0.78632426, 0.06902865, 0.7423889}, | |
{0.3055758, 0.3901508, 0.9399572}, | |
{0.48687622, 0.26338226, 0.06495104}, | |
{0.5384028, 0.35410047, 0.8821815}, | |
{0.25123185, 0.62722564, 0.86443096}, | |
{0.58484185, 0.13103616, 0.4034975}, | |
{0.0019696166, 0.46822622, 0.42492124}, | |
{0.42401955, 0.8278863, 0.5952888}, | |
{0.15367928, 0.70778894, 0.0070928824}, | |
{0.95760256, 0.45898128, 0.1541115}, | |
{0.9125976, 0.9021616, 0.21607016}, | |
{0.9876307, 0.5243228, 0.37294936}, | |
{0.8194746, 0.56142205, 0.5130103}, | |
{0.805065, 0.62250346, 0.63715476}, | |
{0.9969276, 0.5115748, 0.18916714}, | |
{0.16419733, 0.15029702, 0.36020836}, | |
{0.9660323, 0.35887036, 0.6072966}, | |
{0.72765416, 0.27891788, 0.9094314}, | |
{0.8626208, 0.3540126, 0.3100354}, | |
{0.7153876, 0.17094712, 0.7801294}, | |
{0.23180388, 0.107446484, 0.69542855}, | |
{0.54731685, 0.8949827, 0.68316746}, | |
{0.15049729, 0.1293767, 0.0574729}, | |
{0.89379513, 0.67022973, 0.57360715}, | |
{0.725353, 0.25326362, 0.44264215}, | |
{0.2568602, 0.4986094, 0.9759933}, | |
{0.7300015, 0.70019704, 0.49546525}, | |
{0.54314494, 0.2004176, 0.63803226}, | |
{0.6180191, 0.5260845, 0.9373999}, | |
{0.63356537, 0.81430644, 0.78373694}, | |
{0.69995105, 0.84198904, 0.17851257}, | |
{0.5197941, 0.11502675, 0.95129955}, | |
{0.15791401, 0.07516741, 0.113447875}, | |
{0.06811827, 0.4450082, 0.98595786}, | |
{0.7153448, 0.41833848, 0.06332495}, | |
{0.6704102, 0.28931814, 0.031580303}, | |
{0.47773632, 0.73334247, 0.6925025}, | |
{0.7976896, 0.9499536, 0.6394833}, | |
{0.3074854, 0.14025249, 0.35961738}, | |
{0.49956197, 0.093575336, 0.790093}, | |
{0.4641653, 0.21276893, 0.528895}, | |
{0.1021849, 0.9416305, 0.46738508}, | |
{0.3790398, 0.50099677, 0.98233247}, | |
{0.39650732, 0.020929832, 0.53968865}, | |
{0.77604437, 0.8554197, 0.24056046}, | |
{0.07174444, 0.28758526, 0.67587185}, | |
{0.22292718, 0.66624546, 0.6077909}, | |
{0.22090498, 0.36197436, 0.40415043}, | |
{0.04838009, 0.120789215, 0.17928012}, | |
{0.55166364, 0.3400502, 0.43698996}, | |
{0.7638108, 0.47014108, 0.23208627}, | |
{0.9239513, 0.8418566, 0.23518613}, | |
{0.289589, 0.85010827, 0.055741556}, | |
{0.32436147, 0.18756394, 0.4217864}, | |
{0.041671168, 0.37824047, 0.66486764}, | |
{0.5052222, 0.07982704, 0.64345413}, | |
{0.62675995, 0.20138603, 0.8231867}, | |
{0.86306876, 0.9698708, 0.11398846}, | |
{0.68566775, 0.22026269, 0.13525572}, | |
{0.57706076, 0.32325208, 0.6122228}, | |
{0.80035216, 0.18560356, 0.6328281}, | |
{0.87145543, 0.19380389, 0.8863942}, | |
{0.33777508, 0.6056442, 0.9110077}, | |
{0.3961719, 0.49714503, 0.14191929}, | |
{0.5344362, 0.8166916, 0.75880384}, | |
{0.015749464, 0.63223976, 0.5470922}, | |
{0.10512444, 0.2212036, 0.24995685}, | |
{0.10831311, 0.27044898, 0.8668174}, | |
{0.3272971, 0.6659298, 0.87119603}, | |
{0.42913893, 0.14528985, 0.69957525}, | |
{0.33012474, 0.81964344, 0.092787445}, | |
{0.093618214, 0.90637344, 0.94406706}, | |
{0.12161567, 0.75131124, 0.40563175}, | |
{0.9154454, 0.75925833, 0.8406739}, | |
{0.81649286, 0.9025715, 0.3105051}, | |
{0.2927649, 0.22649862, 0.9708593}, | |
{0.30813727, 0.0079439245, 0.39662006}, | |
{0.94943213, 0.36778906, 0.217876}, | |
{0.716794, 0.3811725, 0.18448676}, | |
{0.66879725, 0.29722908, 0.0031202603}, | |
{0.11104216, 0.13094379, 0.0787222}, | |
{0.8508966, 0.86416596, 0.15885831}, | |
{0.2303136, 0.56660503, 0.17114973}, | |
{0.8632685, 0.4229249, 0.1936724}, | |
{0.03060897, 0.35226125, 0.8115969}, | |
} | |
} | |
func TestDelete_EntrypointIssues(t *testing.T) { | |
// This test is motivated by flakyness of other tests. We seemed to have | |
// experienced a failure with the following structure | |
// | |
// Entrypoint: 6 | |
// Max Level: 1 | |
// Tombstones map[] | |
// Nodes and Connections: | |
// Node 0 | |
// Level 0: Connections: [1 2 3 4 5 6 7 8] | |
// Node 1 | |
// Level 0: Connections: [0 2 3 4 5 6 7 8] | |
// Node 2 | |
// Level 0: Connections: [1 0 3 4 5 6 7 8] | |
// Node 3 | |
// Level 0: Connections: [2 1 0 4 5 6 7 8] | |
// Node 4 | |
// Level 0: Connections: [3 2 1 0 5 6 7 8] | |
// Node 5 | |
// Level 0: Connections: [3 4 2 1 0 6 7 8] | |
// Node 6 | |
// Level 0: Connections: [4 2 1 3 5 0 7 8] | |
// Level 1: Connections: [7] | |
// Node 7 | |
// Level 1: Connections: [6] | |
// Level 0: Connections: [6 4 3 5 2 1 0 8] | |
// Node 8 | |
// Level 0: Connections: [7 6 4 3 5 2 1 0] | |
// | |
// This test aims to rebuild this tree exactly (manually) and verifies that | |
// deletion of the old entrypoint (element 6), works without issue | |
// | |
// The underlying test set can be found in vectors_for_test.go | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-entrypoint-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: testVectorForID, | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) | |
require.Nil(t, err) | |
// manually build the index | |
index.entryPointID = 6 | |
index.currentMaximumLayer = 1 | |
index.nodes = make([]*vertex, 50) | |
index.nodes[0] = &vertex{ | |
id: 0, | |
connections: [][]uint64{ | |
{1, 2, 3, 4, 5, 6, 7, 8}, | |
}, | |
} | |
index.nodes[1] = &vertex{ | |
id: 1, | |
connections: [][]uint64{ | |
{0, 2, 3, 4, 5, 6, 7, 8}, | |
}, | |
} | |
index.nodes[2] = &vertex{ | |
id: 2, | |
connections: [][]uint64{ | |
{1, 0, 3, 4, 5, 6, 7, 8}, | |
}, | |
} | |
index.nodes[3] = &vertex{ | |
id: 3, | |
connections: [][]uint64{ | |
{2, 1, 0, 4, 5, 6, 7, 8}, | |
}, | |
} | |
index.nodes[4] = &vertex{ | |
id: 4, | |
connections: [][]uint64{ | |
{3, 2, 1, 0, 5, 6, 7, 8}, | |
}, | |
} | |
index.nodes[5] = &vertex{ | |
id: 5, | |
connections: [][]uint64{ | |
{3, 4, 2, 1, 0, 6, 7, 8}, | |
}, | |
} | |
index.nodes[6] = &vertex{ | |
id: 6, | |
connections: [][]uint64{ | |
{4, 3, 1, 3, 5, 0, 7, 8}, | |
{7}, | |
}, | |
level: 1, | |
} | |
index.nodes[7] = &vertex{ | |
id: 7, | |
connections: [][]uint64{ | |
{6, 4, 3, 5, 2, 1, 0, 8}, | |
{6}, | |
}, | |
level: 1, | |
} | |
index.nodes[8] = &vertex{ | |
id: 8, | |
connections: [][]uint64{ | |
8: {7, 6, 4, 3, 5, 2, 1, 0}, | |
}, | |
} | |
dumpIndex(index, "before delete") | |
t.Run("delete some elements and permanently delete tombstoned elements", | |
func(t *testing.T) { | |
err := index.Delete(6) | |
require.Nil(t, err) | |
err = index.Delete(8) | |
require.Nil(t, err) | |
err = index.CleanUpTombstonedNodes(neverStop) | |
require.Nil(t, err) | |
}) | |
dumpIndex(index, "after delete") | |
expectedResults := []uint64{ | |
3, 5, 4, // cluster 2 | |
7, // cluster 3 with element 6 and 8 deleted | |
2, 1, 0, // cluster 1 | |
} | |
t.Run("verify that the results are correct", func(t *testing.T) { | |
position := 3 | |
res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil) | |
require.Nil(t, err) | |
assert.Equal(t, expectedResults, res) | |
}) | |
// t.Fail() | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
} | |
func TestDelete_MoreEntrypointIssues(t *testing.T) { | |
vectors := [][]float32{ | |
{7, 1}, | |
{8, 2}, | |
{23, 14}, | |
{6.5, -1}, | |
} | |
vecForID := func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
} | |
// This test is motivated by flakyness of other tests. We seemed to have | |
// experienced a failure with the following structure | |
// | |
// ID: thing_geoupdatetestclass_single_location | |
// Entrypoint: 2 | |
// Max Level: 1 | |
// Tombstones map[0:{} 1:{}] | |
// | |
// Nodes and Connections: | |
// Node 0 | |
// Level 0: Connections: [1] | |
// Node 1 | |
// Level 0: Connections: [0 2] | |
// Level 1: Connections: [2] | |
// Node 2 | |
// Level 1: Connections: [1] | |
// Level 0: Connections: [1] | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "more-delete-entrypoint-flakyness-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewGeoProvider(), | |
VectorForIDThunk: vecForID, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) | |
require.Nil(t, err) | |
// manually build the index | |
index.entryPointID = 2 | |
index.currentMaximumLayer = 1 | |
index.tombstones = map[uint64]struct{}{ | |
0: {}, | |
1: {}, | |
} | |
index.nodes = make([]*vertex, 50) | |
index.nodes[0] = &vertex{ | |
id: 0, | |
connections: [][]uint64{ | |
0: {1}, | |
}, | |
} | |
index.nodes[1] = &vertex{ | |
id: 1, | |
connections: [][]uint64{ | |
0: {0, 2}, | |
1: {2}, | |
}, | |
} | |
index.nodes[2] = &vertex{ | |
id: 2, | |
connections: [][]uint64{ | |
0: {1}, | |
1: {1}, | |
}, | |
} | |
dumpIndex(index, "before adding another element") | |
t.Run("adding a third element", func(t *testing.T) { | |
vec, _ := testVectorForID(context.TODO(), 3) | |
index.Add(3, vec) | |
}) | |
expectedResults := []uint64{ | |
3, 2, | |
} | |
t.Run("verify that the results are correct", func(t *testing.T) { | |
position := 3 | |
res, _, err := index.knnSearchByVector(testVectors[position], 50, 36, nil) | |
require.Nil(t, err) | |
assert.Equal(t, expectedResults, res) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
} | |
func TestDelete_TombstonedEntrypoint(t *testing.T) { | |
vecForID := func(ctx context.Context, id uint64) ([]float32, error) { | |
// always return same vec for all elements | |
return []float32{0.1, 0.2}, nil | |
} | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "tombstoned-entrypoint-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewCosineDistanceProvider(), | |
VectorForIDThunk: vecForID, | |
TempVectorForIDThunk: TempVectorForIDThunk([][]float32{{0.1, 0.2}}), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// explicitly turn off, so we only focus on the tombstoned periods | |
CleanupIntervalSeconds: 0, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), testinghelpers.NewDummyStore(t)) | |
require.Nil(t, err) | |
objVec := []float32{0.1, 0.2} | |
searchVec := []float32{0.05, 0.05} | |
require.Nil(t, index.Add(0, objVec)) | |
require.Nil(t, index.Delete(0)) | |
require.Nil(t, index.Add(1, objVec)) | |
res, _, err := index.SearchByVector(searchVec, 100, nil) | |
require.Nil(t, err) | |
assert.Equal(t, []uint64{1}, res, "should contain the only result") | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
} | |
func TestDelete_Flakyness_gh_1369(t *testing.T) { | |
// parse a snapshot form a flaky test | |
snapshotBefore := []byte(`{"labels":["ran a cleanup cycle"],"id":"delete-test","entrypoint":3,"currentMaximumLayer":3,"tombstones":{},"nodes":[{"id":1,"level":0,"connections":{"0":[11,25,33,3,29,32,5,19,30,7,17,27,21,31,36,34,35,23,15,9,13]}},{"id":3,"level":3,"connections":{"0":[1,29,11,5,25,33,19,32,7,17,30,21,35,31,27,36,23,34,9,15,13],"1":[29,36,13],"2":[29,36],"3":[36]}},{"id":5,"level":0,"connections":{"0":[29,19,7,32,35,21,1,31,3,33,23,25,11,17,36,27,30,9,15,34,13]}},{"id":7,"level":0,"connections":{"0":[32,19,21,31,5,35,23,29,33,36,17,1,9,27,25,30,11,3,15,13,34]}},{"id":9,"level":0,"connections":{"0":[36,23,31,21,15,17,27,7,32,35,30,13,19,33,5,25,29,11,1,34,3]}},{"id":11,"level":0,"connections":{"0":[25,33,1,30,17,3,27,32,34,29,19,7,5,36,15,21,31,23,9,13,35]}},{"id":13,"level":1,"connections":{"0":[15,27,34,36,30,17,9,33,25,31,23,21,11,32,7,1,19,35,5,29,3],"1":[36,29,3]}},{"id":15,"level":0,"connections":{"0":[13,27,36,17,30,9,34,33,31,23,25,21,32,11,7,1,19,35,5,29,3]}},{"id":17,"level":0,"connections":{"0":[27,30,36,33,15,32,25,31,9,11,21,7,23,1,34,13,19,5,29,35,3]}},{"id":19,"level":0,"connections":{"0":[5,7,32,29,35,21,31,23,1,33,17,3,25,36,11,27,9,30,15,34,13]}},{"id":21,"level":0,"connections":{"0":[31,23,7,35,32,19,9,36,5,17,27,33,29,30,15,1,25,11,3,13,34]}},{"id":23,"level":0,"connections":{"0":[31,21,9,35,7,36,32,19,17,5,27,33,15,29,30,25,1,13,11,3,34]}},{"id":25,"level":0,"connections":{"0":[11,33,1,30,17,27,32,3,34,29,7,19,36,5,15,21,31,23,9,13,35]}},{"id":27,"level":0,"connections":{"0":[17,30,36,15,33,25,13,9,34,32,11,31,21,7,23,1,19,5,29,35,3]}},{"id":29,"level":2,"connections":{"0":[5,19,32,7,3,1,33,35,21,25,31,11,23,17,30,36,27,9,15,34,13],"1":[3,36,13],"2":[3,36]}},{"id":30,"level":0,"connections":{"0":[27,17,33,25,15,36,11,34,32,1,13,9,31,7,21,23,19,29,5,3,35]}},{"id":31,"level":0,"connections":{"0":[21,23,7,32,35,9,36,19,17,5,27,33,29,30,15,25,1,11,13,3,34]}},{"id":32,"level":0,"connections":{"0":[7,19,21,31,5,33,29,17,23,1,35,36,25,27,30,11,9,3,15,34,13]}},{"id":33,"level":0,"connections":{"0":[25,11,1,17,30,32,27,7,19,36,29,5,21,31,3,34,15,23,9,35,13]}},{"id":34,"level":0,"connections":{"0":[30,27,15,13,25,17,11,33,36,1,32,9,31,7,21,3,23,19,29,5,35]}},{"id":35,"level":0,"connections":{"0":[21,7,31,23,19,5,32,29,9,36,17,33,1,27,25,30,3,11,15,13,34]}},{"id":36,"level":3,"connections":{"0":[17,9,27,15,31,23,21,30,32,7,33,13,25,19,35,11,34,1,5,29,3],"1":[13,29,3],"2":[29,3],"3":[3]}}]} | |
`) | |
vectors := vectorsForDeleteTest() | |
vecForID := func(ctx context.Context, id uint64) ([]float32, error) { | |
return vectors[int(id)], nil | |
} | |
index, err := NewFromJSONDumpMap(snapshotBefore, vecForID) | |
require.Nil(t, err) | |
index.forbidFlat = true | |
var control []uint64 | |
t.Run("control search before delete with the respective allow list", func(t *testing.T) { | |
allowList := helpers.NewAllowList() | |
for i := range vectors { | |
if i%2 == 0 { | |
continue | |
} | |
allowList.Insert(uint64(i)) | |
} | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, allowList) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
control = res | |
}) | |
t.Run("delete the remaining even entries", func(t *testing.T) { | |
require.Nil(t, index.Delete(30)) | |
require.Nil(t, index.Delete(32)) | |
require.Nil(t, index.Delete(34)) | |
require.Nil(t, index.Delete(36)) | |
}) | |
t.Run("verify against control BEFORE Tombstone Cleanup", func(t *testing.T) { | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
assert.Equal(t, control, res) | |
}) | |
t.Run("clean up tombstoned nodes", func(t *testing.T) { | |
require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) | |
}) | |
t.Run("verify against control AFTER Tombstone Cleanup", func(t *testing.T) { | |
res, _, err := index.SearchByVector([]float32{0.1, 0.1, 0.1}, 20, nil) | |
require.Nil(t, err) | |
require.True(t, len(res) > 0) | |
assert.Equal(t, control, res) | |
}) | |
t.Run("now delete the entrypoint", func(t *testing.T) { | |
require.Nil(t, index.Delete(index.entryPointID)) | |
}) | |
t.Run("clean up tombstoned nodes", func(t *testing.T) { | |
require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) | |
}) | |
t.Run("now delete the entrypoint", func(t *testing.T) { | |
// this verifies that our findNewLocalEntrypoint also works when the global | |
// entrypoint is affected | |
require.Nil(t, index.Delete(index.entryPointID)) | |
}) | |
t.Run("clean up tombstoned nodes", func(t *testing.T) { | |
require.Nil(t, index.CleanUpTombstonedNodes(neverStop)) | |
}) | |
t.Run("destroy the index", func(t *testing.T) { | |
require.Nil(t, index.Drop(context.Background())) | |
}) | |
} | |
func bruteForceCosine(vectors [][]float32, query []float32, k int) []uint64 { | |
type distanceAndIndex struct { | |
distance float32 | |
index uint64 | |
} | |
distances := make([]distanceAndIndex, len(vectors)) | |
d := distancer.NewCosineDistanceProvider().New(distancer.Normalize(query)) | |
for i, vec := range vectors { | |
dist, _, _ := d.Distance(distancer.Normalize(vec)) | |
distances[i] = distanceAndIndex{ | |
index: uint64(i), | |
distance: dist, | |
} | |
} | |
sort.Slice(distances, func(a, b int) bool { | |
return distances[a].distance < distances[b].distance | |
}) | |
if len(distances) < k { | |
k = len(distances) | |
} | |
out := make([]uint64, k) | |
for i := 0; i < k; i++ { | |
out[i] = distances[i].index | |
} | |
return out | |
} | |
func neverStop() bool { | |
return false | |
} | |
// This test simulates what happens when the EP is removed from the | |
// VectorForID-serving store | |
func Test_DeleteEPVecInUnderlyingObjectStore(t *testing.T) { | |
var vectorIndex *hnsw | |
vectors := [][]float32{ | |
{1, 1}, | |
{2, 2}, | |
{3, 3}, | |
} | |
vectorErrors := []error{ | |
nil, | |
nil, | |
nil, | |
} | |
store := testinghelpers.NewDummyStore(t) | |
t.Run("import the test vectors", func(t *testing.T) { | |
index, err := New(Config{ | |
RootPath: "doesnt-matter-as-committlogger-is-mocked-out", | |
ID: "delete-ep-in-underlying-store-test", | |
MakeCommitLoggerThunk: MakeNoopCommitLogger, | |
DistanceProvider: distancer.NewL2SquaredProvider(), | |
VectorForIDThunk: func(ctx context.Context, id uint64) ([]float32, error) { | |
fmt.Printf("vec for pos=%d is %v\n", id, vectors[int(id)]) | |
return vectors[int(id)], vectorErrors[int(id)] | |
}, | |
TempVectorForIDThunk: TempVectorForIDThunk(vectors), | |
}, ent.UserConfig{ | |
MaxConnections: 30, | |
EFConstruction: 128, | |
// The actual size does not matter for this test, but if it defaults to | |
// zero it will constantly think it's full and needs to be deleted - even | |
// after just being deleted, so make sure to use a positive number here. | |
VectorCacheMaxObjects: 100000, | |
}, cyclemanager.NewCallbackGroupNoop(), cyclemanager.NewCallbackGroupNoop(), | |
cyclemanager.NewCallbackGroupNoop(), store) | |
require.Nil(t, err) | |
vectorIndex = index | |
for i, vec := range vectors { | |
err := vectorIndex.Add(uint64(i), vec) | |
require.Nil(t, err) | |
} | |
fmt.Printf("ep is %d\n", vectorIndex.entryPointID) | |
}) | |
t.Run("simulate ep vec deletion in object store", func(t *testing.T) { | |
vectors[0] = nil | |
vectorErrors[0] = storobj.NewErrNotFoundf(0, "deleted") | |
vectorIndex.cache.Delete(context.Background(), 0) | |
}) | |
t.Run("try to insert a fourth vector", func(t *testing.T) { | |
vectors = append(vectors, []float32{4, 4}) | |
vectorErrors = append(vectorErrors, nil) | |
pos := len(vectors) - 1 | |
err := vectorIndex.Add(uint64(pos), vectors[pos]) | |
require.Nil(t, err) | |
}) | |
} | |