SemanticSearchPOC / usecases /objects /batch_add_test.go
KevinStephenson
Adding in weaviate code
b110593
raw
history blame
14.6 kB
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package objects
import (
"context"
"fmt"
"testing"
"github.com/go-openapi/strfmt"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/entities/vectorindex/hnsw"
"github.com/weaviate/weaviate/usecases/config"
)
// Test_BatchManager_AddObjects_WithNoVectorizerModule exercises
// BatchManager.AddObjects against a schema whose classes ("Foo" and
// "FooSkipped") are configured with config.VectorizerModuleNone.
//
// Every subtest rebuilds the manager and its fakes, so subtests do not share
// recorded mock calls. The subtests cover: an empty batch being rejected,
// UUIDs being generated when none are supplied, the same flow with auto schema
// enabled and an unknown class, user-specified UUIDs being kept, an invalid
// UUID being reported on the individual object, and objects without vectors
// being accepted.
func Test_BatchManager_AddObjects_WithNoVectorizerModule(t *testing.T) {
	var (
		vectorRepo      *fakeVectorRepo
		modulesProvider *fakeModulesProvider
		manager         *BatchManager
	)

	// Named sch (not schema) so the imported schema package is not shadowed.
	sch := schema.Schema{
		Objects: &models.Schema{
			Classes: []*models.Class{
				{
					Vectorizer:        config.VectorizerModuleNone,
					Class:             "Foo",
					VectorIndexConfig: hnsw.UserConfig{},
				},
				{
					Vectorizer: config.VectorizerModuleNone,
					Class:      "FooSkipped",
					VectorIndexConfig: hnsw.UserConfig{
						Skip: true,
					},
				},
			},
		},
	}

	// resetAutoSchema replaces the repo, the modules provider and the manager
	// with fresh instances; autoSchema toggles Config.AutoSchema.Enabled.
	resetAutoSchema := func(autoSchema bool) {
		vectorRepo = &fakeVectorRepo{}
		// Named cfg (not config) so the imported config package is not shadowed.
		cfg := &config.WeaviateConfig{
			Config: config.Config{
				AutoSchema: config.AutoSchema{
					Enabled: autoSchema,
				},
				TrackVectorDimensions: true,
			},
		}
		locks := &fakeLocks{}
		schemaManager := &fakeSchemaManager{
			GetSchemaResponse: sch,
		}
		// The second return value is the log hook, which these tests do not inspect.
		logger, _ := test.NewNullLogger()
		authorizer := &fakeAuthorizer{}
		modulesProvider = getFakeModulesProvider()
		manager = NewBatchManager(vectorRepo, modulesProvider, locks,
			schemaManager, cfg, logger, authorizer, nil)
	}

	// reset rebuilds the fixtures with auto schema disabled.
	reset := func() {
		resetAutoSchema(false)
	}

	// batchPutArg returns the first argument of the first call recorded on the
	// vector repo mock. It fails the subtest with a message, rather than
	// panicking, when no call was recorded or the argument has another type.
	batchPutArg := func(t *testing.T) BatchObjects {
		t.Helper()
		require.NotEmpty(t, vectorRepo.Calls, "the vector repo was never called")
		args := vectorRepo.Calls[0].Arguments
		require.NotEmpty(t, args, "the first repo call has no arguments")
		batch, ok := args[0].(BatchObjects)
		require.True(t, ok, "the first repo call argument is not BatchObjects")
		return batch
	}

	ctx := context.Background()

	t.Run("without any objects", func(t *testing.T) {
		reset()
		expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" +
			" one object for batching")

		_, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil)

		assert.Equal(t, expectedErr, err)
	})

	t.Run("with objects without IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		objects := []*models.Object{
			{
				Class:  "Foo",
				Vector: []float32{0.1, 0.1, 0.1111},
			},
			{
				Class:  "Foo",
				Vector: []float32{0.2, 0.2, 0.2222},
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Len(t, repoCalledWithObjects[0].UUID, 36,
			"a uuid was set for the first object")
		assert.Len(t, repoCalledWithObjects[1].UUID, 36,
			"a uuid was set for the second object")
		assert.Nil(t, repoCalledWithObjects[0].Err)
		assert.Nil(t, repoCalledWithObjects[1].Err)
		assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector,
			"the correct vector was used")
		assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector,
			"the correct vector was used")
	})

	t.Run("with objects without IDs and nonexistent class and auto schema enabled", func(t *testing.T) {
		resetAutoSchema(true)
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		objects := []*models.Object{
			{
				Class:  "NonExistentFoo",
				Vector: []float32{0.1, 0.1, 0.1111},
			},
			{
				Class:  "NonExistentFoo",
				Vector: []float32{0.2, 0.2, 0.2222},
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Len(t, repoCalledWithObjects[0].UUID, 36,
			"a uuid was set for the first object")
		assert.Len(t, repoCalledWithObjects[1].UUID, 36,
			"a uuid was set for the second object")
		assert.Nil(t, repoCalledWithObjects[0].Err)
		assert.Nil(t, repoCalledWithObjects[1].Err)
		assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector,
			"the correct vector was used")
		assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector,
			"the correct vector was used")
	})

	t.Run("with user-specified IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2")
		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
		objects := []*models.Object{
			{
				ID:     id1,
				Class:  "Foo",
				Vector: []float32{0.1, 0.1, 0.1111},
			},
			{
				ID:     id2,
				Class:  "Foo",
				Vector: []float32{0.2, 0.2, 0.2222},
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used")
		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
		assert.Nil(t, repoCalledWithObjects[0].Err)
		assert.Nil(t, repoCalledWithObjects[1].Err)
		assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector,
			"the correct vector was used")
		assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector,
			"the correct vector was used")
	})

	t.Run("with an invalid user-specified IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		id1 := strfmt.UUID("invalid")
		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
		objects := []*models.Object{
			{
				ID:     id1,
				Class:  "Foo",
				Vector: []float32{0.1, 0.1, 0.1111},
			},
			{
				ID:     id2,
				Class:  "Foo",
				Vector: []float32{0.2, 0.2, 0.2222},
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// The batch call itself succeeds; the invalid ID is reported per object.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		// Guard the Error() call below against a nil error.
		require.NotNil(t, repoCalledWithObjects[0].Err,
			"the object with the invalid uuid carries an error")
		assert.Equal(t, fmt.Sprintf("invalid UUID length: %d", len(id1)),
			repoCalledWithObjects[0].Err.Error())
		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
	})

	t.Run("without any vectors", func(t *testing.T) {
		// prior to v1.10 this was the desired behavior:
		// note that this should fail on class Foo, but be accepted on class
		// FooSkipped
		//
		// However, since v1.10, it is acceptable to exclude a vector, even if
		// indexing is not skipped. In this case only the individual element is
		// skipped. See https://github.com/weaviate/weaviate/issues/1800
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		objects := []*models.Object{
			{
				Class: "Foo",
			},
			{
				Class: "FooSkipped",
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Nil(t, repoCalledWithObjects[0].Err)
		assert.Nil(t, repoCalledWithObjects[1].Err)
	})
}
// Test_BatchManager_AddObjects_WithExternalVectorizerModule exercises
// BatchManager.AddObjects against a schema whose single class "Foo" is
// configured with config.VectorizerModuleText2VecContextionary.
//
// Every subtest rebuilds the manager and its fakes, so subtests do not share
// recorded mock calls. The subtests cover: an empty batch being rejected, the
// vector returned by the mocked UpdateVector call reaching the repo together
// with generated UUIDs, user-specified UUIDs being kept, and an invalid UUID
// being reported on the individual object.
func Test_BatchManager_AddObjects_WithExternalVectorizerModule(t *testing.T) {
	var (
		vectorRepo      *fakeVectorRepo
		modulesProvider *fakeModulesProvider
		manager         *BatchManager
	)

	// Named sch (not schema) so the imported schema package is not shadowed.
	sch := schema.Schema{
		Objects: &models.Schema{
			Classes: []*models.Class{
				{
					Vectorizer:        config.VectorizerModuleText2VecContextionary,
					VectorIndexConfig: hnsw.UserConfig{},
					Class:             "Foo",
				},
			},
		},
	}

	// reset replaces the repo, the modules provider and the manager with fresh
	// instances built on a zero-value WeaviateConfig.
	reset := func() {
		vectorRepo = &fakeVectorRepo{}
		// Named cfg (not config) so the imported config package is not shadowed.
		cfg := &config.WeaviateConfig{}
		locks := &fakeLocks{}
		schemaManager := &fakeSchemaManager{
			GetSchemaResponse: sch,
		}
		// The second return value is the log hook, which these tests do not inspect.
		logger, _ := test.NewNullLogger()
		authorizer := &fakeAuthorizer{}
		modulesProvider = getFakeModulesProvider()
		manager = NewBatchManager(vectorRepo, modulesProvider, locks,
			schemaManager, cfg, logger, authorizer, nil)
	}

	// batchPutArg returns the first argument of the first call recorded on the
	// vector repo mock. It fails the subtest with a message, rather than
	// panicking, when no call was recorded or the argument has another type.
	batchPutArg := func(t *testing.T) BatchObjects {
		t.Helper()
		require.NotEmpty(t, vectorRepo.Calls, "the vector repo was never called")
		args := vectorRepo.Calls[0].Arguments
		require.NotEmpty(t, args, "the first repo call has no arguments")
		batch, ok := args[0].(BatchObjects)
		require.True(t, ok, "the first repo call argument is not BatchObjects")
		return batch
	}

	ctx := context.Background()

	t.Run("without any objects", func(t *testing.T) {
		reset()
		expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" +
			" one object for batching")

		_, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil)

		assert.Equal(t, expectedErr, err)
	})

	t.Run("with objects without IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		expectedVector := []float32{0, 1, 2}
		objects := []*models.Object{
			{
				Class: "Foo",
			},
			{
				Class: "Foo",
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(expectedVector, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Len(t, repoCalledWithObjects[0].UUID, 36, "a uuid was set for the first object")
		assert.Len(t, repoCalledWithObjects[1].UUID, 36, "a uuid was set for the second object")
		assert.Nil(t, repoCalledWithObjects[0].Err)
		assert.Nil(t, repoCalledWithObjects[1].Err)
		assert.Equal(t, expectedVector, repoCalledWithObjects[0].Vector,
			"the correct vector was used")
		assert.Equal(t, expectedVector, repoCalledWithObjects[1].Vector,
			"the correct vector was used")
	})

	t.Run("with user-specified IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2")
		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
		objects := []*models.Object{
			{
				ID:    id1,
				Class: "Foo",
			},
			{
				ID:    id2,
				Class: "Foo",
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// Stop here on error: the repo may not have been called at all.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used")
		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
	})

	t.Run("with an invalid user-specified IDs", func(t *testing.T) {
		reset()
		vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
		id1 := strfmt.UUID("invalid")
		id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6")
		objects := []*models.Object{
			{
				ID:    id1,
				Class: "Foo",
			},
			{
				ID:    id2,
				Class: "Foo",
			},
		}

		for range objects {
			modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
				Return(nil, nil)
		}

		_, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
		// The batch call itself succeeds; the invalid ID is reported per object.
		require.Nil(t, err)
		repoCalledWithObjects := batchPutArg(t)

		require.Len(t, repoCalledWithObjects, 2)
		// Guard the Error() call below against a nil error.
		require.NotNil(t, repoCalledWithObjects[0].Err,
			"the object with the invalid uuid carries an error")
		assert.Equal(t, fmt.Sprintf("invalid UUID length: %d", len(id1)),
			repoCalledWithObjects[0].Err.Error())
		assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used")
	})
}
func Test_BatchManager_AddObjectsEmptyProperties(t *testing.T) {
var (
vectorRepo *fakeVectorRepo
modulesProvider *fakeModulesProvider
manager *BatchManager
)
schema := schema.Schema{
Objects: &models.Schema{
Classes: []*models.Class{
{
Class: "TestClass",
VectorIndexConfig: hnsw.UserConfig{},
Properties: []*models.Property{
{
Name: "strings",
DataType: schema.DataTypeTextArray.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
},
},
},
},
}
reset := func() {
vectorRepo = &fakeVectorRepo{}
vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once()
config := &config.WeaviateConfig{}
locks := &fakeLocks{}
schemaManager := &fakeSchemaManager{
GetSchemaResponse: schema,
}
logger, _ := test.NewNullLogger()
authorizer := &fakeAuthorizer{}
modulesProvider = getFakeModulesProvider()
manager = NewBatchManager(vectorRepo, modulesProvider, locks,
schemaManager, config, logger, authorizer, nil)
}
reset()
objects := []*models.Object{
{
ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6"),
Class: "TestClass",
},
{
ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff3"),
Class: "TestClass",
Properties: map[string]interface{}{
"name": "testName",
},
},
}
require.Nil(t, objects[0].Properties)
require.NotNil(t, objects[1].Properties)
ctx := context.Background()
for range objects {
modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)).
Return(nil, nil)
}
addedObjects, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil)
assert.Nil(t, err)
require.Len(t, addedObjects, 2)
require.NotNil(t, addedObjects[0].Object.Properties)
require.NotNil(t, addedObjects[1].Object.Properties)
}