Spaces:
Sleeping
Sleeping
| // _ _ | |
| // __ _____ __ ___ ___ __ _| |_ ___ | |
| // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ | |
| // \ V V / __/ (_| |\ V /| | (_| | || __/ | |
| // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| | |
| // | |
| // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. | |
| // | |
| // CONTACT: hello@weaviate.io | |
| // | |
| package objects | |
| import ( | |
| "context" | |
| "fmt" | |
| "testing" | |
| "github.com/go-openapi/strfmt" | |
| "github.com/sirupsen/logrus/hooks/test" | |
| "github.com/stretchr/testify/assert" | |
| "github.com/stretchr/testify/mock" | |
| "github.com/stretchr/testify/require" | |
| "github.com/weaviate/weaviate/entities/models" | |
| "github.com/weaviate/weaviate/entities/schema" | |
| "github.com/weaviate/weaviate/entities/vectorindex/hnsw" | |
| "github.com/weaviate/weaviate/usecases/config" | |
| ) | |
| func Test_BatchManager_AddObjects_WithNoVectorizerModule(t *testing.T) { | |
| var ( | |
| vectorRepo *fakeVectorRepo | |
| modulesProvider *fakeModulesProvider | |
| manager *BatchManager | |
| ) | |
| schema := schema.Schema{ | |
| Objects: &models.Schema{ | |
| Classes: []*models.Class{ | |
| { | |
| Vectorizer: config.VectorizerModuleNone, | |
| Class: "Foo", | |
| VectorIndexConfig: hnsw.UserConfig{}, | |
| }, | |
| { | |
| Vectorizer: config.VectorizerModuleNone, | |
| Class: "FooSkipped", | |
| VectorIndexConfig: hnsw.UserConfig{ | |
| Skip: true, | |
| }, | |
| }, | |
| }, | |
| }, | |
| } | |
| resetAutoSchema := func(autoSchema bool) { | |
| vectorRepo = &fakeVectorRepo{} | |
| config := &config.WeaviateConfig{ | |
| Config: config.Config{ | |
| AutoSchema: config.AutoSchema{ | |
| Enabled: autoSchema, | |
| }, | |
| TrackVectorDimensions: true, | |
| }, | |
| } | |
| locks := &fakeLocks{} | |
| schemaManager := &fakeSchemaManager{ | |
| GetSchemaResponse: schema, | |
| } | |
| logger, _ := test.NewNullLogger() | |
| authorizer := &fakeAuthorizer{} | |
| modulesProvider = getFakeModulesProvider() | |
| manager = NewBatchManager(vectorRepo, modulesProvider, locks, | |
| schemaManager, config, logger, authorizer, nil) | |
| } | |
| reset := func() { | |
| resetAutoSchema(false) | |
| } | |
| ctx := context.Background() | |
| t.Run("without any objects", func(t *testing.T) { | |
| reset() | |
| expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" + | |
| " one object for batching") | |
| _, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil) | |
| assert.Equal(t, expectedErr, err) | |
| }) | |
| t.Run("with objects without IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| objects := []*models.Object{ | |
| { | |
| Class: "Foo", | |
| Vector: []float32{0.1, 0.1, 0.1111}, | |
| }, | |
| { | |
| Class: "Foo", | |
| Vector: []float32{0.2, 0.2, 0.2222}, | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Len(t, repoCalledWithObjects[0].UUID, 36, | |
| "a uuid was set for the first object") | |
| assert.Len(t, repoCalledWithObjects[1].UUID, 36, | |
| "a uuid was set for the second object") | |
| assert.Nil(t, repoCalledWithObjects[0].Err) | |
| assert.Nil(t, repoCalledWithObjects[1].Err) | |
| assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector, | |
| "the correct vector was used") | |
| assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector, | |
| "the correct vector was used") | |
| }) | |
| t.Run("with objects without IDs and nonexistent class and auto schema enabled", func(t *testing.T) { | |
| resetAutoSchema(true) | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| objects := []*models.Object{ | |
| { | |
| Class: "NonExistentFoo", | |
| Vector: []float32{0.1, 0.1, 0.1111}, | |
| }, | |
| { | |
| Class: "NonExistentFoo", | |
| Vector: []float32{0.2, 0.2, 0.2222}, | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Len(t, repoCalledWithObjects[0].UUID, 36, | |
| "a uuid was set for the first object") | |
| assert.Len(t, repoCalledWithObjects[1].UUID, 36, | |
| "a uuid was set for the second object") | |
| assert.Nil(t, repoCalledWithObjects[0].Err) | |
| assert.Nil(t, repoCalledWithObjects[1].Err) | |
| assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector, | |
| "the correct vector was used") | |
| assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector, | |
| "the correct vector was used") | |
| }) | |
| t.Run("with user-specified IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2") | |
| id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") | |
| objects := []*models.Object{ | |
| { | |
| ID: id1, | |
| Class: "Foo", | |
| Vector: []float32{0.1, 0.1, 0.1111}, | |
| }, | |
| { | |
| ID: id2, | |
| Class: "Foo", | |
| Vector: []float32{0.2, 0.2, 0.2222}, | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used") | |
| assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") | |
| assert.Nil(t, repoCalledWithObjects[0].Err) | |
| assert.Nil(t, repoCalledWithObjects[1].Err) | |
| assert.Equal(t, []float32{0.1, 0.1, 0.1111}, repoCalledWithObjects[0].Vector, | |
| "the correct vector was used") | |
| assert.Equal(t, []float32{0.2, 0.2, 0.2222}, repoCalledWithObjects[1].Vector, | |
| "the correct vector was used") | |
| }) | |
| t.Run("with an invalid user-specified IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| id1 := strfmt.UUID("invalid") | |
| id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") | |
| objects := []*models.Object{ | |
| { | |
| ID: id1, | |
| Class: "Foo", | |
| Vector: []float32{0.1, 0.1, 0.1111}, | |
| }, | |
| { | |
| ID: id2, | |
| Class: "Foo", | |
| Vector: []float32{0.2, 0.2, 0.2222}, | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1))) | |
| assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") | |
| }) | |
| t.Run("without any vectors", func(t *testing.T) { | |
| // prior to v1.10 this was the desired behavior: | |
| // note that this should fail on class Foo, but be accepted on class | |
| // FooSkipped | |
| // | |
| // However, since v1.10, it is acceptable to exclude a vector, even if | |
| // indexing is not skipped. In this case only the individual element is | |
| // skipped. See https://github.com/weaviate/weaviate/issues/1800 | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| objects := []*models.Object{ | |
| { | |
| Class: "Foo", | |
| }, | |
| { | |
| Class: "FooSkipped", | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Nil(t, repoCalledWithObjects[0].Err) | |
| assert.Nil(t, repoCalledWithObjects[1].Err) | |
| }) | |
| } | |
| func Test_BatchManager_AddObjects_WithExternalVectorizerModule(t *testing.T) { | |
| var ( | |
| vectorRepo *fakeVectorRepo | |
| modulesProvider *fakeModulesProvider | |
| manager *BatchManager | |
| ) | |
| schema := schema.Schema{ | |
| Objects: &models.Schema{ | |
| Classes: []*models.Class{ | |
| { | |
| Vectorizer: config.VectorizerModuleText2VecContextionary, | |
| VectorIndexConfig: hnsw.UserConfig{}, | |
| Class: "Foo", | |
| }, | |
| }, | |
| }, | |
| } | |
| reset := func() { | |
| vectorRepo = &fakeVectorRepo{} | |
| config := &config.WeaviateConfig{} | |
| locks := &fakeLocks{} | |
| schemaManager := &fakeSchemaManager{ | |
| GetSchemaResponse: schema, | |
| } | |
| logger, _ := test.NewNullLogger() | |
| authorizer := &fakeAuthorizer{} | |
| modulesProvider = getFakeModulesProvider() | |
| manager = NewBatchManager(vectorRepo, modulesProvider, locks, | |
| schemaManager, config, logger, authorizer, nil) | |
| } | |
| ctx := context.Background() | |
| t.Run("without any objects", func(t *testing.T) { | |
| reset() | |
| expectedErr := NewErrInvalidUserInput("invalid param 'objects': cannot be empty, need at least" + | |
| " one object for batching") | |
| _, err := manager.AddObjects(ctx, nil, []*models.Object{}, []*string{}, nil) | |
| assert.Equal(t, expectedErr, err) | |
| }) | |
| t.Run("with objects without IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| expectedVector := []float32{0, 1, 2} | |
| objects := []*models.Object{ | |
| { | |
| Class: "Foo", | |
| }, | |
| { | |
| Class: "Foo", | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(expectedVector, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Len(t, repoCalledWithObjects[0].UUID, 36, "a uuid was set for the first object") | |
| assert.Len(t, repoCalledWithObjects[1].UUID, 36, "a uuid was set for the second object") | |
| assert.Nil(t, repoCalledWithObjects[0].Err) | |
| assert.Nil(t, repoCalledWithObjects[1].Err) | |
| assert.Equal(t, expectedVector, repoCalledWithObjects[0].Vector, | |
| "the correct vector was used") | |
| assert.Equal(t, expectedVector, repoCalledWithObjects[1].Vector, | |
| "the correct vector was used") | |
| }) | |
| t.Run("with user-specified IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| id1 := strfmt.UUID("2d3942c3-b412-4d80-9dfa-99a646629cd2") | |
| id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") | |
| objects := []*models.Object{ | |
| { | |
| ID: id1, | |
| Class: "Foo", | |
| }, | |
| { | |
| ID: id2, | |
| Class: "Foo", | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Equal(t, id1, repoCalledWithObjects[0].UUID, "the user-specified uuid was used") | |
| assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") | |
| }) | |
| t.Run("with an invalid user-specified IDs", func(t *testing.T) { | |
| reset() | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| id1 := strfmt.UUID("invalid") | |
| id2 := strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6") | |
| objects := []*models.Object{ | |
| { | |
| ID: id1, | |
| Class: "Foo", | |
| }, | |
| { | |
| ID: id2, | |
| Class: "Foo", | |
| }, | |
| } | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| _, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| repoCalledWithObjects := vectorRepo.Calls[0].Arguments[0].(BatchObjects) | |
| assert.Nil(t, err) | |
| require.Len(t, repoCalledWithObjects, 2) | |
| assert.Equal(t, repoCalledWithObjects[0].Err.Error(), fmt.Sprintf("invalid UUID length: %d", len(id1))) | |
| assert.Equal(t, id2, repoCalledWithObjects[1].UUID, "the user-specified uuid was used") | |
| }) | |
| } | |
| func Test_BatchManager_AddObjectsEmptyProperties(t *testing.T) { | |
| var ( | |
| vectorRepo *fakeVectorRepo | |
| modulesProvider *fakeModulesProvider | |
| manager *BatchManager | |
| ) | |
| schema := schema.Schema{ | |
| Objects: &models.Schema{ | |
| Classes: []*models.Class{ | |
| { | |
| Class: "TestClass", | |
| VectorIndexConfig: hnsw.UserConfig{}, | |
| Properties: []*models.Property{ | |
| { | |
| Name: "strings", | |
| DataType: schema.DataTypeTextArray.PropString(), | |
| Tokenization: models.PropertyTokenizationWhitespace, | |
| }, | |
| }, | |
| }, | |
| }, | |
| }, | |
| } | |
| reset := func() { | |
| vectorRepo = &fakeVectorRepo{} | |
| vectorRepo.On("BatchPutObjects", mock.Anything).Return(nil).Once() | |
| config := &config.WeaviateConfig{} | |
| locks := &fakeLocks{} | |
| schemaManager := &fakeSchemaManager{ | |
| GetSchemaResponse: schema, | |
| } | |
| logger, _ := test.NewNullLogger() | |
| authorizer := &fakeAuthorizer{} | |
| modulesProvider = getFakeModulesProvider() | |
| manager = NewBatchManager(vectorRepo, modulesProvider, locks, | |
| schemaManager, config, logger, authorizer, nil) | |
| } | |
| reset() | |
| objects := []*models.Object{ | |
| { | |
| ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff6"), | |
| Class: "TestClass", | |
| }, | |
| { | |
| ID: strfmt.UUID("cf918366-3d3b-4b90-9bc6-bc5ea8762ff3"), | |
| Class: "TestClass", | |
| Properties: map[string]interface{}{ | |
| "name": "testName", | |
| }, | |
| }, | |
| } | |
| require.Nil(t, objects[0].Properties) | |
| require.NotNil(t, objects[1].Properties) | |
| ctx := context.Background() | |
| for range objects { | |
| modulesProvider.On("UpdateVector", mock.Anything, mock.AnythingOfType(FindObjectFn)). | |
| Return(nil, nil) | |
| } | |
| addedObjects, err := manager.AddObjects(ctx, nil, objects, []*string{}, nil) | |
| assert.Nil(t, err) | |
| require.Len(t, addedObjects, 2) | |
| require.NotNil(t, addedObjects[0].Object.Properties) | |
| require.NotNil(t, addedObjects[1].Object.Properties) | |
| } | |