SemanticSearchPOC / adapters /repos /db /helper_for_test.go
KevinStephenson
Adding in weaviate code
b110593
raw
history blame
9.36 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
//go:build integrationTest
// +build integrationTest
package db
import (
"context"
"math/rand"
"testing"
"time"
"github.com/go-openapi/strfmt"
"github.com/google/uuid"
"github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate/adapters/repos/db/indexcheckpoint"
"github.com/weaviate/weaviate/adapters/repos/db/inverted"
"github.com/weaviate/weaviate/adapters/repos/db/inverted/stopwords"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/entities/storobj"
enthnsw "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
)
func parkingGaragesSchema() schema.Schema {
return schema.Schema{
Objects: &models.Schema{
Classes: []*models.Class{
{
Class: "MultiRefParkingGarage",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
{
Name: "location",
DataType: []string{string(schema.DataTypeGeoCoordinates)},
},
},
},
{
Class: "MultiRefParkingLot",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
},
},
{
Class: "MultiRefCar",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
{
Name: "parkedAt",
DataType: []string{"MultiRefParkingGarage", "MultiRefParkingLot"},
},
},
},
{
Class: "MultiRefDriver",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
{
Name: "drives",
DataType: []string{"MultiRefCar"},
},
},
},
{
Class: "MultiRefPerson",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
{
Name: "friendsWith",
DataType: []string{"MultiRefDriver"},
},
},
},
{
Class: "MultiRefSociety",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
{
Name: "hasMembers",
DataType: []string{"MultiRefPerson"},
},
},
},
// for classifications test
{
Class: "ExactCategory",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
},
},
{
Class: "MainCategory",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{
Name: "name",
DataType: schema.DataTypeText.PropString(),
Tokenization: models.PropertyTokenizationWhitespace,
},
},
},
},
},
}
}
func cityCountryAirportSchema() schema.Schema {
return schema.Schema{
Objects: &models.Schema{
Classes: []*models.Class{
{
Class: "Country",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
},
},
{
Class: "City",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{Name: "name", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
{Name: "inCountry", DataType: []string{"Country"}},
{Name: "population", DataType: []string{"int"}},
{Name: "location", DataType: []string{"geoCoordinates"}},
},
},
{
Class: "Airport",
VectorIndexConfig: enthnsw.NewDefaultUserConfig(),
InvertedIndexConfig: invertedConfig(),
Properties: []*models.Property{
{Name: "code", DataType: schema.DataTypeText.PropString(), Tokenization: models.PropertyTokenizationWhitespace},
{Name: "phone", DataType: []string{"phoneNumber"}},
{Name: "inCity", DataType: []string{"City"}},
},
},
},
},
}
}
func testCtx() context.Context {
//nolint:govet
ctx, _ := context.WithTimeout(context.Background(), 30*time.Second)
return ctx
}
func getRandomSeed() *rand.Rand {
return rand.New(rand.NewSource(time.Now().UnixNano()))
}
func testShard(t *testing.T, ctx context.Context, className string, indexOpts ...func(*Index)) (ShardLike, *Index) {
return testShardWithSettings(t, ctx, &models.Class{Class: className}, enthnsw.UserConfig{Skip: true},
false, false, indexOpts...)
}
func testShardWithSettings(t *testing.T, ctx context.Context, class *models.Class,
vic schema.VectorIndexConfig, withStopwords, withCheckpoints bool, indexOpts ...func(*Index),
) (ShardLike, *Index) {
tmpDir := t.TempDir()
logger, _ := test.NewNullLogger()
maxResults := int64(10_000)
repo, err := New(logger, Config{
MemtablesFlushIdleAfter: 60,
RootPath: tmpDir,
QueryMaximumResults: maxResults,
MaxImportGoroutinesFactor: 1,
}, &fakeRemoteClient{}, &fakeNodeResolver{}, &fakeRemoteNodeClient{}, &fakeReplicationClient{}, nil)
require.Nil(t, err)
shardState := singleShardState()
sch := schema.Schema{
Objects: &models.Schema{
Classes: []*models.Class{class},
},
}
schemaGetter := &fakeSchemaGetter{shardState: shardState, schema: sch}
iic := schema.InvertedIndexConfig{}
if class.InvertedIndexConfig != nil {
iic = inverted.ConfigFromModel(class.InvertedIndexConfig)
}
var sd *stopwords.Detector
if withStopwords {
sd, err = stopwords.NewDetectorFromConfig(iic.Stopwords)
require.NoError(t, err)
}
var checkpts *indexcheckpoint.Checkpoints
if withCheckpoints {
checkpts, err = indexcheckpoint.New(tmpDir, logger)
require.NoError(t, err)
}
idx := &Index{
Config: IndexConfig{
RootPath: tmpDir,
ClassName: schema.ClassName(class.Class),
QueryMaximumResults: maxResults,
},
invertedIndexConfig: iic,
vectorIndexUserConfig: vic,
logger: logger,
getSchema: schemaGetter,
centralJobQueue: repo.jobQueueCh,
stopwords: sd,
indexCheckpoints: checkpts,
}
idx.closingCtx, idx.closingCancel = context.WithCancel(context.Background())
idx.initCycleCallbacksNoop()
for _, opt := range indexOpts {
opt(idx)
}
shardName := shardState.AllPhysicalShards()[0]
shard, err := idx.initShard(ctx, shardName, class, nil)
require.NoError(t, err)
idx.shards.Store(shardName, shard)
return shard, idx
}
func testObject(className string) *storobj.Object {
return &storobj.Object{
MarshallerVersion: 1,
Object: models.Object{
ID: strfmt.UUID(uuid.NewString()),
Class: className,
},
Vector: []float32{1, 2, 3},
}
}
func createRandomObjects(r *rand.Rand, className string, numObj int) []*storobj.Object {
obj := make([]*storobj.Object, numObj)
for i := 0; i < numObj; i++ {
obj[i] = &storobj.Object{
MarshallerVersion: 1,
Object: models.Object{
ID: strfmt.UUID(uuid.NewString()),
Class: className,
},
Vector: []float32{r.Float32(), r.Float32(), r.Float32(), r.Float32()},
}
}
return obj
}