KevinStephenson
Adding in weaviate code
b110593
raw
history blame
13 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package acceptance_with_go_client
import (
"context"
"fmt"
"testing"
"github.com/google/uuid"
"github.com/stretchr/testify/require"
client "github.com/weaviate/weaviate-go-client/v4/weaviate"
"github.com/weaviate/weaviate-go-client/v4/weaviate/graphql"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
)
// Package-level fixtures used by the acceptance tests in this file.
var (
	// paragraphs holds a handful of unrelated sample sentences.
	// NOTE(review): not referenced in the visible part of this file;
	// presumably consumed by other tests in the package.
	paragraphs = []string{
		"Some random text",
		"Other text",
		"completely unrelated",
		"this has nothing to do with the rest",
	}

	// TRUE is an addressable true for model fields that take a *bool.
	TRUE = true

	// ctx is the background context used by helpers that do not receive
	// a context of their own.
	ctx = context.Background()
)
// AddClassAndObjects creates the class className through client c and fills
// it with four objects.
//
// The class has two properties: "contents" of the given datatype (word
// tokenization, filterable and searchable) and "num" of type int. BM25 is
// configured with k1=1.2 and b=0.75, and the class uses the given vectorizer.
// Every object stores a slice of strings in "contents" and its position in
// the insertion order (0 through 3) in "num".
//
// Any error while creating the class or one of the objects fails the test
// immediately.
func AddClassAndObjects(t *testing.T, className string, datatype string, c *client.Client, vectorizer string) {
	t.Helper()

	class := &models.Class{
		Class: className,
		Properties: []*models.Property{
			{Name: "contents", DataType: []string{datatype}, Tokenization: "word", IndexFilterable: &TRUE, IndexSearchable: &TRUE},
			{Name: "num", DataType: []string{"int"}},
		},
		InvertedIndexConfig: &models.InvertedIndexConfig{Bm25: &models.BM25Config{K1: 1.2, B: 0.75}},
		Vectorizer:          vectorizer,
	}
	require.Nil(t, c.Schema().ClassCreator().WithClass(class).Do(ctx))

	// One entry per object; the slice index becomes the object's "num".
	// The last two entries pad their single relevant sentence with filler
	// text (the filler is there "to get the score down").
	contents := [][]string{
		{"nice", "what a rain day"},
		{"rain", "snow and sun at once? nice"},
		{
			"super long text to get the score down",
			"snow and sun at the same time? How nice",
			"long text without any meaning",
			"just ignore this",
			"this too, it doesn't matter",
		},
		{
			"super long text to get the score down",
			"rain is necessary",
			"long text without any meaning",
			"just ignore this",
			"this too, it doesn't matter",
		},
	}

	creator := c.Data().Creator()
	for num, texts := range contents {
		_, err := creator.WithClassName(className).WithProperties(
			map[string]interface{}{"contents": texts, "num": num}).Do(ctx)
		// Check every insert: a silently missing object would only surface
		// later as a confusing result-count mismatch in the calling test.
		require.Nil(t, err)
	}
}
// TestSearchOnArrays checks that keyword search matches terms inside
// array-typed text properties, using both hybrid (alpha 0) and BM25 queries.
//
// Each case creates a class with a searchable "contents" array property and
// an int "num" property, inserts two objects that each contain the word
// "nice" in one array element, searches for "nice" and expects both objects
// back, num 0 before num 1.
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestSearchOnArrays(t *testing.T) {
	ctx := context.Background()
	c, err := client.NewClient(client.Config{Scheme: "http", Host: "localhost:8080"})
	require.Nil(t, err)
	c.Schema().AllDeleter().Do(ctx)

	cases := []struct {
		datatype  schema.DataType
		useHybrid bool // bm25 if not
	}{
		{datatype: schema.DataTypeTextArray, useHybrid: true},
		{datatype: schema.DataTypeTextArray, useHybrid: false},
		// deprecated string
		{datatype: schema.DataTypeStringArray, useHybrid: false},
	}
	for _, tt := range cases {
		// Two cases share a datatype, so the query type has to be part of
		// the subtest name to keep the names unique and selectable with -run.
		queryType := "bm25"
		if tt.useHybrid {
			queryType = "hybrid"
		}
		t.Run("arrays "+tt.datatype.String()+" "+queryType, func(t *testing.T) {
			className := "Paragraph15845"
			class := &models.Class{
				Class: className,
				Properties: []*models.Property{
					{
						Name:            "contents",
						DataType:        tt.datatype.PropString(),
						Tokenization:    models.PropertyTokenizationWord,
						IndexFilterable: &vFalse,
						IndexSearchable: &vTrue,
					},
					{
						Name:            "num",
						DataType:        schema.DataTypeInt.PropString(),
						IndexFilterable: &vTrue,
					},
				},
				InvertedIndexConfig: &models.InvertedIndexConfig{Bm25: &models.BM25Config{K1: 1.2, B: 0.75}},
				Vectorizer:          "none",
			}
			require.Nil(t, c.Schema().ClassCreator().WithClass(class).Do(ctx))
			// Every case reuses the same class name, so the class is removed
			// again when the subtest ends.
			defer c.Schema().ClassDeleter().WithClassName(className).Do(ctx)

			creator := c.Data().Creator()
			_, err := creator.WithClassName(className).WithProperties(
				map[string]interface{}{"contents": []string{"what a nice day", "what a rainy day"}, "num": 0}).Do(ctx)
			require.Nil(t, err)
			_, err = creator.WithClassName(className).WithProperties(
				map[string]interface{}{"contents": []string{"rain all day", "snow and sun at the same time? How nice"}, "num": 1}).Do(ctx)
			require.Nil(t, err)

			var results *models.GraphQLResponse
			if tt.useHybrid {
				// alpha 0 is used here together with a class that has no vectorizer.
				builder := c.GraphQL().HybridArgumentBuilder().WithQuery("nice").WithAlpha(0)
				results, err = c.GraphQL().Get().WithClassName(className).WithHybrid(builder).WithFields(graphql.Field{Name: "num"}).Do(ctx)
				require.Nil(t, err)
			} else {
				builder := c.GraphQL().Bm25ArgBuilder().WithQuery("nice").WithProperties("contents")
				results, err = c.GraphQL().Get().WithClassName(className).WithBM25(builder).WithFields(graphql.Field{Name: "num"}).Do(ctx)
				require.Nil(t, err)
			}

			result := results.Data["Get"].(map[string]interface{})[className].([]interface{})
			require.Len(t, result, 2)
			// "num" is compared against float literals because that is the
			// type the decoded response carries.
			require.Equal(t, 0., result[0].(map[string]interface{})["num"])
			require.Equal(t, 1., result[1].(map[string]interface{})["num"])
		})
	}
}
// TestSearchOnSomeProperties checks that restricting a keyword search to a
// single property only matches objects whose searched property contains the
// term.
//
// One object is stored with one="hello" and two="world". Searching for
// "hello" must return it when the search is limited to property "one" and
// must return nothing when limited to property "two", for both bm25 and
// hybrid (alpha 0) queries.
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestSearchOnSomeProperties(t *testing.T) {
	ctx := context.Background()
	c, err := client.NewClient(client.Config{Scheme: "http", Host: "localhost:8080"})
	require.Nil(t, err)
	c.Schema().AllDeleter().Do(ctx)

	// only one property contains the search term
	cases := []struct {
		queryType string // hybrid or bm25
		property  string
		results   int
	}{
		{queryType: "bm25", property: "one", results: 1},
		{queryType: "hybrid", property: "one", results: 1},
		{queryType: "bm25", property: "two", results: 0},
		{queryType: "hybrid", property: "two", results: 0},
	}
	for _, tt := range cases {
		// Each query type appears once per property, so the property has to
		// be part of the subtest name to keep the names unique.
		t.Run("search on some properties "+tt.queryType+" "+tt.property, func(t *testing.T) {
			className := "Paragraph15845"
			class := &models.Class{
				Class: className,
				Properties: []*models.Property{
					{
						Name:            "one",
						DataType:        schema.DataTypeText.PropString(),
						Tokenization:    models.PropertyTokenizationWord,
						IndexFilterable: &vFalse,
						IndexSearchable: &vTrue,
					},
					{
						Name:            "two",
						DataType:        schema.DataTypeText.PropString(),
						Tokenization:    models.PropertyTokenizationWord,
						IndexFilterable: &vFalse,
						IndexSearchable: &vTrue,
					},
				},
				InvertedIndexConfig: &models.InvertedIndexConfig{Bm25: &models.BM25Config{K1: 1.2, B: 0.75}},
				Vectorizer:          "none",
			}
			require.Nil(t, c.Schema().ClassCreator().WithClass(class).Do(ctx))
			// Every case reuses the same class name, so the class is removed
			// again when the subtest ends.
			defer c.Schema().ClassDeleter().WithClassName(className).Do(ctx)

			creator := c.Data().Creator()
			_, err := creator.WithClassName(className).WithProperties(
				map[string]interface{}{"one": "hello", "two": "world"}).Do(ctx)
			require.Nil(t, err)

			alpha := ""
			if tt.queryType == "hybrid" {
				alpha = "alpha:0" // exclude vector search, it doesn't matter for this testcase
			}
			results, err := c.GraphQL().Raw().WithQuery(fmt.Sprintf("{Get{%s(%s:{query:\"hello\", properties: [\"%s\"] %s} ){_additional{id}}}}", className, tt.queryType, tt.property, alpha)).Do(ctx)
			// Fail cleanly on a request error instead of panicking on a nil
			// response below.
			require.Nil(t, err)

			result := results.Data["Get"].(map[string]interface{})[className].([]interface{})
			require.Len(t, result, tt.results)
		})
	}
}
// TestAutocut runs keyword searches for "rain nice" with different autocut
// settings and checks how many objects come back.
//
// The class is populated by AddClassAndObjects with vectorizer "none". Each
// autocut value is combined with a hybrid query (alpha 0.0,
// relativeScoreFusion) and a bm25 query, both limited to the "contents"
// property. Expected result counts are 2 for autocut 1, 4 for autocut 2 and
// 4 for autocut -1 (disabled). In every case the first two hits must be the
// objects with num 0 and num 1, in that order.
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestAutocut(t *testing.T) {
	ctx := context.Background()
	wv := client.New(client.Config{Scheme: "http", Host: "localhost:8080"})
	wv.Schema().AllDeleter().Do(ctx)

	const className = "Paragraph453745"
	AddClassAndObjects(t, className, string(schema.DataTypeTextArray), wv, "none")
	defer wv.Schema().ClassDeleter().WithClassName(className).Do(ctx)

	// Opening halves of the search argument; the query template below
	// appends the properties list and the closing brace.
	searches := []string{
		"hybrid:{query:\"rain nice\", alpha: 0.0, fusionType: relativeScoreFusion",
		"bm25:{query:\"rain nice\"",
	}

	type autocutCase struct {
		autocut    int
		numResults int
	}
	table := []autocutCase{
		{autocut: 1, numResults: 2},
		{autocut: 2, numResults: 4},
		{autocut: -1, numResults: 4 /*disabled*/},
	}

	for i := range table {
		tc := table[i]
		for _, search := range searches {
			name := "autocut " + fmt.Sprint(tc.autocut, " ", search)
			query := fmt.Sprintf("{Get{%s(%s, properties: [\"contents\"]}, autocut: %d){num}}}", className, search, tc.autocut)

			t.Run(name, func(t *testing.T) {
				resp, err := wv.GraphQL().Raw().WithQuery(query).Do(ctx)
				require.Nil(t, err)

				hits := resp.Data["Get"].(map[string]interface{})[className].([]interface{})
				require.Len(t, hits, tc.numResults)

				first := hits[0].(map[string]interface{})
				second := hits[1].(map[string]interface{})
				require.Equal(t, 0., first["num"])
				require.Equal(t, 1., second["num"])
			})
		}
	}
}
// TestHybridWithPureVectorSearch issues a hybrid query with alpha 1 and
// autocut disabled (-1) against a class vectorized with
// text2vec-contextionary, and expects all four objects created by
// AddClassAndObjects to be returned.
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestHybridWithPureVectorSearch(t *testing.T) {
	ctx := context.Background()
	wv := client.New(client.Config{Scheme: "http", Host: "localhost:8080"})
	wv.Schema().AllDeleter().Do(ctx)

	const className = "ParagraphWithManyWords"
	AddClassAndObjects(t, className, string(schema.DataTypeTextArray), wv, "text2vec-contextionary")
	defer wv.Schema().ClassDeleter().WithClassName(className).Do(ctx)

	query := fmt.Sprintf("{Get{%s(hybrid: {query: \"rain nice\" properties: [\"contents\"], alpha:1}, autocut: -1){num}}}", className)
	resp, err := wv.GraphQL().Raw().WithQuery(query).Do(ctx)
	require.Nil(t, err)

	classes := resp.Data["Get"].(map[string]interface{})
	hits := classes[className].([]interface{})
	require.Len(t, hits, 4)
}
// TestNearVectorAndObjectAutocut checks the number of results that
// nearVector and nearObject searches return for different autocut values.
//
// Nine objects are stored with explicit six-dimensional vectors that differ
// only in the last component; those values fall into three groups of three
// (1.x, 2.x and 3.x). Searching near the all-ones vector, or near the first
// inserted object, is expected to return 3 objects for autocut 1, 6 for
// autocut 2 and all 9 when autocut is disabled (-1).
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestNearVectorAndObjectAutocut(t *testing.T) {
	ctx := context.Background()
	wv := client.New(client.Config{Scheme: "http", Host: "localhost:8080"})
	wv.Schema().AllDeleter().Do(ctx)

	const className = "YellowAndBlueTrain"
	require.Nil(t, wv.Schema().ClassCreator().WithClass(&models.Class{
		Class:      className,
		Vectorizer: "none",
	}).Do(ctx))
	defer wv.Schema().ClassDeleter().WithClassName(className).Do(ctx)

	// Last vector component of every object, in insertion order.
	lastComponents := []float32{1, 1.1, 1.2, 2.0, 2.1, 2.2, 3.1, 3.2, 3.2}

	var ids []string
	creator := wv.Data().Creator()
	for _, last := range lastComponents {
		id := uuid.New().String()
		ids = append(ids, id)
		_, err := creator.WithClassName(className).WithVector([]float32{1, 1, 1, 1, 1, last}).WithID(id).Do(ctx)
		require.Nil(t, err)
	}

	// Both search flavours share the same expectations.
	expectations := []struct {
		autocut    int
		numResults int
	}{
		{autocut: 1, numResults: 3},
		{autocut: 2, numResults: 6},
		{autocut: -1, numResults: 9 /*disabled*/},
	}

	// runAutocutCases runs one subtest per expectation, using buildQuery to
	// produce the raw GraphQL query for the given autocut value.
	runAutocutCases := func(t *testing.T, buildQuery func(autocut int) string) {
		for i := range expectations {
			want := expectations[i]
			t.Run("autocut "+fmt.Sprint(want.autocut), func(t *testing.T) {
				resp, err := wv.GraphQL().Raw().WithQuery(buildQuery(want.autocut)).Do(ctx)
				require.Nil(t, err)

				hits := resp.Data["Get"].(map[string]interface{})[className].([]interface{})
				require.Len(t, hits, want.numResults)
			})
		}
	}

	t.Run("near vector", func(t *testing.T) {
		runAutocutCases(t, func(autocut int) string {
			return fmt.Sprintf("{Get{%s(nearVector:{vector:[1, 1, 1, 1, 1, 1]}, autocut: %d){_additional{vector}}}}", className, autocut)
		})
	})

	t.Run("near object", func(t *testing.T) {
		runAutocutCases(t, func(autocut int) string {
			return fmt.Sprintf("{Get{%s(nearObject:{id:%q}, autocut: %d){_additional{vector}}}}", className, ids[0], autocut)
		})
	})
}
// TestNearTextAutocut checks the number of results a nearText search for the
// concept "word" returns with and without autocut.
//
// Four objects are stored in a class vectorized with text2vec-contextionary:
// three whose text contains "word" and one with "completely unrelated".
// With autocut 1 the search is expected to return 3 objects; with autocut
// disabled (-1) it is expected to return all 4.
//
// The test talks to a Weaviate instance on localhost:8080 and deletes the
// whole schema before it starts.
func TestNearTextAutocut(t *testing.T) {
	ctx := context.Background()
	wv := client.New(client.Config{Scheme: "http", Host: "localhost:8080"})
	wv.Schema().AllDeleter().Do(ctx)

	const className = "YellowAndBlueSub"
	textProperty := &models.Property{
		Name:         "text",
		DataType:     schema.DataTypeText.PropString(),
		Tokenization: models.PropertyTokenizationWord,
	}
	require.Nil(t, wv.Schema().ClassCreator().WithClass(&models.Class{
		Class:      className,
		Properties: []*models.Property{textProperty},
		Vectorizer: "text2vec-contextionary",
	}).Do(ctx))
	defer wv.Schema().ClassDeleter().WithClassName(className).Do(ctx)

	creator := wv.Data().Creator()
	for _, text := range []string{"word", "another word", "another word and", "completely unrelated"} {
		props := map[string]interface{}{"text": text}
		_, err := creator.WithClassName(className).WithProperties(props).Do(ctx)
		require.Nil(t, err)
	}

	expectations := []struct {
		autocut    int
		numResults int
	}{
		{autocut: 1, numResults: 3},
		{autocut: -1, numResults: 4 /*disabled*/},
	}
	for i := range expectations {
		want := expectations[i]
		t.Run("autocut "+fmt.Sprint(want.autocut), func(t *testing.T) {
			query := fmt.Sprintf("{Get{%s(nearText:{concepts: \"word\"}, autocut: %d){_additional{vector}}}}", className, want.autocut)
			resp, err := wv.GraphQL().Raw().WithQuery(query).Do(ctx)
			require.Nil(t, err)

			hits := resp.Data["Get"].(map[string]interface{})[className].([]interface{})
			require.Len(t, hits, want.numResults)
		})
	}
}