KevinStephenson
Adding in weaviate code
b110593
raw
history blame
5.59 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package inverted
import (
"math"
"testing"
"github.com/stretchr/testify/assert"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/schema"
"github.com/weaviate/weaviate/usecases/config"
)
// float64EqualityThreshold is the largest absolute difference at which
// almostEqual still treats two float64 values as equal.
const float64EqualityThreshold = 1e-6

// almostEqual reports whether a and b are identical or differ by at most
// float64EqualityThreshold.
//
// On a mismatch it logs both values and the threshold through t.Logf. It never
// fails the test itself, so callers are expected to assert on the result.
func almostEqual(t *testing.T, a, b float64) bool {
	// Attribute the log line below to the calling test, not to this helper.
	t.Helper()

	// Identical values are trivially close. Checking this first also covers
	// equal infinities, whose difference is NaN and would therefore fail the
	// threshold comparison below.
	if a == b {
		return true
	}
	if math.Abs(a-b) <= float64EqualityThreshold {
		return true
	}

	t.Logf("%f and %f differ by more than a threshold of %f",
		a, b, float64EqualityThreshold)
	return false
}
// TestValidateConfig feeds ValidateConfig a series of inverted index
// configurations and checks the outcome of each one: the exact error text for
// rejected BM25 and stopword settings, and a nil error for accepted ones.
func TestValidateConfig(t *testing.T) {
	t.Run("with invalid BM25.k1", func(t *testing.T) {
		in := &models.InvertedIndexConfig{
			Bm25: &models.BM25Config{
				K1: -1,
				B:  0.7,
			},
		}

		err := ValidateConfig(in)
		assert.EqualError(t, err, "BM25.k1 must be >= 0")
	})

	t.Run("with invalid BM25.b", func(t *testing.T) {
		in := &models.InvertedIndexConfig{
			Bm25: &models.BM25Config{
				K1: 1,
				B:  1.001,
			},
		}

		err := ValidateConfig(in)
		// NOTE(review): the message reads "<= 0" where ">= 0" looks intended.
		// It is kept verbatim because the assertion compares the exact text;
		// confirm against ValidateConfig before changing either side.
		assert.EqualError(t, err, "BM25.b must be <= 0 and <= 1")
	})

	t.Run("with valid config", func(t *testing.T) {
		in := &models.InvertedIndexConfig{
			Bm25: &models.BM25Config{
				K1: 1,
				B:  0.1,
			},
		}

		err := ValidateConfig(in)
		assert.NoError(t, err)
	})

	t.Run("with nonexistent stopword preset", func(t *testing.T) {
		in := &models.InvertedIndexConfig{
			Stopwords: &models.StopwordConfig{
				Preset: "DNE",
			},
		}

		err := ValidateConfig(in)
		assert.EqualError(t, err, "stopwordPreset 'DNE' does not exist")
	})

	t.Run("with whitespace stopword additions", func(t *testing.T) {
		// Each list contains at least one empty or blank entry.
		additions := [][]string{
			{"bats", " "},
			{""},
			{"something", " ", "skippable"},
		}

		for _, addList := range additions {
			in := &models.InvertedIndexConfig{
				Stopwords: &models.StopwordConfig{
					Additions: addList,
				},
			}

			err := ValidateConfig(in)
			// The trailing message identifies the failing list; %q keeps
			// blank entries visible in the output.
			assert.EqualError(t, err, "cannot use whitespace in stopword.additions",
				"additions: %q", addList)
		}
	})

	t.Run("with whitespace stopword removals", func(t *testing.T) {
		// Each list contains at least one empty or blank entry.
		removals := [][]string{
			{"bats", " "},
			{""},
			{"something", " ", "skippable"},
		}

		for _, remList := range removals {
			in := &models.InvertedIndexConfig{
				Stopwords: &models.StopwordConfig{
					Removals: remList,
				},
			}

			err := ValidateConfig(in)
			assert.EqualError(t, err, "cannot use whitespace in stopword.removals",
				"removals: %q", remList)
		}
	})

	t.Run("with shared additions/removals items", func(t *testing.T) {
		// "some" appears in both lists.
		in := &models.InvertedIndexConfig{
			Stopwords: &models.StopwordConfig{
				Additions: []string{"some", "words", "are", "different"},
				Removals:  []string{"and", "some", "the", "same"},
			},
		}

		err := ValidateConfig(in)
		assert.EqualError(t, err,
			"found 'some' in both stopwords.additions and stopwords.removals")
	})

	t.Run("with additions that exist in preset", func(t *testing.T) {
		// expectedLength is the number of additions the test expects to remain
		// on the config after validation with the "en" preset selected.
		tests := []struct {
			additions      []string
			expectedLength int
		}{
			{
				additions:      []string{"superfluous", "extravagant", "a"},
				expectedLength: 2,
			},
			{
				additions:      []string{"a", "are", "the"},
				expectedLength: 0,
			},
			{
				additions:      []string{"everyone", "sleeps", "eventually"},
				expectedLength: 3,
			},
		}

		for _, tc := range tests {
			in := &models.InvertedIndexConfig{
				Stopwords: &models.StopwordConfig{
					Preset:    "en",
					Additions: tc.additions,
				},
			}

			err := ValidateConfig(in)
			assert.NoError(t, err, "additions: %q", tc.additions)
			// The remaining additions are read back from the input config.
			assert.Len(t, in.Stopwords.Additions, tc.expectedLength,
				"additions: %q", tc.additions)
		}
	})
}
// TestConfigFromModel checks the schema config that ConfigFromModel builds
// from a models.InvertedIndexConfig: explicitly set BM25 and stopword values
// are carried over, and the expected defaults appear when those sections are
// left out of the model.
func TestConfigFromModel(t *testing.T) {
	t.Run("with all fields set", func(t *testing.T) {
		wantK1 := 1.12
		wantB := 0.7

		// The model stores the BM25 parameters as float32, hence the
		// tolerance-based comparison further down.
		model := &models.InvertedIndexConfig{
			Bm25: &models.BM25Config{
				K1: float32(wantK1),
				B:  float32(wantB),
			},
			Stopwords: &models.StopwordConfig{
				Preset: "en",
			},
		}
		want := schema.InvertedIndexConfig{
			BM25: schema.BM25Config{
				K1: wantK1,
				B:  wantB,
			},
			Stopwords: models.StopwordConfig{
				Preset: "en",
			},
		}

		got := ConfigFromModel(model)

		assert.True(t, almostEqual(t, got.BM25.K1, want.BM25.K1))
		assert.True(t, almostEqual(t, got.BM25.B, want.BM25.B))
		assert.Equal(t, want.Stopwords, got.Stopwords)
	})

	t.Run("with no BM25 params set", func(t *testing.T) {
		// Only the cleanup interval is provided; the BM25 section is absent.
		model := &models.InvertedIndexConfig{
			CleanupIntervalSeconds: 1,
		}
		wantBM25 := schema.BM25Config{
			K1: float64(config.DefaultBM25k1),
			B:  float64(config.DefaultBM25b),
		}

		got := ConfigFromModel(model)

		assert.True(t, almostEqual(t, got.BM25.K1, wantBM25.K1))
		assert.True(t, almostEqual(t, got.BM25.B, wantBM25.B))
	})

	t.Run("with no Stopword config set", func(t *testing.T) {
		// Only the cleanup interval is provided; the stopword section is absent.
		model := &models.InvertedIndexConfig{
			CleanupIntervalSeconds: 1,
		}
		wantStopwords := models.StopwordConfig{
			Preset: "en",
		}

		got := ConfigFromModel(model)

		assert.Equal(t, wantStopwords, got.Stopwords)
	})
}