KevinStephenson
Adding in weaviate code
b110593
raw
history blame
3.75 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package stopwords
import (
"testing"
"github.com/stretchr/testify/require"
"github.com/weaviate/weaviate/entities/models"
)
// TestStopwordDetector builds a detector from several StopwordConfig
// combinations (preset, additions, removals) and checks how many of the input
// words are NOT reported as stopwords by IsStopword.
func TestStopwordDetector(t *testing.T) {
	// testcase pairs a stopword configuration with a list of input words and
	// the number of those words expected to survive stopword filtering.
	type testcase struct {
		cfg               models.StopwordConfig
		input             []string
		expectedCountable int
	}

	// runTest creates a fresh detector per case, filters the input through
	// IsStopword and asserts on the number of remaining ("countable") words.
	runTest := func(t *testing.T, tests []testcase) {
		for i, test := range tests {
			sd, err := NewDetectorFromConfig(test.cfg)
			require.NoError(t, err)

			// Keep the surviving words (not just a counter) so that a failing
			// assertion can show exactly which words were let through.
			var countable []string
			for _, word := range test.input {
				if !sd.IsStopword(word) {
					countable = append(countable, word)
				}
			}

			require.Equal(t, test.expectedCountable, len(countable),
				"case %d: input %v, non-stopwords %v", i, test.input, countable)
		}
	}

	t.Run("with en preset, additions", func(t *testing.T) {
		tests := []testcase{
			{
				cfg: models.StopwordConfig{
					Preset:    "en",
					Additions: []string{"dog"},
				},
				input:             []string{"dog", "dog", "dog", "dog"},
				expectedCountable: 0,
			},
			{
				cfg: models.StopwordConfig{
					Preset:    "en",
					Additions: []string{"dog"},
				},
				input:             []string{"dog", "dog", "dog", "cat"},
				expectedCountable: 1,
			},
			{
				cfg: models.StopwordConfig{
					Preset:    "en",
					Additions: []string{"dog"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 1,
			},
		}
		runTest(t, tests)
	})

	t.Run("with no preset, additions", func(t *testing.T) {
		tests := []testcase{
			{
				cfg: models.StopwordConfig{
					Preset:    "none",
					Additions: []string{"dog"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 4,
			},
		}
		runTest(t, tests)
	})

	// NOTE: this subtest used to be present twice, byte-for-byte. The copy was
	// dropped because it added no coverage and only produced a second,
	// "#01"-suffixed subtest with the same name.
	t.Run("with en preset, removals", func(t *testing.T) {
		tests := []testcase{
			{
				cfg: models.StopwordConfig{
					Preset:   "en",
					Removals: []string{"a"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 3,
			},
			{
				cfg: models.StopwordConfig{
					Preset:   "en",
					Removals: []string{"a", "is", "the"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 5,
			},
		}
		runTest(t, tests)
	})

	t.Run("with en preset, additions, removals", func(t *testing.T) {
		tests := []testcase{
			{
				cfg: models.StopwordConfig{
					Preset:    "en",
					Additions: []string{"dog"},
					Removals:  []string{"a"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 2,
			},
			{
				cfg: models.StopwordConfig{
					Preset:    "en",
					Additions: []string{"dog", "best"},
					Removals:  []string{"a", "the", "is"},
				},
				input:             []string{"a", "dog", "is", "the", "best"},
				expectedCountable: 3,
			},
		}
		runTest(t, tests)
	})
}