KevinStephenson
Adding in weaviate code
b110593
raw
history blame
1.77 kB
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package stopwords
import (
"sync"
"github.com/weaviate/weaviate/entities/models"
"github.com/pkg/errors"
)
// StopwordDetector is the minimal contract for anything that can decide
// whether a given word counts as a stopword.
type StopwordDetector interface {
	// IsStopword reports whether word is considered a stopword.
	IsStopword(word string) bool
}
// Detector is a mutable set of stopwords. The methods on *Detector lock the
// embedded mutex before touching the map. Because it embeds a sync.Mutex, a
// Detector must be shared by pointer and not copied once it is in use.
type Detector struct {
	sync.Mutex

	// stopwords is used as a set: a word is a stopword iff it is a key here.
	stopwords map[string]struct{}
}
// NewDetectorFromConfig builds a Detector from a stopword configuration.
//
// The detector is first created from config.Preset via NewDetectorFromPreset;
// any error from that step is returned wrapped. config.Additions are then
// added and config.Removals removed, in that order, so a word that appears in
// both lists ends up not being a stopword.
func NewDetectorFromConfig(config models.StopwordConfig) (*Detector, error) {
	detector, err := NewDetectorFromPreset(config.Preset)
	if err != nil {
		return nil, errors.Wrap(err, "failed to create new detector from config")
	}

	// Order matters: removals are applied on top of preset + additions.
	detector.SetAdditions(config.Additions)
	detector.SetRemovals(config.Removals)

	return detector, nil
}
// NewDetectorFromPreset builds a Detector seeded with the words of the named
// preset, looked up in Presets.
//
// An empty preset name is accepted and yields a detector with no stopwords.
// A non-empty name that is not a key of Presets yields an error.
func NewDetectorFromPreset(preset string) (*Detector, error) {
	var words []string
	if preset != "" {
		known, found := Presets[preset]
		if !found {
			return nil, errors.Errorf("preset %q not known to stopword detector", preset)
		}
		words = known
	}

	// Build the set up front, then hand it to the detector.
	set := make(map[string]struct{}, len(words))
	for i := range words {
		set[words[i]] = struct{}{}
	}

	return &Detector{stopwords: set}, nil
}
// SetAdditions adds every word in additions to the detector's stopword set.
// Words already in the set are left in place; nothing is removed, so despite
// the "Set" prefix this call is additive rather than a replacement.
//
// The detector's mutex is held for the duration of the call. The method is
// safe to call on a zero-value Detector: the underlying map is allocated on
// first use instead of panicking on a write to a nil map.
func (d *Detector) SetAdditions(additions []string) {
	d.Lock()
	defer d.Unlock()

	// A Detector that was not built by one of the constructors has a nil map,
	// and writing to a nil map panics.
	if d.stopwords == nil {
		d.stopwords = make(map[string]struct{}, len(additions))
	}

	for _, add := range additions {
		d.stopwords[add] = struct{}{}
	}
}
// SetRemovals deletes every word in removals from the detector's stopword
// set. Words that are not currently in the set are ignored. The detector's
// mutex is held for the duration of the call.
func (d *Detector) SetRemovals(removals []string) {
	d.Lock()
	defer d.Unlock()

	for i := range removals {
		delete(d.stopwords, removals[i])
	}
}
// IsStopword reports whether word is currently in the detector's stopword
// set. The lookup is an exact key match on the string as given. The
// detector's mutex is held while the map is read.
func (d *Detector) IsStopword(word string) bool {
	d.Lock()
	defer d.Unlock()

	_, present := d.stopwords[word]
	return present
}