KevinStephenson
Adding in weaviate code
b110593
raw
history blame
811 Bytes
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package classification
// TODO: This code is duplicated across weaviate and contextionary, which makes
// changes risky. Can we find a single source of truth for this logic?
import (
"strings"
"unicode"
)
// newSplitter returns a pointer to a freshly allocated, zero-valued splitter.
func newSplitter() *splitter {
	return new(splitter)
}
// splitter breaks a text corpus into word tokens. It has no fields.
type splitter struct{}

// Split returns the maximal runs of Unicode letters and numbers found in
// corpus, in order of appearance. Every other rune (whitespace, punctuation,
// symbols, ...) acts as a separator and is dropped, so the result never
// contains empty strings. The result is empty when corpus holds no letter or
// number at all.
func (s *splitter) Split(corpus string) []string {
	// A rune separates tokens unless it is a letter or a number.
	isSeparator := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}

	return strings.FieldsFunc(corpus, isSeparator)
}