Spaces:
Running
Running
File size: 4,892 Bytes
b110593 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: hello@weaviate.io
//
package vectorizer
import (
"context"
"fmt"
"sort"
"strings"
"github.com/fatih/camelcase"
"github.com/weaviate/weaviate/entities/models"
"github.com/weaviate/weaviate/entities/moduletools"
"github.com/weaviate/weaviate/modules/text2vec-palm/ent"
)
// Vectorizer computes object vectors by building a text corpus from an
// object's class name and properties and handing it to a Client.
type Vectorizer struct {
	// client performs the actual text-to-vector requests.
	client Client
}
// New returns a Vectorizer that sends its vectorization requests to client.
func New(client Client) *Vectorizer {
	v := new(Vectorizer)
	v.client = client
	return v
}
// Client is the dependency the Vectorizer uses to turn text into vectors.
type Client interface {
	// Vectorize requests vectors for input using config. In this file it is
	// called with a single space-joined corpus string as input and the
	// space-joined text of the title property as titlePropertyValue.
	Vectorize(ctx context.Context, input []string,
		config ent.VectorizationConfig, titlePropertyValue string) (*ent.VectorizationResult, error)
	// VectorizeQuery requests vectors for input using config, without a title
	// value. NOTE(review): presumably used for search queries; it is not
	// called in the visible part of this file, so confirm against its callers.
	VectorizeQuery(ctx context.Context, input []string,
		config ent.VectorizationConfig) (*ent.VectorizationResult, error)
}
// ClassSettings exposes the per-class configuration the vectorizer reads while
// assembling the text to vectorize and the configuration sent to the client.
type ClassSettings interface {
	// PropertyIndexed reports whether property takes part in vectorization;
	// properties for which it returns false are skipped.
	PropertyIndexed(property string) bool
	// VectorizePropertyName reports whether the (split, lowercased) property
	// name is prepended to that property's text.
	VectorizePropertyName(propertyName string) bool
	// VectorizeClassName reports whether the class name is added to the text.
	VectorizeClassName() bool
	// ApiEndpoint is copied into the ApiEndpoint field of the client config.
	ApiEndpoint() string
	// ProjectID is copied into the ProjectID field of the client config.
	ProjectID() string
	// ModelID is copied into the Model field of the client config.
	ModelID() string
	// TitleProperty names the property whose text is additionally passed to
	// the client as the title value.
	TitleProperty() string
}
// sortStringKeys returns the keys of schema_map in ascending string order so
// that callers can iterate the map deterministically. The result is never nil,
// even for an empty or nil map.
func sortStringKeys(schema_map map[string]interface{}) []string {
	sorted := make([]string, len(schema_map))
	next := 0
	for name := range schema_map {
		sorted[next] = name
		next++
	}
	sort.Strings(sorted)
	return sorted
}
// Object vectorizes object from its class name and properties and stores the
// resulting vector in object.Vector. objDiff and settings are passed through
// to the internal vectorization routine. If vectorization fails the error is
// returned and object.Vector is left unchanged.
func (v *Vectorizer) Object(ctx context.Context, object *models.Object,
	objDiff *moduletools.ObjectDiff, settings ClassSettings,
) error {
	vector, err := v.object(ctx, object.Class, object.Properties, objDiff, settings)
	if err == nil {
		object.Vector = vector
	}
	return err
}
// appendPropIfText appends the lowercased text of value to *list when value
// is a string, and reports whether it did so. Non-string values leave *list
// untouched and yield false.
//
// When icheck.VectorizePropertyName(propName) is true the appended entry is
// the split, lowercased property name, a space, and the value; otherwise it
// is the value alone.
func appendPropIfText(icheck ClassSettings, list *[]string, propName string,
	value interface{},
) bool {
	text, isText := value.(string)
	if !isText {
		return false
	}

	entry := text
	if icheck.VectorizePropertyName(propName) {
		// prefix the value with the human-readable property name
		entry = fmt.Sprintf("%s %s", camelCaseToLower(propName), text)
	}
	*list = append(*list, strings.ToLower(entry))
	return true
}
// appendTitlePropIfText behaves like appendPropIfText, but only for the
// property configured as the title property (icheck.TitleProperty()). For any
// other property it appends nothing and returns false.
func appendTitlePropIfText(icheck ClassSettings, list *[]string, propName string,
	value interface{},
) bool {
	if icheck.TitleProperty() != propName {
		return false
	}
	return appendPropIfText(icheck, list, propName, value)
}
// object builds the text corpus for one object and returns its vector.
//
// The corpus consists of the split, lowercased class name (when
// icheck.VectorizeClassName() is true) followed by the text of every indexed
// string property, visited in sorted key order so the result is deterministic.
// Properties may be a plain value, a []string, or a []interface{}; only string
// values contribute. Text of the configured title property is additionally
// collected and passed to the client as the title value. If nothing was
// collected, the class name alone is used as the corpus.
//
// schema must be nil or a map[string]interface{}; any other type yields an
// error. When objDiff carries a previous vector and no contributing property
// is reported as changed, that previous vector is returned without calling
// the client.
//
// An error is returned when the client fails or produces no vectors. If the
// client returns more than one vector they are merged with v.CombineVectors.
func (v *Vectorizer) object(ctx context.Context, className string,
	schema interface{}, objDiff *moduletools.ObjectDiff, icheck ClassSettings,
) ([]float32, error) {
	// Without a diff, or without a previous vector in it, there is nothing to
	// reuse, so vectorization is required regardless of what changed.
	vectorize := objDiff == nil || objDiff.GetVec() == nil

	var titlePropertyValue []string
	var corpi []string
	if icheck.VectorizeClassName() {
		corpi = append(corpi, camelCaseToLower(className))
	}

	if schema != nil {
		// Checked assertion: an unexpected properties type is reported as an
		// error instead of panicking.
		schemamap, ok := schema.(map[string]interface{})
		if !ok {
			return nil, fmt.Errorf("unexpected properties type %T, expected map[string]interface{}", schema)
		}
		for _, prop := range sortStringKeys(schemamap) {
			if !icheck.PropertyIndexed(prop) {
				continue
			}
			appended := false
			switch val := schemamap[prop].(type) {
			case []string:
				for _, elem := range val {
					appended = appendPropIfText(icheck, &corpi, prop, elem) || appended
					appendTitlePropIfText(icheck, &titlePropertyValue, prop, elem)
				}
			case []interface{}:
				for _, elem := range val {
					appended = appendPropIfText(icheck, &corpi, prop, elem) || appended
					appendTitlePropIfText(icheck, &titlePropertyValue, prop, elem)
				}
			default:
				appended = appendPropIfText(icheck, &corpi, prop, val)
				appendTitlePropIfText(icheck, &titlePropertyValue, prop, val)
			}
			// Only a changed property that actually contributed text forces a
			// new vector.
			vectorize = vectorize || (appended && objDiff != nil && objDiff.IsChangedProp(prop))
		}
	}
	if len(corpi) == 0 {
		// fall back to using the class name
		corpi = append(corpi, camelCaseToLower(className))
	}

	// no property was changed, old vector can be used
	// (vectorize can only be false here when objDiff is non-nil)
	if !vectorize {
		return objDiff.GetVec(), nil
	}

	text := []string{strings.Join(corpi, " ")}
	titleProperty := strings.Join(titlePropertyValue, " ")
	res, err := v.client.Vectorize(ctx, text, ent.VectorizationConfig{
		ApiEndpoint: icheck.ApiEndpoint(),
		ProjectID:   icheck.ProjectID(),
		Model:       icheck.ModelID(),
	}, titleProperty)
	if err != nil {
		return nil, err
	}
	// Guard against a nil result as well as an empty one so a misbehaving
	// client cannot cause a nil-pointer dereference.
	if res == nil || len(res.Vectors) == 0 {
		return nil, fmt.Errorf("no vectors generated")
	}

	if len(res.Vectors) > 1 {
		return v.CombineVectors(res.Vectors), nil
	}
	return res.Vectors[0], nil
}
// camelCaseToLower splits in with camelcase.Split, lowercases every part and
// joins the parts with single spaces. Parts that consist of exactly one space
// are dropped. The separator is written for every kept part whose index in
// the split result is greater than zero.
func camelCaseToLower(in string) string {
	var out strings.Builder
	for idx, word := range camelcase.Split(in) {
		if word != " " {
			if idx != 0 {
				out.WriteString(" ")
			}
			out.WriteString(strings.ToLower(word))
		}
	}
	return out.String()
}
|