KevinStephenson
Adding in weaviate code
b110593
raw
history blame
5.28 kB
// _ _
// __ _____ __ ___ ___ __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
// \ V V / __/ (_| |\ V /| | (_| | || __/
// \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
// CONTACT: [email protected]
//
package hybrid
import (
"fmt"
"sort"
"github.com/go-openapi/strfmt"
)
func FusionRanked(weights []float64, resultSets [][]*Result, setNames []string) []*Result {
combinedResults := map[strfmt.UUID]*Result{}
for resultSetIndex, resultSet := range resultSets {
for i, res := range resultSet {
tempResult := res
docId := tempResult.ID
score := weights[resultSetIndex] / float64(i+60+1) // TODO replace 60 with a class configured variable
if tempResult.AdditionalProperties == nil {
tempResult.AdditionalProperties = map[string]interface{}{}
}
// Get previous results from the map, if any
previousResult, ok := combinedResults[docId]
if ok {
tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
"%v\n(Result Set %v) Document %v contributed %v to the score",
previousResult.AdditionalProperties["explainScore"], setNames[resultSetIndex], tempResult.ID, score)
score += float64(previousResult.Score)
} else {
tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
"%v\n(Result Set %v) Document %v contributed %v to the score",
tempResult.ExplainScore, setNames[resultSetIndex], tempResult.ID, score)
}
tempResult.AdditionalProperties["rank_score"] = score
tempResult.AdditionalProperties["score"] = score
tempResult.Score = float32(score)
combinedResults[docId] = tempResult
}
}
// Sort the results
var (
concat = make([]*Result, len(combinedResults))
i = 0
)
for _, res := range combinedResults {
res.ExplainScore = res.AdditionalProperties["explainScore"].(string)
concat[i] = res
i++
}
sort.Slice(concat, func(i, j int) bool {
if concat[j].Score == concat[i].Score {
return concat[i].SecondarySortValue > concat[j].SecondarySortValue
}
return float64(concat[i].Score) > float64(concat[j].Score)
})
return concat
}
// FusionRelativeScore uses the relative differences in the scores from keyword and vector search to combine the
// results. This method retains more information than ranked fusion and should result in better results.
//
// The scores from each result are normalized between 0 and 1, e.g. the maximum score becomes 1 and the minimum 0 and the
// other scores are in between, keeping their relative distance to the other scores.
// Example:
//
// Input score = [1, 8, 6, 11] => [0, 0.7, 0.5, 1]
//
// The normalized scores are then combined using their respective weight and the combined scores are sorted
func FusionRelativeScore(weights []float64, resultSets [][]*Result, names []string) []*Result {
if len(resultSets[0]) == 0 && (len(resultSets) == 1 || len(resultSets[1]) == 0) {
return []*Result{}
}
var maximum []float32
var minimum []float32
for i := range resultSets {
if len(resultSets[i]) > 0 {
maximum = append(maximum, resultSets[i][0].SecondarySortValue)
minimum = append(minimum, resultSets[i][0].SecondarySortValue)
} else { // dummy values so the indices match
maximum = append(maximum, 0)
minimum = append(minimum, 0)
}
for _, res := range resultSets[i] {
if res.SecondarySortValue > maximum[i] {
maximum[i] = res.SecondarySortValue
}
if res.SecondarySortValue < minimum[i] {
minimum[i] = res.SecondarySortValue
}
}
}
// normalize scores between 0 and 1 and sum uo the normalized scores from different sources
// pre-allocate map, at this stage we do not know how many total, combined results there are, but it is at least the
// length of the longer input list
numResults := len(resultSets[0])
if len(resultSets) > 1 && len(resultSets[1]) > numResults {
numResults = len(resultSets[1])
}
mapResults := make(map[strfmt.UUID]*Result, numResults)
for i := range resultSets {
weight := float32(weights[i])
for _, res := range resultSets[i] {
// If all scores are identical min and max are the same => just set score to the weight.
score := weight
if maximum[i] != minimum[i] {
score *= (res.SecondarySortValue - minimum[i]) / (maximum[i] - minimum[i])
}
previousResult, ok := mapResults[res.ID]
explainScore := fmt.Sprintf("(Result Set '%v') Document %v: original score %v, normalized score: %v", names[i], res.ID, res.SecondarySortValue, score)
if ok {
score += previousResult.Score
explainScore += " - " + previousResult.ExplainScore
}
res.Score = score
res.ExplainScore = explainScore
mapResults[res.ID] = res
}
}
concat := make([]*Result, 0, len(mapResults))
for _, res := range mapResults {
concat = append(concat, res)
}
sort.Slice(concat, func(i, j int) bool {
a_b := float64(concat[j].Score - concat[i].Score)
if a_b*a_b < 1e-14 {
return concat[i].SecondarySortValue > concat[j].SecondarySortValue
}
return float64(concat[i].Score) > float64(concat[j].Score)
})
return concat
}