Spaces:

MVPilgrim
/

SemanticSearchPOC

Running

SemanticSearchPOC / usecases /traverser /hybrid /hybrid_fusion.go

KevinStephenson

Adding in weaviate code

b110593 over 1 year ago

5.28 kB

	// _ _
	// __ _____ __ ___ ___ __ _\| \|_ ___
	// \ \ /\ / / _ \/ _` \ \ / / \|/ _` \| __/ _ \
	// \ V V / __/ (_\| \|\ V /\| \| (_\| \| \|\| __/
	// \_/\_/ \___\|\__,_\| \_/ \|_\|\__,_\|\__\___\|
	//
	// Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
	//
	// CONTACT: [email protected]
	//

	package hybrid

	import (
	"fmt"
	"sort"

	"github.com/go-openapi/strfmt"
	)

	func FusionRanked(weights []float64, resultSets [][]Result, setNames []string) []Result {
	combinedResults := map[strfmt.UUID]*Result{}
	for resultSetIndex, resultSet := range resultSets {
	for i, res := range resultSet {
	tempResult := res
	docId := tempResult.ID
	score := weights[resultSetIndex] / float64(i+60+1) // TODO replace 60 with a class configured variable

	if tempResult.AdditionalProperties == nil {
	tempResult.AdditionalProperties = map[string]interface{}{}
	}

	// Get previous results from the map, if any
	previousResult, ok := combinedResults[docId]
	if ok {
	tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
	"%v\n(Result Set %v) Document %v contributed %v to the score",
	previousResult.AdditionalProperties["explainScore"], setNames[resultSetIndex], tempResult.ID, score)
	score += float64(previousResult.Score)
	} else {
	tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
	"%v\n(Result Set %v) Document %v contributed %v to the score",
	tempResult.ExplainScore, setNames[resultSetIndex], tempResult.ID, score)
	}
	tempResult.AdditionalProperties["rank_score"] = score
	tempResult.AdditionalProperties["score"] = score

	tempResult.Score = float32(score)
	combinedResults[docId] = tempResult
	}
	}

	// Sort the results
	var (
	concat = make([]*Result, len(combinedResults))
	i = 0
	)
	for _, res := range combinedResults {
	res.ExplainScore = res.AdditionalProperties["explainScore"].(string)
	concat[i] = res
	i++
	}

	sort.Slice(concat, func(i, j int) bool {
	if concat[j].Score == concat[i].Score {
	return concat[i].SecondarySortValue > concat[j].SecondarySortValue
	}
	return float64(concat[i].Score) > float64(concat[j].Score)
	})
	return concat
	}

	// FusionRelativeScore uses the relative differences in the scores from keyword and vector search to combine the
	// results. This method retains more information than ranked fusion and should result in better results.
	//
	// The scores from each result are normalized between 0 and 1, e.g. the maximum score becomes 1 and the minimum 0 and the
	// other scores are in between, keeping their relative distance to the other scores.
	// Example:
	//
	// Input score = [1, 8, 6, 11] => [0, 0.7, 0.5, 1]
	//
	// The normalized scores are then combined using their respective weight and the combined scores are sorted
	func FusionRelativeScore(weights []float64, resultSets [][]Result, names []string) []Result {
	if len(resultSets[0]) == 0 && (len(resultSets) == 1 \|\| len(resultSets[1]) == 0) {
	return []*Result{}
	}

	var maximum []float32
	var minimum []float32

	for i := range resultSets {
	if len(resultSets[i]) > 0 {
	maximum = append(maximum, resultSets[i][0].SecondarySortValue)
	minimum = append(minimum, resultSets[i][0].SecondarySortValue)
	} else { // dummy values so the indices match
	maximum = append(maximum, 0)
	minimum = append(minimum, 0)
	}
	for _, res := range resultSets[i] {
	if res.SecondarySortValue > maximum[i] {
	maximum[i] = res.SecondarySortValue
	}

	if res.SecondarySortValue < minimum[i] {
	minimum[i] = res.SecondarySortValue
	}
	}
	}

	// normalize scores between 0 and 1 and sum uo the normalized scores from different sources
	// pre-allocate map, at this stage we do not know how many total, combined results there are, but it is at least the
	// length of the longer input list
	numResults := len(resultSets[0])
	if len(resultSets) > 1 && len(resultSets[1]) > numResults {
	numResults = len(resultSets[1])
	}
	mapResults := make(map[strfmt.UUID]*Result, numResults)
	for i := range resultSets {
	weight := float32(weights[i])
	for _, res := range resultSets[i] {
	// If all scores are identical min and max are the same => just set score to the weight.
	score := weight
	if maximum[i] != minimum[i] {
	score *= (res.SecondarySortValue - minimum[i]) / (maximum[i] - minimum[i])
	}

	previousResult, ok := mapResults[res.ID]
	explainScore := fmt.Sprintf("(Result Set '%v') Document %v: original score %v, normalized score: %v", names[i], res.ID, res.SecondarySortValue, score)
	if ok {
	score += previousResult.Score
	explainScore += " - " + previousResult.ExplainScore
	}
	res.Score = score
	res.ExplainScore = explainScore

	mapResults[res.ID] = res
	}
	}

	concat := make([]*Result, 0, len(mapResults))
	for _, res := range mapResults {
	concat = append(concat, res)
	}

	sort.Slice(concat, func(i, j int) bool {
	a_b := float64(concat[j].Score - concat[i].Score)
	if a_b*a_b < 1e-14 {
	return concat[i].SecondarySortValue > concat[j].SecondarySortValue
	}
	return float64(concat[i].Score) > float64(concat[j].Score)
	})
	return concat
	}