File size: 5,276 Bytes
b110593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: [email protected]
//

package hybrid

import (
	"fmt"
	"sort"

	"github.com/go-openapi/strfmt"
)

func FusionRanked(weights []float64, resultSets [][]*Result, setNames []string) []*Result {
	combinedResults := map[strfmt.UUID]*Result{}
	for resultSetIndex, resultSet := range resultSets {
		for i, res := range resultSet {
			tempResult := res
			docId := tempResult.ID
			score := weights[resultSetIndex] / float64(i+60+1) // TODO replace 60 with a class configured variable

			if tempResult.AdditionalProperties == nil {
				tempResult.AdditionalProperties = map[string]interface{}{}
			}

			// Get previous results from the map, if any
			previousResult, ok := combinedResults[docId]
			if ok {
				tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
					"%v\n(Result Set %v) Document %v contributed %v to the score",
					previousResult.AdditionalProperties["explainScore"], setNames[resultSetIndex], tempResult.ID, score)
				score += float64(previousResult.Score)
			} else {
				tempResult.AdditionalProperties["explainScore"] = fmt.Sprintf(
					"%v\n(Result Set %v) Document %v contributed %v to the score",
					tempResult.ExplainScore, setNames[resultSetIndex], tempResult.ID, score)
			}
			tempResult.AdditionalProperties["rank_score"] = score
			tempResult.AdditionalProperties["score"] = score

			tempResult.Score = float32(score)
			combinedResults[docId] = tempResult
		}
	}

	// Sort the results
	var (
		concat = make([]*Result, len(combinedResults))
		i      = 0
	)
	for _, res := range combinedResults {
		res.ExplainScore = res.AdditionalProperties["explainScore"].(string)
		concat[i] = res
		i++
	}

	sort.Slice(concat, func(i, j int) bool {
		if concat[j].Score == concat[i].Score {
			return concat[i].SecondarySortValue > concat[j].SecondarySortValue
		}
		return float64(concat[i].Score) > float64(concat[j].Score)
	})
	return concat
}

// FusionRelativeScore uses the relative differences in the scores from keyword and vector search to combine the
// results. This method retains more information than ranked fusion and should result in better results.
//
// The scores from each result are normalized between 0 and 1, e.g. the maximum score becomes 1 and the minimum 0 and the
// other scores are in between, keeping their relative distance to the other scores.
// Example:
//
//	Input score = [1, 8, 6, 11] => [0, 0.7, 0.5, 1]
//
// The normalized scores are then combined using their respective weight and the combined scores are sorted
func FusionRelativeScore(weights []float64, resultSets [][]*Result, names []string) []*Result {
	if len(resultSets[0]) == 0 && (len(resultSets) == 1 || len(resultSets[1]) == 0) {
		return []*Result{}
	}

	var maximum []float32
	var minimum []float32

	for i := range resultSets {
		if len(resultSets[i]) > 0 {
			maximum = append(maximum, resultSets[i][0].SecondarySortValue)
			minimum = append(minimum, resultSets[i][0].SecondarySortValue)
		} else { // dummy values so the indices match
			maximum = append(maximum, 0)
			minimum = append(minimum, 0)
		}
		for _, res := range resultSets[i] {
			if res.SecondarySortValue > maximum[i] {
				maximum[i] = res.SecondarySortValue
			}

			if res.SecondarySortValue < minimum[i] {
				minimum[i] = res.SecondarySortValue
			}
		}
	}

	// normalize scores between 0 and 1 and sum uo the normalized scores from different sources
	// pre-allocate map, at this stage we do not know how many total, combined results there are, but it is at least the
	// length of the longer input list
	numResults := len(resultSets[0])
	if len(resultSets) > 1 && len(resultSets[1]) > numResults {
		numResults = len(resultSets[1])
	}
	mapResults := make(map[strfmt.UUID]*Result, numResults)
	for i := range resultSets {
		weight := float32(weights[i])
		for _, res := range resultSets[i] {
			// If all scores are identical min and max are the same => just set score to the weight.
			score := weight
			if maximum[i] != minimum[i] {
				score *= (res.SecondarySortValue - minimum[i]) / (maximum[i] - minimum[i])
			}

			previousResult, ok := mapResults[res.ID]
			explainScore := fmt.Sprintf("(Result Set '%v') Document %v: original score %v, normalized score: %v", names[i], res.ID, res.SecondarySortValue, score)
			if ok {
				score += previousResult.Score
				explainScore += " - " + previousResult.ExplainScore
			}
			res.Score = score
			res.ExplainScore = explainScore

			mapResults[res.ID] = res
		}
	}

	concat := make([]*Result, 0, len(mapResults))
	for _, res := range mapResults {
		concat = append(concat, res)
	}

	sort.Slice(concat, func(i, j int) bool {
		a_b := float64(concat[j].Score - concat[i].Score)
		if a_b*a_b < 1e-14 {
			return concat[i].SecondarySortValue > concat[j].SecondarySortValue
		}
		return float64(concat[i].Score) > float64(concat[j].Score)
	})
	return concat
}