File size: 9,929 Bytes
b110593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: [email protected]
//

// Package aggregate provides the local aggregate graphql endpoint for Weaviate
package aggregate

import (
	"context"
	"fmt"
	"strconv"
	"strings"

	"github.com/tailor-inc/graphql"
	"github.com/tailor-inc/graphql/language/ast"
	"github.com/weaviate/weaviate/adapters/handlers/graphql/local/common_filters"
	"github.com/weaviate/weaviate/entities/aggregation"
	enterrors "github.com/weaviate/weaviate/entities/errors"
	"github.com/weaviate/weaviate/entities/filters"
	"github.com/weaviate/weaviate/entities/models"
	"github.com/weaviate/weaviate/entities/schema"
	"github.com/weaviate/weaviate/entities/searchparams"
)

// GroupedByFieldName is a special GraphQL field that appears alongside the
// to-be-aggregated props, but doesn't require any processing by the connectors
// themselves, as it just displays meta info about the overall aggregation.
// extractProperties skips selections with this name so that they are never
// turned into an aggregation property.
const GroupedByFieldName = "groupedBy"

// Resolver is a local interface that can be composed with other interfaces to
// form the overall GraphQL API main interface. All database connectors that
// want to support the Aggregate (formerly Meta) feature must implement this
// interface.
//
// Aggregate receives the request context, the principal taken from that
// context (nil when the context carries none) and the parsed aggregation
// parameters. When the returned value is an *aggregation.Result, the GraphQL
// resolver in this package returns only its Groups; any other value is
// returned unchanged.
type Resolver interface {
	Aggregate(ctx context.Context, principal *models.Principal, info *aggregation.Params) (interface{}, error)
}

// RequestsLog is a local abstraction on the RequestsLog that needs to be
// provided to the GraphQL API in order to log queries.
//
// NOTE(review): the original comment spoke of "Local.Get" queries even though
// this is the aggregate package — presumably carried over from the Get
// endpoint; confirm which request types are actually registered here.
type RequestsLog interface {
	// Register takes the type of the request and an identifier for it.
	Register(requestType string, identifier string)
}

// makeResolveClass builds the GraphQL field resolver for a single class of the
// Aggregate API. The returned function delegates to resolveAggregate and, when
// that fails, wraps the error as a GraphQL user error tagged with "Aggregate"
// and the class name taken from the resolved field.
func makeResolveClass(modulesProvider ModulesProvider, class *models.Class) graphql.FieldResolveFn {
	return func(p graphql.ResolveParams) (interface{}, error) {
		result, err := resolveAggregate(p, modulesProvider, class)
		if err == nil {
			return result, nil
		}

		// The field name of the resolved field is the class name.
		classLabel := schema.ClassName(p.Info.FieldName).String()
		return result, enterrors.NewErrGraphQLUser(err, "Aggregate", classLabel)
	}
}

// resolveAggregate translates a GraphQL Aggregate request for a single class
// into aggregation.Params and passes them to the Resolver found under the
// "Resolver" key of p.Source.
//
// It collects, in order: the selected properties (and whether "meta" was
// requested), the optional groupBy path, limit, objectLimit, where filters,
// nearVector / nearObject arguments, module-provided search params (only when
// modulesProvider is non-nil), hybrid search params and the tenant.
//
// An error is returned when p.Source has an unexpected shape, when any of the
// arguments cannot be extracted, when objectLimit is not positive, when
// objectLimit is used without a near<Media>/hybrid/module search, or when the
// Resolver itself fails. If the Resolver returns an *aggregation.Result only
// its Groups are returned; any other value is passed through unchanged.
//
// The function panics if p.Info.FieldASTs does not contain exactly one field,
// as that indicates a broken invariant rather than bad user input.
func resolveAggregate(p graphql.ResolveParams, modulesProvider ModulesProvider, class *models.Class) (interface{}, error) {
	className := schema.ClassName(p.Info.FieldName)
	source, ok := p.Source.(map[string]interface{})
	if !ok {
		// %T prints the dynamic type; the boolean verb %t would only yield a
		// formatting error marker here.
		return nil, fmt.Errorf("expected source to be a map, but was %T", p.Source)
	}

	resolver, ok := source["Resolver"].(Resolver)
	if !ok {
		return nil, fmt.Errorf("expected source to contain a usable Resolver, but was %T", source["Resolver"])
	}

	// There can only be exactly one ast.Field; it is the class name.
	if len(p.Info.FieldASTs) != 1 {
		panic("Only one Field expected here")
	}

	selections := p.Info.FieldASTs[0].SelectionSet
	properties, includeMeta, err := extractProperties(selections)
	if err != nil {
		return nil, fmt.Errorf("could not extract properties for class '%s': %w", className, err)
	}

	groupBy, err := extractGroupBy(p.Args, p.Info.FieldName)
	if err != nil {
		return nil, fmt.Errorf("could not extract groupBy path: %w", err)
	}

	limit, err := extractLimit(p.Args)
	if err != nil {
		return nil, fmt.Errorf("could not extract limit: %w", err)
	}

	// Check the extraction error first, only then validate the value.
	objectLimit, err := extractObjectLimit(p.Args)
	if err != nil {
		return nil, fmt.Errorf("could not extract objectLimit: %w", err)
	}
	if objectLimit != nil && *objectLimit <= 0 {
		return nil, fmt.Errorf("objectLimit must be a positive integer")
	}

	// Named localFilters so that the imported filters package is not shadowed.
	localFilters, err := common_filters.ExtractFilters(p.Args, p.Info.FieldName)
	if err != nil {
		return nil, fmt.Errorf("could not extract filters: %w", err)
	}

	// NOTE(review): the type assertions on the argument values below are
	// unchecked; presumably the GraphQL schema guarantees their shape — confirm.
	var nearVectorParams *searchparams.NearVector
	if nearVector, ok := p.Args["nearVector"]; ok {
		extracted, err := common_filters.ExtractNearVector(nearVector.(map[string]interface{}))
		if err != nil {
			return nil, fmt.Errorf("failed to extract nearVector params: %w", err)
		}
		nearVectorParams = &extracted
	}

	var nearObjectParams *searchparams.NearObject
	if nearObject, ok := p.Args["nearObject"]; ok {
		extracted, err := common_filters.ExtractNearObject(nearObject.(map[string]interface{}))
		if err != nil {
			return nil, fmt.Errorf("failed to extract nearObject params: %w", err)
		}
		nearObjectParams = &extracted
	}

	// Module params are only set when a provider exists and it extracted at
	// least one param; otherwise the map stays nil.
	var moduleParams map[string]interface{}
	if modulesProvider != nil {
		extractedParams := modulesProvider.ExtractSearchParams(p.Args, class.Class)
		if len(extractedParams) > 0 {
			moduleParams = extractedParams
		}
	}

	// Extract hybrid search params from the processed query
	// Everything hybrid can go in another namespace AFTER modulesprovider is
	// refactored
	var hybridParams *searchparams.HybridSearch
	if hybrid, ok := p.Args["hybrid"]; ok {
		extracted, err := common_filters.ExtractHybridSearch(hybrid.(map[string]interface{}), false)
		if err != nil {
			return nil, fmt.Errorf("failed to extract hybrid params: %w", err)
		}
		hybridParams = extracted
	}

	var tenant string
	if tk, ok := p.Args["tenant"]; ok {
		tenant = tk.(string)
	}

	params := &aggregation.Params{
		Filters:          localFilters,
		ClassName:        className,
		Properties:       properties,
		GroupBy:          groupBy,
		IncludeMetaCount: includeMeta,
		Limit:            limit,
		ObjectLimit:      objectLimit,
		NearVector:       nearVectorParams,
		NearObject:       nearObjectParams,
		ModuleParams:     moduleParams,
		Hybrid:           hybridParams,
		Tenant:           tenant,
	}

	// we might support objectLimit without nearMedia filters later, e.g. with sort
	if params.ObjectLimit != nil && !validateObjectLimitUsage(params) {
		return nil, fmt.Errorf("objectLimit can only be used with a near<Media> or hybrid filter")
	}

	res, err := resolver.Aggregate(p.Context, principalFromContext(p.Context), params)
	if err != nil {
		return nil, err
	}

	// Only the groups of a typed result are exposed; anything else is returned
	// exactly as the resolver produced it.
	switch parsed := res.(type) {
	case *aggregation.Result:
		return parsed.Groups, nil
	default:
		return res, nil
	}
}

// extractProperties converts the selection set of a class in an Aggregate
// query into the list of properties to aggregate.
//
// It returns the properties (each with the aggregators selected beneath it)
// and a flag that is true when the "meta" field was selected. The "groupedBy"
// and "__typename" selections are skipped. The first letter of every other
// selection name is lower-cased before it is used as the property name.
//
// A nil selection set yields an empty property list. A selection that is not
// a plain field (for example a fragment) results in an error instead of a
// panic, as does any failure while extracting a property's aggregators.
func extractProperties(selections *ast.SelectionSet) ([]aggregation.ParamProperty, bool, error) {
	properties := []aggregation.ParamProperty{}
	var includeMeta bool

	if selections == nil {
		return properties, includeMeta, nil
	}

	for _, selection := range selections.Selections {
		field, ok := selection.(*ast.Field)
		if !ok {
			return nil, false, fmt.Errorf("unsupported selection of type %T, expected a field", selection)
		}

		name := field.Name.Value
		switch name {
		case GroupedByFieldName:
			// In the GraphQL API we show the "groupedBy" field alongside various
			// properties, however, we don't have to include it here, as we don't
			// want to perform aggregations on it.
			// If we didn't exclude it we'd run into errors down the line, because
			// the connector would look for a "groupedBy" prop on the specific class
			// which doesn't exist.
			continue
		case "meta":
			includeMeta = true
			continue
		case "__typename":
			continue
		}

		// Only the first letter is lower-cased; the rest of the name is kept.
		name = strings.ToLower(name[:1]) + name[1:]
		aggregators, err := extractAggregators(field.SelectionSet)
		if err != nil {
			return nil, false, err
		}

		properties = append(properties, aggregation.ParamProperty{
			Name:        schema.PropertyName(name),
			Aggregators: aggregators,
		})
	}

	return properties, includeMeta, nil
}

// extractAggregators converts the selections beneath a single property into
// the aggregators that should be computed for it.
//
// A nil selection set yields (nil, nil). "__typename" selections are skipped.
// Every other selection name is parsed with aggregation.ParseAggregatorProp;
// a parse failure is returned as an error. For the top-occurrences aggregator
// a "limit" argument on the field, if present, overrides the aggregator's
// limit. A selection that is not a plain field (for example a fragment)
// results in an error instead of a panic.
func extractAggregators(selections *ast.SelectionSet) ([]aggregation.Aggregator, error) {
	if selections == nil {
		return nil, nil
	}

	analyses := []aggregation.Aggregator{}
	for _, selection := range selections.Selections {
		field, ok := selection.(*ast.Field)
		if !ok {
			return nil, fmt.Errorf("unsupported selection of type %T, expected a field", selection)
		}

		name := field.Name.Value
		if name == "__typename" {
			continue
		}

		property, err := aggregation.ParseAggregatorProp(name)
		if err != nil {
			return nil, err
		}

		if property.String() == aggregation.NewTopOccurrencesAggregator(nil).String() {
			// a top occurrence, so we need to check if we have a limit argument
			if overwrite := extractLimitFromArgs(field.Arguments); overwrite != nil {
				property.Limit = overwrite
			}
		}

		analyses = append(analyses, property)
	}

	return analyses, nil
}

// extractGroupBy reads the optional "groupBy" argument and parses it into a
// filter path rooted at rootClass. It returns (nil, nil) when the argument is
// absent and an error when the argument is not a list; otherwise the result
// of filters.ParsePath is returned as is.
func extractGroupBy(args map[string]interface{}, rootClass string) (*filters.Path, error) {
	raw, present := args["groupBy"]
	if !present {
		// not set means the user is not interested in grouping (former Meta)
		return nil, nil
	}

	if segments, isList := raw.([]interface{}); isList {
		return filters.ParsePath(segments, rootClass)
	}

	return nil, fmt.Errorf("no groupBy must be a list, instead got: %#v", raw)
}

// principalFromContext returns the principal stored in ctx under the
// "principal" key, or nil when no value is stored there. A stored value of
// any type other than *models.Principal makes the type assertion panic.
func principalFromContext(ctx context.Context) *models.Principal {
	if stored := ctx.Value("principal"); stored != nil {
		return stored.(*models.Principal)
	}

	return nil
}

// extractLimit reads the optional "limit" argument. It returns (nil, nil)
// when the argument is absent, a pointer to the value when it is an int, and
// an error for a value of any other type.
func extractLimit(args map[string]interface{}) (*int, error) {
	raw, present := args["limit"]
	if !present {
		// not set means the user is not interested and the UC should use a reasonable default
		return nil, nil
	}

	switch value := raw.(type) {
	case int:
		return &value, nil
	default:
		return nil, fmt.Errorf("limit must be an int, instead got: %#v", raw)
	}
}

// extractObjectLimit reads the optional "objectLimit" argument. It returns
// (nil, nil) when the argument is absent, a pointer to the value when it is
// an int, and an error for a value of any other type. The value itself is not
// range-checked here.
func extractObjectLimit(args map[string]interface{}) (*int, error) {
	raw, present := args["objectLimit"]
	if !present {
		return nil, nil
	}

	if asInt, isInt := raw.(int); isInt {
		return &asInt, nil
	}

	return nil, fmt.Errorf("objectLimit must be an int, instead got: %#v", raw)
}

// extractLimitFromArgs looks for an argument named "limit" among the AST
// arguments of a field and returns its value as an int pointer.
//
// It returns nil when no "limit" argument exists, when its value is not
// carried as a string by the AST, or when that string cannot be parsed as an
// int. In all of these cases the caller keeps its own default instead of
// silently receiving a limit of 0.
func extractLimitFromArgs(args []*ast.Argument) *int {
	for _, arg := range args {
		if arg.Name.Value != "limit" {
			continue
		}

		raw, ok := arg.Value.GetValue().(string)
		if !ok {
			continue
		}

		parsed, err := strconv.Atoi(raw)
		if err != nil {
			// The signature has no error return, so a malformed value is
			// treated the same as an absent one.
			continue
		}

		return &parsed
	}

	return nil
}

// validateObjectLimitUsage reports whether params contain at least one of the
// searches that objectLimit may be combined with: nearObject, nearVector, any
// module-provided search params, or hybrid.
func validateObjectLimitUsage(params *aggregation.Params) bool {
	switch {
	case params.NearObject != nil:
		return true
	case params.NearVector != nil:
		return true
	case len(params.ModuleParams) > 0:
		return true
	default:
		return params.Hybrid != nil
	}
}