File size: 2,851 Bytes
b110593
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
//                           _       _
// __      _____  __ ___   ___  __ _| |_ ___
// \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
//  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
//   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
//
//  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
//
//  CONTACT: hello@weaviate.io
//

package hnsw

import (
	"context"
	"errors"
	"fmt"

	"github.com/weaviate/weaviate/adapters/repos/db/vector/compressionhelpers"

	"github.com/weaviate/weaviate/entities/storobj"
	ent "github.com/weaviate/weaviate/entities/vectorindex/hnsw"
)

// calculateOptimalSegments returns the default number of PQ segments for
// vectors of dimensionality dims; compress falls back to it when the
// configured segment count is not positive.
//
// The result is dims divided by the first divisor that matches, checked from
// largest to smallest:
//
//	dims >= 2048 and divisible by 8 -> dims / 8
//	dims >= 768  and divisible by 6 -> dims / 6
//	dims >= 256  and divisible by 4 -> dims / 4
//	dims divisible by 2             -> dims / 2
//	otherwise                       -> dims
func (h *hnsw) calculateOptimalSegments(dims int) int {
	// A divisor of 1 means one segment per dimension.
	divisor := 1
	switch {
	case dims >= 2048 && dims%8 == 0:
		divisor = 8
	case dims >= 768 && dims%6 == 0:
		divisor = 6
	case dims >= 256 && dims%4 == 0:
		divisor = 4
	case dims%2 == 0:
		divisor = 2
	}
	return dims / divisor
}

// compress switches the index over to a vector compressor: a PQ compressor
// when cfg.PQ.Enabled is set, otherwise a BQ compressor. It returns nil
// without doing anything when neither PQ nor BQ is enabled.
//
// The whole operation runs while holding h.compressActionLock. On success the
// new compressor is installed in h.compressor, every non-nil vector from the
// cache dump is preloaded into it, h.compressed is set to true and the cache
// is dropped.
//
// Errors are returned when PQ is requested on an empty index, when a vector
// needed for fitting cannot be read from the cache (other than
// storobj.ErrNotFound, which is treated as "already deleted"), or when the
// compressor cannot be constructed. h.compressor is only replaced once
// construction has succeeded, so a failed attempt never leaves a
// half-initialised (or typed-nil) compressor behind.
func (h *hnsw) compress(cfg ent.UserConfig) error {
	if !cfg.PQ.Enabled && !cfg.BQ.Enabled {
		return nil
	}

	h.compressActionLock.Lock()
	defer h.compressActionLock.Unlock()
	data := h.cache.All()
	if cfg.PQ.Enabled {
		if h.isEmpty() {
			return errors.New("Compress command cannot be executed before inserting some data. Please, insert your data first.")
		}
		dims := int(h.dims)

		// No explicit segment count configured: derive one from the
		// dimensionality and remember it in the index's PQ config.
		if cfg.PQ.Segments <= 0 {
			cfg.PQ.Segments = h.calculateOptimalSegments(dims)
			h.pqConfig.Segments = cfg.PQ.Segments
		}

		cleanData := make([][]float32, 0, len(data))
		for i := range data {
			// Rather than just taking the cache dump at face value, let's explicitly
			// request the vectors. Otherwise we would miss any vector that's currently
			// not in the cache, for example because the cache is not hot yet after a
			// restart.
			p, err := h.cache.Get(context.Background(), uint64(i))
			if err != nil {
				var e storobj.ErrNotFound
				if errors.As(err, &e) {
					// already deleted, ignore
					continue
				}
				return fmt.Errorf("unexpected error obtaining vectors for fitting: %w", err)
			}

			if p == nil {
				// already deleted, ignore
				continue
			}

			cleanData = append(cleanData, p)
		}

		// Build into a local first so that h.compressor is left untouched if
		// construction fails.
		pq, err := compressionhelpers.NewPQCompressor(cfg.PQ, h.distancerProvider, dims, 1e12, h.logger, cleanData, h.store)
		if err != nil {
			return fmt.Errorf("Compressing vectors: %w", err)
		}
		h.compressor = pq
		h.commitLog.AddPQ(h.compressor.ExposeFields())
	} else {
		// Same pattern as the PQ branch: assign only after success, and wrap the
		// error so callers can tell which step failed.
		bq, err := compressionhelpers.NewBQCompressor(h.distancerProvider, 1e12, h.logger, h.store)
		if err != nil {
			return fmt.Errorf("Compressing vectors: %w", err)
		}
		h.compressor = bq
	}

	// NOTE(review): preloading works from the cache dump taken above, so slots
	// that were nil in the dump are skipped here even though the PQ fitting loop
	// fetched them explicitly via h.cache.Get — confirm this is intended.
	compressionhelpers.Concurrently(uint64(len(data)),
		func(index uint64) {
			if data[index] == nil {
				return
			}
			h.compressor.Preload(index, data[index])
		})

	h.compressed.Store(true)
	h.cache.Drop()
	return nil
}