Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,6 @@ from transformers import AutoTokenizer
|
|
| 14 |
from transformers import AutoModelForCausalLM
|
| 15 |
from transformers import TextIteratorStreamer
|
| 16 |
from threading import Thread
|
| 17 |
-
from langchain import Dimension
|
| 18 |
|
| 19 |
|
| 20 |
|
|
@@ -43,9 +42,10 @@ embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
|
|
| 43 |
data = dataset["train"]
|
| 44 |
|
| 45 |
print(data)
|
| 46 |
-
d = 384
|
| 47 |
-
|
| 48 |
-
|
|
|
|
| 49 |
# adds an index column for the embeddings
|
| 50 |
|
| 51 |
|
|
|
|
| 14 |
from transformers import AutoModelForCausalLM
|
| 15 |
from transformers import TextIteratorStreamer
|
| 16 |
from threading import Thread
|
|
|
|
| 17 |
|
| 18 |
|
| 19 |
|
|
|
|
| 42 |
data = dataset["train"]
|
| 43 |
|
| 44 |
print(data)
|
| 45 |
+
d = 384 # vector dimension
|
| 46 |
+
m = 32 # HNSW M parameter: higher is more accurate but takes more time to index (default is 32; 128 should be ok)
|
| 47 |
+
index = faiss.IndexHNSWFlat(d, m, faiss.METRIC_INNER_PRODUCT)
|
| 48 |
+
data = data.add_faiss_index("embeddings", custom_index=index)
|
| 49 |
# adds an index column for the embeddings
|
| 50 |
|
| 51 |
|