Spaces:
Running
Running
Commit
·
60cacbb
1
Parent(s):
b3e42d1
duplicate issue
Browse files
app.py
CHANGED
|
@@ -7,6 +7,8 @@ import torch
|
|
| 7 |
from functools import lru_cache
|
| 8 |
import logging
|
| 9 |
from datetime import datetime
|
|
|
|
|
|
|
| 10 |
|
| 11 |
# 🔧 Configure logging
|
| 12 |
logging.basicConfig(level=logging.INFO)
|
|
@@ -70,12 +72,23 @@ def encode_splade_cached(text: str) -> SparseVector:
|
|
| 70 |
vocab_indices = nonzero[:, 1]
|
| 71 |
values = relu_log[nonzero[:, 0], nonzero[:, 1]]
|
| 72 |
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
return SparseVector(
|
| 75 |
-
indices=
|
| 76 |
-
values=
|
| 77 |
)
|
| 78 |
|
|
|
|
| 79 |
# 🚀 Main endpoint
|
| 80 |
@app.post("/get-embedding/")
|
| 81 |
async def get_embedding(input: TextInput):
|
|
|
|
| 7 |
from functools import lru_cache
|
| 8 |
import logging
|
| 9 |
from datetime import datetime
|
| 10 |
+
from collections import defaultdict
|
| 11 |
+
|
| 12 |
|
| 13 |
# 🔧 Configure logging
|
| 14 |
logging.basicConfig(level=logging.INFO)
|
|
|
|
| 72 |
vocab_indices = nonzero[:, 1]
|
| 73 |
values = relu_log[nonzero[:, 0], nonzero[:, 1]]
|
| 74 |
|
| 75 |
+
vocab_indices_list = vocab_indices.cpu().numpy().tolist()
|
| 76 |
+
values_list = values.cpu().numpy().tolist()
|
| 77 |
+
|
| 78 |
+
index_to_value = defaultdict(float)
|
| 79 |
+
for idx, val in zip(vocab_indices_list, values_list):
|
| 80 |
+
index_to_value[idx] += val
|
| 81 |
+
|
| 82 |
+
deduped_indices = list(index_to_value.keys())
|
| 83 |
+
deduped_values = list(index_to_value.values())
|
| 84 |
+
|
| 85 |
+
logger.info(f"SPLADE encoding complete with {len(deduped_indices)} dimensions")
|
| 86 |
return SparseVector(
|
| 87 |
+
indices=deduped_indices,
|
| 88 |
+
values=deduped_values
|
| 89 |
)
|
| 90 |
|
| 91 |
+
|
| 92 |
# 🚀 Main endpoint
|
| 93 |
@app.post("/get-embedding/")
|
| 94 |
async def get_embedding(input: TextInput):
|