Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -20,15 +20,13 @@ def embed_sparse(text: str):
|
|
20 |
if not text.strip():
|
21 |
return {"error": "Input text is empty."}
|
22 |
tokens = text.split()
|
23 |
-
# Treat the input as a single document and also as the query
|
24 |
bm25 = BM25Okapi([tokens])
|
25 |
unique_terms = sorted(set(tokens))
|
26 |
-
# BM25 expects a query, so we use the unique terms as the query
|
27 |
scores = bm25.get_scores(unique_terms)
|
|
|
28 |
term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
|
29 |
-
# Build Qdrant format
|
30 |
indices = list(range(len(unique_terms)))
|
31 |
-
values = [term_weights[term] for term in unique_terms]
|
32 |
return {"indices": indices, "values": values, "terms": unique_terms}
|
33 |
|
34 |
# 3. Late-interaction embedding model (ColBERT)
|
|
|
20 |
if not text.strip():
|
21 |
return {"error": "Input text is empty."}
|
22 |
tokens = text.split()
|
|
|
23 |
bm25 = BM25Okapi([tokens])
|
24 |
unique_terms = sorted(set(tokens))
|
|
|
25 |
scores = bm25.get_scores(unique_terms)
|
26 |
+
# Assign scores for all unique terms
|
27 |
term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
|
|
|
28 |
indices = list(range(len(unique_terms)))
|
29 |
+
values = [term_weights.get(term, 0.0) for term in unique_terms]
|
30 |
return {"indices": indices, "values": values, "terms": unique_terms}
|
31 |
|
32 |
# 3. Late-interaction embedding model (ColBERT)
|