IotaCluster committed on
Commit
57f1fb2
·
verified ·
1 Parent(s): bea665a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -20,15 +20,13 @@ def embed_sparse(text: str):
20
  if not text.strip():
21
  return {"error": "Input text is empty."}
22
  tokens = text.split()
23
- # Treat the input as a single document and also as the query
24
  bm25 = BM25Okapi([tokens])
25
  unique_terms = sorted(set(tokens))
26
- # BM25 expects a query, so we use the unique terms as the query
27
  scores = bm25.get_scores(unique_terms)
 
28
  term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
29
- # Build Qdrant format
30
  indices = list(range(len(unique_terms)))
31
- values = [term_weights[term] for term in unique_terms]
32
  return {"indices": indices, "values": values, "terms": unique_terms}
33
 
34
  # 3. Late-interaction embedding model (ColBERT)
 
20
  if not text.strip():
21
  return {"error": "Input text is empty."}
22
  tokens = text.split()
 
23
  bm25 = BM25Okapi([tokens])
24
  unique_terms = sorted(set(tokens))
 
25
  scores = bm25.get_scores(unique_terms)
26
+ # Assign scores for all unique terms
27
  term_weights = {term: float(score) for term, score in zip(unique_terms, scores)}
 
28
  indices = list(range(len(unique_terms)))
29
+ values = [term_weights.get(term, 0.0) for term in unique_terms]
30
  return {"indices": indices, "values": values, "terms": unique_terms}
31
 
32
  # 3. Late-interaction embedding model (ColBERT)