IotaCluster committed on
Commit
b974815
·
verified ·
1 Parent(s): 445b813

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -1
app.py CHANGED
@@ -24,7 +24,11 @@ def embed_sparse(text: str):
24
  scores = bm25.get_scores(tokens)
25
  # Map each term to its BM25 weight
26
  term_weights = {tok: float(score) for tok, score in zip(tokens, scores)}
27
- return {"sparse_embedding": term_weights}
 
 
 
 
28
 
29
  # 3. Late-interaction embedding model (ColBERT)
30
  colbert_tokenizer = AutoTokenizer.from_pretrained('colbert-ir/colbertv2.0', use_fast=True)
 
24
  scores = bm25.get_scores(tokens)
25
  # Map each term to its BM25 weight
26
  term_weights = {tok: float(score) for tok, score in zip(tokens, scores)}
27
+ # Build a consistent vocabulary (sorted for deterministic indices)
28
+ terms = sorted(term_weights.keys())
29
+ indices = list(range(len(terms)))
30
+ values = [term_weights[term] for term in terms]
31
+ return {"indices": indices, "values": values, "terms": terms} # 'terms' is optional, for debugging
32
 
33
  # 3. Late-interaction embedding model (ColBERT)
34
  colbert_tokenizer = AutoTokenizer.from_pretrained('colbert-ir/colbertv2.0', use_fast=True)