Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -20,7 +20,6 @@ nltk.download('punkt_tab')
|
|
20 |
index = None
|
21 |
chunks = None
|
22 |
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
23 |
-
rerank_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
|
24 |
generator = None
|
25 |
|
26 |
# --- PDF Processing and Embedding ---
|
@@ -123,7 +122,7 @@ def rerank(query, results, keyword_weight=0.3, cross_encoder_weight=0.7):
|
|
123 |
keyword_scores = [score_chunk_keywords(chunk) for chunk in results]
|
124 |
|
125 |
# Cross-encoder scoring
|
126 |
-
rerank_model = CrossEncoder(
|
127 |
query_results = [[query, f"Document: {result['document_id']}, Section: {result['section_header']}, Text: {result['text']}"] for result in results]
|
128 |
cross_encoder_scores = rerank_model.predict(query_results)
|
129 |
|
@@ -145,8 +144,6 @@ def merge_chunks(retrieved_chunks):
|
|
145 |
# --- Confidence Calculation ---
|
146 |
def calculate_confidence(query, context, answer):
|
147 |
"""Calculates confidence score based on question-context and context-answer similarity."""
|
148 |
-
embedding_model = SentenceTransformer(embedding_model)
|
149 |
-
|
150 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
151 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
152 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|
|
|
20 |
index = None
|
21 |
chunks = None
|
22 |
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
|
|
23 |
generator = None
|
24 |
|
25 |
# --- PDF Processing and Embedding ---
|
|
|
122 |
keyword_scores = [score_chunk_keywords(chunk) for chunk in results]
|
123 |
|
124 |
# Cross-encoder scoring
|
125 |
+
rerank_model = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-12-v2')
|
126 |
query_results = [[query, f"Document: {result['document_id']}, Section: {result['section_header']}, Text: {result['text']}"] for result in results]
|
127 |
cross_encoder_scores = rerank_model.predict(query_results)
|
128 |
|
|
|
144 |
# --- Confidence Calculation ---
|
145 |
def calculate_confidence(query, context, answer):
|
146 |
"""Calculates confidence score based on question-context and context-answer similarity."""
|
|
|
|
|
147 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
148 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
149 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|