Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -31,7 +31,7 @@ def download_pdf(url):
|
|
31 |
def custom_chunking(text, delimiter="\n\n"):
    """Break ``text`` into chunks at every occurrence of ``delimiter``.

    Args:
        text: The string to split.
        delimiter: Separator marking chunk boundaries. Defaults to a blank
            line (``"\\n\\n"``).

    Returns:
        The list of pieces produced by ``text.split(delimiter)``. The
        delimiter itself is not included in any piece, and back-to-back
        delimiters yield empty-string entries.
    """
    pieces = text.split(delimiter)
    return pieces
|
34 |
-
|
35 |
def extract_text_from_pdf(pdf_bytes, document_id):
|
36 |
"""Extracts text from a PDF, page by page, and then chunks each page."""
|
37 |
pdf_file = io.BytesIO(pdf_bytes)
|
@@ -147,7 +147,7 @@ def calculate_confidence(query, context, answer):
|
|
147 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
148 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
149 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|
150 |
-
|
151 |
query_context_similarity = np.dot(query_embedding, context_embedding.T).item()
|
152 |
context_answer_similarity = np.dot(context_embedding, answer_embedding.T).item()
|
153 |
confidence = (query_context_similarity + context_answer_similarity) / 2.0 # Equal weights
|
@@ -161,7 +161,7 @@ def generate_response(query, context):
|
|
161 |
- JUST PROVIDE ONLY THE ANSWER.
|
162 |
- Provide a elaborate, factual answer based strictly on the Context.
|
163 |
- Avoid generating Python code, solutions, or any irrelevant information.
|
164 |
-
Context: {context}
|
165 |
Question: {query}
|
166 |
Answer:"""
|
167 |
response = generator(prompt, max_new_tokens=500, num_return_sequences=1)[0]['generated_text']
|
@@ -256,4 +256,4 @@ with gr.Blocks() as demo:
|
|
256 |
|
257 |
iface.render()
|
258 |
|
259 |
-
demo.launch()
|
|
|
31 |
def custom_chunking(text, delimiter="\n\n"):
    """Break ``text`` into chunks at every occurrence of ``delimiter``.

    Args:
        text: The string to split.
        delimiter: Separator marking chunk boundaries. Defaults to a blank
            line (``"\\n\\n"``).

    Returns:
        The list of pieces produced by ``text.split(delimiter)``. The
        delimiter itself is not included in any piece, and back-to-back
        delimiters yield empty-string entries.
    """
    pieces = text.split(delimiter)
    return pieces
|
34 |
+
|
35 |
def extract_text_from_pdf(pdf_bytes, document_id):
|
36 |
"""Extracts text from a PDF, page by page, and then chunks each page."""
|
37 |
pdf_file = io.BytesIO(pdf_bytes)
|
|
|
147 |
query_embedding = embedding_model.encode([query], convert_to_numpy=True)
|
148 |
context_embedding = embedding_model.encode([context], convert_to_numpy=True)
|
149 |
answer_embedding = embedding_model.encode([answer], convert_to_numpy=True)
|
150 |
+
|
151 |
query_context_similarity = np.dot(query_embedding, context_embedding.T).item()
|
152 |
context_answer_similarity = np.dot(context_embedding, answer_embedding.T).item()
|
153 |
confidence = (query_context_similarity + context_answer_similarity) / 2.0 # Equal weights
|
|
|
161 |
- JUST PROVIDE ONLY THE ANSWER.
|
162 |
- Provide a elaborate, factual answer based strictly on the Context.
|
163 |
- Avoid generating Python code, solutions, or any irrelevant information.
|
164 |
+
Context: {context}
|
165 |
Question: {query}
|
166 |
Answer:"""
|
167 |
response = generator(prompt, max_new_tokens=500, num_return_sequences=1)[0]['generated_text']
|
|
|
256 |
|
257 |
iface.render()
|
258 |
|
259 |
+
demo.launch()
|