Update app.py
Browse files
app.py
CHANGED
@@ -40,36 +40,36 @@ class PDFAnalyzer:
|
|
40 |
return [text[i:i+500] for i in range(0, len(text), 500)]
|
41 |
|
42 |
def query(self, question):
|
43 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
44 |
return json.dumps({
|
45 |
-
"status":
|
46 |
-
"message": "
|
47 |
-
"results": [
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
full_answer = self.text_chunks[best_idx]
|
59 |
-
|
60 |
-
return json.dumps({
|
61 |
-
"status": 200,
|
62 |
-
"message": "Success",
|
63 |
-
"results": [{
|
64 |
-
"text": self._format_answer(full_answer, question),
|
65 |
-
"confidence": confidence,
|
66 |
-
"document_id": str(hash(self.active_doc)), # Convert to string
|
67 |
-
"metadata": {
|
68 |
-
"chunk_index": best_idx,
|
69 |
-
"document": self.active_doc
|
70 |
-
}
|
71 |
-
}]
|
72 |
-
}, default=lambda x: str(x)) # Fallback string conversion
|
73 |
|
74 |
def _format_answer(self, text, question):
|
75 |
# Extract focused answer with 100-word context
|
|
|
40 |
return [text[i:i+500] for i in range(0, len(text), 500)]
|
41 |
|
42 |
def query(self, question):
    """Answer *question* from the active document and return a JSON string.

    The question is embedded with ``self.model.encode``, compared against
    ``self.embeddings`` with ``cosine_similarity``, and the highest-scoring
    entry of ``self.text_chunks`` is passed through ``self._format_answer``.

    Args:
        question: The question text handed to ``self.model.encode``.

    Returns:
        A JSON-encoded string with ``status``, ``message`` and ``results``
        keys. ``status`` is 400 with an empty ``results`` list when no
        document is active or the active document has no text chunks;
        otherwise it is 200 and ``results`` holds a single entry with the
        answer text, its confidence, a document id and chunk metadata.
    """
    if not self.active_doc:
        return json.dumps({
            "status": 400,
            "message": "No document uploaded",
            "results": [],
        })

    # A document with no chunks gives the similarity search nothing to rank
    # (np.argmax raises on an empty sequence), so report it in the same
    # JSON error shape instead of letting an exception escape.
    # len() is used rather than truthiness so this also works if the chunks
    # are ever held in an array-like container.
    if len(self.text_chunks) == 0:
        return json.dumps({
            "status": 400,
            "message": "Document has no searchable text",
            "results": [],
        })

    ques_emb = self.model.encode(question)
    similarities = cosine_similarity([ques_emb], self.embeddings)[0]

    # Convert NumPy scalars to native Python types so json.dumps can
    # serialize them directly.
    best_idx = int(np.argmax(similarities))
    confidence = float(similarities[best_idx])

    full_answer = self.text_chunks[best_idx]

    return json.dumps({
        "status": 200,
        "message": "Success",
        "results": [{
            "text": self._format_answer(full_answer, question),
            "confidence": confidence,
            # NOTE(review): hash() of a str is salted per interpreter
            # process, so this id is presumably not stable across
            # restarts - confirm whether any caller persists it.
            "document_id": str(hash(self.active_doc)),
            "metadata": {
                "chunk_index": best_idx,
                "document": self.active_doc,
            },
        }],
    }, default=str)  # Fallback: stringify anything json cannot encode
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
73 |
|
74 |
def _format_answer(self, text, question):
|
75 |
# Extract focused answer with 100-word context
|