Update app.py
Browse files
app.py
CHANGED
@@ -31,9 +31,35 @@ class PDFAnalyzer:
|
|
31 |
return [text[i:i+500] for i in range(0, len(text), 500)]
|
32 |
|
33 |
def query(self, question):
|
34 |
-
if not self.active_doc:
|
|
|
|
|
35 |
ques_emb = self.model.encode(question)
|
36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
37 |
|
38 |
def create_app():
|
39 |
analyzer = PDFAnalyzer()
|
|
|
31 |
return [text[i:i+500] for i in range(0, len(text), 500)]
|
32 |
|
33 |
def query(self, question):
    """Answer ``question`` with an excerpt from the best-matching text chunk.

    The question is encoded with ``self.model``; the chunk of
    ``self.text_chunks`` whose entry in ``self.embeddings`` has the highest
    cosine similarity to it is selected. Within that chunk, the sentence
    sharing the most lowercase words with the question anchors a window of
    up to 100 words, beginning up to 50 words before that sentence.

    Args:
        question: The user's question as plain text.

    Returns:
        The excerpt as a string, suffixed with "..." when the chunk
        continues past the window, or a prompt to upload a PDF when
        ``self.active_doc`` is falsy.
    """
    if not self.active_doc:
        return "Please upload a PDF document first"

    ques_emb = self.model.encode(question)
    # NOTE(review): assumes self.embeddings[i] corresponds to
    # self.text_chunks[i] -- confirm where the document is loaded.
    similarities = cosine_similarity([ques_emb], self.embeddings)[0]
    full_answer = self.text_chunks[np.argmax(similarities)]

    context_before = 50  # words of context kept before the best sentence
    window_size = 100    # maximum number of words returned

    question_words = set(question.lower().split())
    sentences = re.split(r'(?<=[.!?]) +', full_answer)

    # Score the sentences while recording the word offset at which each one
    # starts. Searching the chunk for the sentence's first word instead would
    # match the first occurrence of that word anywhere in the chunk (e.g. a
    # leading "The") and anchor the window on the wrong sentence.
    all_words = []
    best_offset = 0
    best_overlap = -1
    for sentence in sentences:
        overlap = len(set(sentence.lower().split()) & question_words)
        if overlap > best_overlap:  # strict '>' keeps the first best match
            best_overlap = overlap
            best_offset = len(all_words)
        all_words.extend(sentence.split())

    start = max(0, best_offset - context_before)
    end = start + window_size

    excerpt = ' '.join(all_words[start:end])
    return excerpt + ("..." if end < len(all_words) else "")
|
63 |
|
64 |
def create_app():
|
65 |
analyzer = PDFAnalyzer()
|