gaur3009 committed on
Commit
05edc93
·
verified ·
1 Parent(s): b8699f4

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -2
app.py CHANGED
@@ -31,9 +31,35 @@ class PDFAnalyzer:
31
  return [text[i:i+500] for i in range(0, len(text), 500)]
32
 
33
  def query(self, question):
34
- if not self.active_doc: return "Upload PDF first"
 
 
35
  ques_emb = self.model.encode(question)
36
- return self.text_chunks[np.argmax(cosine_similarity([ques_emb], self.embeddings)[0])]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
  def create_app():
39
  analyzer = PDFAnalyzer()
 
31
  return [text[i:i+500] for i in range(0, len(text), 500)]
32
 
33
  def query(self, question):
34
+ if not self.active_doc:
35
+ return "Please upload a PDF document first"
36
+
37
  ques_emb = self.model.encode(question)
38
+ best_idx = np.argmax(cosine_similarity([ques_emb], self.embeddings)[0])
39
+ full_answer = self.text_chunks[best_idx]
40
+
41
+ # Extract 100-word precise answer with context
42
+ words = full_answer.split()
43
+ question_words = set(question.lower().split())
44
+
45
+ # Find the most relevant sentence
46
+ sentences = re.split(r'(?<=[.!?]) +', full_answer)
47
+ best_sentence = max(sentences,
48
+ key=lambda s: len(set(s.lower().split()) & question_words),
49
+ default="")
50
+
51
+ # Get 50 words before and after the best sentence
52
+ all_words = ' '.join(sentences).split()
53
+ try:
54
+ start = max(0, all_words.index(best_sentence.split()[0]) - 50)
55
+ end = start + 100
56
+ except:
57
+ start = 0
58
+ end = 100
59
+
60
+ precise_answer = ' '.join(all_words[start:end]) + ("..." if end < len(all_words) else "")
61
+
62
+ return precise_answer
63
 
64
  def create_app():
65
  analyzer = PDFAnalyzer()