ivyblossom committed on
Commit
e1c69a3
·
1 Parent(s): d9b70c9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -14
app.py CHANGED
@@ -13,30 +13,32 @@ def truncate_to_word_boundary(text, max_words=100):
13
  # Function to perform question-answering
14
  def question_answering(question, pdf_path):
15
  pdf_reader = PdfReader(pdf_path)
16
- pdf_text = ""
17
 
18
- for pdf_page in pdf_reader.pages:
19
- pdf_text += pdf_page.extract_text()
 
 
 
20
 
21
  # Perform question-answering using Hugging Face's Transformers
22
  question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
23
  answer = question_answerer(question=question, context=pdf_text)
24
 
25
- return answer, pdf_text
26
 
27
  def main():
28
- st.title("Question Answering on an Uploaded File")
29
 
30
- uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt", "docx", "csv", "json"])
31
  question = st.text_input("Ask your question:")
32
 
33
  if st.button("Answer") and uploaded_file is not None:
34
- file_extension = os.path.splitext(uploaded_file.name)[1].lower()
35
  pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
36
  with open(pdf_path, "wb") as f:
37
  f.write(uploaded_file.read())
38
 
39
- answer, pdf_text = question_answering(question, pdf_path)
40
 
41
  # Delete the uploaded file after processing
42
  os.remove(pdf_path)
@@ -44,13 +46,13 @@ def main():
44
  st.write(f"Question: '{question}'")
45
  st.write("Answer:", answer['answer'])
46
  st.write("Score:", answer['score'])
47
- st.write("Page Number:", answer['start'] + 1) # Add 1 to convert 0-based index to 1-based page number
48
 
49
- # Display truncated context
50
- start_page = answer['start']
51
- context = pdf_text
52
- truncated_context = truncate_to_word_boundary(context)
53
- st.write("Context:", truncated_context)
 
54
 
55
  if __name__ == "__main__":
56
  main()
 
# Function to perform question-answering
def question_answering(question, pdf_path):
    """Answer *question* using the text of the PDF stored at *pdf_path*.

    The text of every page is extracted and the pages are joined with
    newlines into a single context string, which is handed to a
    "distilbert-base-cased-distilled-squad" question-answering pipeline
    together with the question.

    Returns:
        A tuple ``(answer, pdf_text_with_pages)`` holding the pipeline's
        result and a list of ``(page_number, page_text)`` pairs, with page
        numbers starting at 1.
    """
    reader = PdfReader(pdf_path)

    # Pair each page's extracted text with its 1-based page number.
    pdf_text_with_pages = [
        (number, page.extract_text())
        for number, page in enumerate(reader.pages, start=1)
    ]
    full_text = "\n".join(page_text for _, page_text in pdf_text_with_pages)

    # Perform question-answering using Hugging Face's Transformers
    model_name = "distilbert-base-cased-distilled-squad"
    qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)
    answer = qa_pipeline(question=question, context=full_text)

    return answer, pdf_text_with_pages
29
 
def _page_for_offset(pdf_text_with_pages, offset):
    """Return the ``(page_number, page_text)`` pair containing *offset*.

    *offset* is a character index into the context string built by joining
    the page texts with a single newline, which is how the question-answering
    pipeline reports where the answer starts. Offsets past the end of the
    text fall back to the last page.
    """
    consumed = 0
    for page_num, page_text in pdf_text_with_pages:
        # +1 accounts for the "\n" separator inserted between pages.
        consumed += len(page_text) + 1
        if offset < consumed:
            return page_num, page_text
    return pdf_text_with_pages[-1]


def main():
    """Run the Streamlit page: upload a PDF, ask a question, show the answer.

    After the "Answer" button is pressed with a file uploaded, the page shows
    the question, the extracted answer, its score, and the number and text of
    the PDF page that contains the answer.
    """
    # Imported locally so this function does not depend on edits to the
    # file's top-level import block.
    import tempfile

    st.title("Question Answering on a PDF File")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    question = st.text_input("Ask your question:")

    if st.button("Answer") and uploaded_file is not None:
        # Write the upload to a private temporary file instead of
        # "<cwd>/<uploaded name>", so an upload can never overwrite (and then
        # delete) an existing file that happens to share its name.
        with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
            f.write(uploaded_file.read())
            pdf_path = f.name

        try:
            answer, pdf_text_with_pages = question_answering(question, pdf_path)
        finally:
            # Delete the uploaded file after processing, even on failure.
            os.remove(pdf_path)

        st.write(f"Question: '{question}'")
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'])

        # Display context where the answer came from. answer['start'] is a
        # character offset into the joined text, not a page number, so it
        # has to be mapped back to the page that contains it.
        context_page_num, context_text = _page_for_offset(
            pdf_text_with_pages, answer['start']
        )
        st.write("Context:")
        st.write(f"Page Number: {context_page_num}")
        st.write(context_text)


if __name__ == "__main__":
    main()