Spaces:

ivyblossom
/

question-answering

Sleeping

App Files Files Community

ivyblossom committed on Aug 3, 2023

Commit

e1c69a3

1 Parent(s): d9b70c9

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -14

app.py CHANGED Viewed

@@ -13,30 +13,32 @@ def truncate_to_word_boundary(text, max_words=100):
 # Function to perform question-answering
 def question_answering(question, pdf_path):
     pdf_reader = PdfReader(pdf_path)
-    pdf_text = ""
-    for pdf_page in pdf_reader.pages:
-        pdf_text += pdf_page.extract_text()
     # Perform question-answering using Hugging Face's Transformers
     question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
     answer = question_answerer(question=question, context=pdf_text)
-    return answer, pdf_text
 def main():
-    st.title("Question Answering on an Uploaded File")
-    uploaded_file = st.file_uploader("Upload a file:", type=["pdf", "txt", "docx", "csv", "json"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
-        file_extension = os.path.splitext(uploaded_file.name)[1].lower()
         pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
         with open(pdf_path, "wb") as f:
             f.write(uploaded_file.read())
-        answer, pdf_text = question_answering(question, pdf_path)
         # Delete the uploaded file after processing
         os.remove(pdf_path)
@@ -44,13 +46,13 @@ def main():
         st.write(f"Question: '{question}'")
         st.write("Answer:", answer['answer'])
         st.write("Score:", answer['score'])
-        st.write("Page Number:", answer['start'] + 1)  # Add 1 to convert 0-based index to 1-based page number
-        # Display truncated context
-        start_page = answer['start']
-        context = pdf_text
-        truncated_context = truncate_to_word_boundary(context)
-        st.write("Context:", truncated_context)
 if __name__ == "__main__":
     main()

 def question_answering(question, pdf_path):
     """Answer *question* using the text of the PDF stored at *pdf_path*.

     Text is extracted page by page, the page texts are joined with newlines
     into a single context string, and a question-answering pipeline
     (distilbert-base-cased-distilled-squad) is run over that context.

     Returns a tuple ``(answer, pages)``: ``answer`` is whatever the pipeline
     returns, and ``pages`` is a list of ``(page_number, page_text)`` tuples
     with page numbers starting at 1.
     """
     model_name = "distilbert-base-cased-distilled-squad"
     reader = PdfReader(pdf_path)
     pages = [
         (number, page.extract_text())
         for number, page in enumerate(reader.pages, start=1)
     ]
     context = "\n".join(page_text for _, page_text in pages)
     # Perform question-answering using Hugging Face's Transformers
     qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)
     return qa_pipeline(question=question, context=context), pages
 def _page_for_offset(pdf_text_with_pages, offset):
     """Return the ``(page_num, text)`` entry that contains character *offset*.

     *offset* indexes into the page texts joined with a single newline, which
     is how question_answering() builds the context it passes to the model.
     Returns None when the offset lies beyond the last page.
     """
     page_start = 0
     for page_num, page_text in pdf_text_with_pages:
         page_end = page_start + len(page_text)
         # "<=" attributes the joining newline to the page it follows.
         if offset <= page_end:
             return page_num, page_text
         page_start = page_end + 1  # skip the "\n" separator
     return None


 def main():
     """Render the Streamlit page: upload a PDF, ask a question, show the answer.

     After answering, the page that contains the answer is displayed together
     with its 1-based page number.
     """
     import tempfile  # local import: only needed here

     st.title("Question Answering on a PDF File")
     uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
     question = st.text_input("Ask your question:")
     if st.button("Answer") and uploaded_file is not None:
         # Use a uniquely named temp file rather than cwd/<client-supplied name>
         # so uploads cannot overwrite existing files or collide with each other.
         with tempfile.NamedTemporaryFile(suffix=".pdf", delete=False) as f:
             f.write(uploaded_file.read())
             pdf_path = f.name
         try:
             answer, pdf_text_with_pages = question_answering(question, pdf_path)
         finally:
             # Delete the uploaded file after processing, even if answering failed
             os.remove(pdf_path)
         st.write(f"Question: '{question}'")
         st.write("Answer:", answer['answer'])
         st.write("Score:", answer['score'])
         # Display context where the answer came from.
         # answer['start'] is a character offset into the joined text, not a
         # page number, so it has to be mapped back to the page containing it.
         located = _page_for_offset(pdf_text_with_pages, answer['start'])
         st.write("Context:")
         if located is None:
             st.write("Page Number: unknown")
         else:
             context_page_num, context_text = located
             st.write(f"Page Number: {context_page_num}")
             st.write(context_text)
 # Run main() only when this file is executed as a script, not when imported.
 if __name__ == "__main__":
     main()