Spaces:
Sleeping
Sleeping
Commit
·
e1c69a3
1
Parent(s):
d9b70c9
Update app.py
Browse files
app.py
CHANGED
@@ -13,30 +13,32 @@ def truncate_to_word_boundary(text, max_words=100):
|
|
13 |
# Function to perform question-answering
|
14 |
def question_answering(question, pdf_path):
|
15 |
pdf_reader = PdfReader(pdf_path)
|
16 |
-
|
17 |
|
18 |
-
for pdf_page in pdf_reader.pages:
|
19 |
-
pdf_text
|
|
|
|
|
|
|
20 |
|
21 |
# Perform question-answering using Hugging Face's Transformers
|
22 |
question_answerer = pipeline("question-answering", model="distilbert-base-cased-distilled-squad", tokenizer="distilbert-base-cased-distilled-squad")
|
23 |
answer = question_answerer(question=question, context=pdf_text)
|
24 |
|
25 |
-
return answer,
|
26 |
|
27 |
def main():
|
28 |
-
st.title("Question Answering on
|
29 |
|
30 |
-
uploaded_file = st.file_uploader("Upload a file:", type=["pdf"
|
31 |
question = st.text_input("Ask your question:")
|
32 |
|
33 |
if st.button("Answer") and uploaded_file is not None:
|
34 |
-
file_extension = os.path.splitext(uploaded_file.name)[1].lower()
|
35 |
pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
|
36 |
with open(pdf_path, "wb") as f:
|
37 |
f.write(uploaded_file.read())
|
38 |
|
39 |
-
answer,
|
40 |
|
41 |
# Delete the uploaded file after processing
|
42 |
os.remove(pdf_path)
|
@@ -44,13 +46,13 @@ def main():
|
|
44 |
st.write(f"Question: '{question}'")
|
45 |
st.write("Answer:", answer['answer'])
|
46 |
st.write("Score:", answer['score'])
|
47 |
-
st.write("Page Number:", answer['start'] + 1) # Add 1 to convert 0-based index to 1-based page number
|
48 |
|
49 |
-
# Display
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
st.write("
|
|
|
54 |
|
55 |
if __name__ == "__main__":
|
56 |
main()
|
|
|
13 |
# Function to perform question-answering
def question_answering(question, pdf_path):
    """Answer *question* using the text extracted from the PDF at *pdf_path*.

    The text of every page is extracted, the pages are joined with a single
    newline into one context string, and that string is handed to a Hugging
    Face question-answering pipeline.

    Args:
        question: The question to ask about the document.
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        A 2-tuple ``(answer, pdf_text_with_pages)`` where ``answer`` is
        whatever the pipeline returns (the caller reads its ``'answer'``,
        ``'score'`` and ``'start'`` entries) and ``pdf_text_with_pages`` is a
        list of ``(page_number, page_text)`` tuples with page numbers
        starting at 1.
    """
    reader = PdfReader(pdf_path)

    # Keep each page's text next to its 1-based page number.
    pdf_text_with_pages = [
        (number, page.extract_text())
        for number, page in enumerate(reader.pages, start=1)
    ]

    # The model sees the whole document as one newline-separated context.
    full_text = "\n".join(page_text for _, page_text in pdf_text_with_pages)

    # Perform question-answering using Hugging Face's Transformers
    model_name = "distilbert-base-cased-distilled-squad"
    qa_pipeline = pipeline("question-answering", model=model_name, tokenizer=model_name)
    answer = qa_pipeline(question=question, context=full_text)

    return answer, pdf_text_with_pages
|
29 |
|
30 |
def _page_for_offset(char_offset, pdf_text_with_pages):
    """Return the page number whose text contains *char_offset*.

    *char_offset* is a position in the string obtained by joining the page
    texts with a single ``"\\n"`` (which is how ``question_answering`` builds
    its context). A separator character is attributed to the page that
    precedes it. Offsets past the end map to the last page; an empty page
    list yields ``None``.
    """
    cursor = 0
    page_number = None
    for page_number, page_text in pdf_text_with_pages:
        page_end = cursor + len(page_text or "")
        if char_offset <= page_end:
            return page_number
        cursor = page_end + 1  # skip the "\n" separator
    return page_number


def main():
    """Run the Streamlit UI: upload a PDF, ask a question, show the answer.

    The uploaded file is written to the current working directory so the PDF
    reader can open it by path, and is always removed afterwards, even when
    question answering fails. Besides the answer and its score, the page the
    answer was found on is displayed together with that page's text.
    """
    st.title("Question Answering on a PDF File")

    uploaded_file = st.file_uploader("Upload a PDF file:", type=["pdf"])
    question = st.text_input("Ask your question:")

    if st.button("Answer") and uploaded_file is not None:
        # The question-answering pipeline rejects an empty question, so stop
        # early with a readable message instead of a traceback.
        if not question.strip():
            st.warning("Please enter a question.")
            return

        pdf_path = os.path.join(os.getcwd(), uploaded_file.name)
        try:
            with open(pdf_path, "wb") as f:
                f.write(uploaded_file.read())

            answer, pdf_text_with_pages = question_answering(question, pdf_path)
        finally:
            # Delete the uploaded file after processing, even on failure
            if os.path.exists(pdf_path):
                os.remove(pdf_path)

        st.write(f"Question: '{question}'")
        st.write("Answer:", answer['answer'])
        st.write("Score:", answer['score'])

        # Display context where the answer came from.
        # answer['start'] is a character offset into the joined document
        # text, not a page number, so it must be mapped back to a page.
        context_page_num = _page_for_offset(answer['start'], pdf_text_with_pages)
        if context_page_num is not None:
            # Page numbers start at 1; list indices start at 0.
            context_text = pdf_text_with_pages[context_page_num - 1][1]
            st.write("Context:")
            st.write(f"Page Number: {context_page_num}")
            st.write(context_text)
|
56 |
|
57 |
# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()
|