Spaces:

ignaciaginting
/

answer_question_from_doc

Sleeping

ignaciaginting committed on Apr 11

Commit

fe0246c

verified ·

1 Parent(s): 8bc3f30

Updates

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,24 +1,40 @@
 from transformers import pipeline
-nlp = pipeline(
-    "document-question-answering",
-    model="impira/layoutlm-document-qa",
-)
-nlp(
-    "https://templates.invoicehome.com/invoice-template-us-neat-750px.png",
-    "What is the invoice number?"
-)
-# {'score': 0.9943977, 'answer': 'us-001', 'start': 15, 'end': 15}
-nlp(
-    "https://miro.medium.com/max/787/1*iECQRIiOGTmEFLdWkVIH2g.jpeg",
-    "What is the purchase amount?"
-)
-# {'score': 0.9912159, 'answer': '$1,000,000,000', 'start': 97, 'end': 97}
-nlp(
-    "https://www.accountingcoach.com/wp-content/uploads/2013/10/income-statement-example@2x.png",
-    "What are the 2020 net sales?"
-)
-# {'score': 0.59147286, 'answer': '$ 3,750', 'start': 19, 'end': 20}

+import streamlit as st
 from transformers import pipeline
+from PIL import Image
+import tempfile
+import fitz  # PyMuPDF
+# Build the QA pipeline through st.cache_resource so it is created once
+# and shared, instead of being rebuilt on every script rerun.
+@st.cache_resource
+def load_model():
+    """Return the `impira/layoutlm-document-qa` document-QA pipeline."""
+    qa_model = pipeline(
+        task="document-question-answering",
+        model="impira/layoutlm-document-qa",
+    )
+    return qa_model
+def _render_first_page(pdf_bytes):
+    """Render page 1 of the PDF held in *pdf_bytes* as an RGB PIL image.
+
+    Returns None when the document contains no pages. Errors raised by
+    PyMuPDF while opening the data propagate to the caller.
+    """
+    # Open straight from memory: no temporary file is left behind on disk,
+    # and the context manager closes the document even if rendering fails.
+    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
+        if doc.page_count == 0:
+            return None
+        pix = doc.load_page(0).get_pixmap(dpi=150)  # just first page for now
+        # get_pixmap defaults to RGB without alpha, which matches "RGB" here.
+        return Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+def _best_answer(result):
+    """Return the top answer dict from the pipeline output, or None.
+
+    NOTE(review): the pipeline appears to return a single dict in older
+    transformers releases and a list of dicts in newer ones; both shapes
+    are accepted here. Confirm against the pinned transformers version.
+    """
+    if isinstance(result, dict):
+        return result
+    return result[0] if result else None
+qa_pipeline = load_model()
+st.title("📄 Document Question Answering App")
+st.write("Upload a PDF file, enter a question, and get answers from the document.")
+# Upload PDF
+pdf_file = st.file_uploader("Upload PDF", type=["pdf"])
+# Ask a question
+question = st.text_input("Ask a question about the document:")
+if pdf_file and question:
+    # Convert first page of PDF to image using PyMuPDF.
+    # getvalue() returns the full upload regardless of the stream position,
+    # so a rerun (e.g. after editing the question) never sees empty data.
+    try:
+        img = _render_first_page(pdf_file.getvalue())
+    except (RuntimeError, ValueError) as err:
+        # NOTE(review): PyMuPDF's open errors are assumed to derive from
+        # RuntimeError/ValueError; verify for the installed version.
+        img = None
+        st.error(f"Could not read the PDF: {err}")
+    else:
+        if img is None:
+            st.error("The PDF has no pages.")
+    if img is not None:
+        # Show the rendered page
+        st.image(img, caption="Page 1 of PDF")
+        # Run the pipeline; only the slow call sits inside the spinner.
+        with st.spinner("Searching for the answer..."):
+            result = qa_pipeline(img, question)
+        answer = _best_answer(result)
+        if answer is None:
+            st.warning("No answer found on this page.")
+        else:
+            st.success(f"**Answer:** {answer['answer']} (score: {answer['score']:.2f})")