Spaces:

ANASAKHTAR
/

Document_Question_And_Answer

Running

File size: 1,571 Bytes

import torch
import gradio as gr
import pdfplumber
from transformers import pipeline

# Relative path to a locally cached snapshot of the same model. It is not
# referenced below: the pipeline is created from the Hub model id instead.
model_path = (
    "../Models/models--deepset--roberta-base-squad2/snapshots"
    "/cbf50ba81465d4d8676b8bab348e31835147541b"
)

# Extractive question-answering pipeline, built once at import time and
# reused for every request.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)

def read_pdf_content(file_obj):
    """
    Reads the content of a PDF file object and returns the extracted text.

    Parameters:
    file_obj (file object): The file object (or path) to read from; it is
        passed unchanged to ``pdfplumber.open``.

    Returns:
    str: The text of all pages, one page per line group (pages are separated
        by a newline). If the PDF cannot be opened or parsed, a message
        starting with "An error occurred: " is returned instead of raising.
    """
    try:
        with pdfplumber.open(file_obj) as pdf:
            # extract_text() may give None (or "") for a page that has no
            # text layer, e.g. a scanned image; treat such pages as empty
            # rather than letting one of them fail the whole document.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:
        # Callers detect failure by the "An error occurred" prefix, so the
        # error is reported as a string rather than propagated.
        return f"An error occurred: {e}"

    # Join with a newline so the last word of one page is not fused with the
    # first word of the next page.
    return "\n".join(page_texts)

def get_answer(file, question):
    """
    Answers a question using the text of an uploaded PDF as context.

    Parameters:
    file: The uploaded PDF as delivered by the file input; expected to be
        None when nothing has been uploaded.
    question (str): The question to answer from the document.

    Returns:
    str: The answer text extracted by the model, or a human-readable message
        when the input is missing, the PDF cannot be read, or the PDF
        contains no extractable text.
    """
    # Validate inputs up front: the question-answering pipeline raises on an
    # empty question or context, which would surface as a raw error in the UI.
    if file is None:
        return "Please upload a PDF file."
    if not question or not question.strip():
        return "Please enter a question."

    # Extract text from the uploaded PDF
    context = read_pdf_content(file)
    if context.startswith("An error occurred"):
        return context
    if not context.strip():
        return "No extractable text was found in the PDF."

    # Get the answer from the model
    answer = question_answer(question=question, context=context)
    return answer["answer"]

# Gradio UI: a PDF upload plus a one-line question box as inputs, and a
# one-line text box showing the answer returned by get_answer.
demo = gr.Interface(
    fn=get_answer,
    inputs=[
        gr.File(label="Upload your PDF file"),
        gr.Textbox(label="Input your question", lines=1),
    ],
    outputs=[
        gr.Textbox(label="Answer text", lines=1),
    ],
    title="@GenAILearniverse Project 5: Document Q & A",
    description="This application will be used to answer questions based on context provided from the uploaded PDF.",
)

# Start the web server for the interface.
demo.launch()