|
import torch |
|
import gradio as gr |
|
import pdfplumber |
|
from transformers import pipeline |
|
|
|
# Relative path to a local snapshot directory of the
# deepset/roberta-base-squad2 checkpoint.
# NOTE(review): nothing in the visible code reads `model_path`; the pipeline
# below is built from the model id instead -- confirm which one is intended.
model_path = (
    "../Models/models--deepset--roberta-base-squad2"
    "/snapshots/cbf50ba81465d4d8676b8bab348e31835147541b"
)

# Question-answering pipeline, created once at import time and reused by
# get_answer for every request.
question_answer = pipeline(
    task="question-answering",
    model="deepset/roberta-base-squad2",
)
|
|
|
def read_pdf_content(file_obj):
    """
    Read a PDF and return the text of all its pages as a single string.

    Parameters:
        file_obj: The PDF to read; it is handed unchanged to
            ``pdfplumber.open``.

    Returns:
        str: The text of every page that produced text, joined with newlines
            so that words at a page boundary are not fused together. If
            anything goes wrong while opening or parsing the file, a message
            starting with "An error occurred: " is returned instead of
            raising, because callers detect failure by that prefix.
    """
    try:
        with pdfplumber.open(file_obj) as pdf:
            # extract_text() may yield None (or "") for a page without a text
            # layer, e.g. a scanned image; treat such pages as empty instead
            # of failing the whole document.
            page_texts = [page.extract_text() or "" for page in pdf.pages]
    except Exception as e:
        # Boundary handler: the result is shown directly in the UI, so the
        # failure is reported as text rather than propagated.
        return f"An error occurred: {e}"

    return "\n".join(text for text in page_texts if text)
|
|
|
def get_answer(file, question):
    """
    Answer a question using the text of an uploaded PDF as context.

    Parameters:
        file: The uploaded PDF, passed on to ``read_pdf_content``.
        question (str): The question to answer.

    Returns:
        str: The answer text produced by the question-answering pipeline, or
            a human-readable message when the question is blank, the PDF
            could not be read, or the PDF contains no extractable text.
    """
    # The pipeline rejects empty inputs with an exception; answer with a
    # readable message instead of letting the request fail.
    if not question or not question.strip():
        return "Please enter a question."

    context = read_pdf_content(file)

    # read_pdf_content reports failures as a string with this prefix.
    if context.startswith("An error occurred"):
        return context

    # A PDF without a text layer yields no context to search.
    if not context.strip():
        return "No extractable text was found in the uploaded PDF."

    answer = question_answer(question=question, context=context)
    return answer["answer"]
|
|
|
# Gradio UI: a PDF upload and a one-line question box feed get_answer, whose
# return value is shown in a single text box.
demo = gr.Interface(
    fn=get_answer,
    inputs=[
        gr.File(label="Upload your PDF file"),
        gr.Textbox(label="Input your question", lines=1),
    ],
    outputs=[
        gr.Textbox(label="Answer text", lines=1),
    ],
    title="@GenAILearniverse Project 5: Document Q & A",
    description="This application will be used to answer questions based on context provided from the uploaded PDF.",
)

# Called at module level, so the app is launched whenever this file is
# executed or imported.
demo.launch()
|
|