|
import gradio as gr |
|
from transformers import pipeline |
|
import PyPDF2 |
|
|
|
|
|
# Extractive question-answering pipeline, built once at import time so every
# request reuses the same loaded model.
QA_MODEL_NAME = "distilbert-base-cased-distilled-squad"
nlp = pipeline(task="question-answering", model=QA_MODEL_NAME)
|
|
|
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of an uploaded PDF.

    Args:
        pdf_file: The upload handed over by the file input. Either an object
            exposing the on-disk path through a ``name`` attribute, or the
            path itself as a string. ``None`` means nothing was uploaded.

    Returns:
        The text of all pages joined in page order, or ``""`` when no file
        was provided or no page yields any text.
    """
    if pdf_file is None:
        return ""

    # Accept both a wrapper object carrying the path on ``.name`` and a
    # plain path string.
    path = pdf_file if isinstance(pdf_file, str) else pdf_file.name

    with open(path, "rb") as file:
        # PdfReader / .pages replace PdfFileReader / numPages / getPage,
        # which were deprecated in PyPDF2 2.x and removed in 3.0.
        reader = PyPDF2.PdfReader(file)
        # "or ''" keeps the join safe if a page yields no text at all.
        return "".join(page.extract_text() or "" for page in reader.pages)
|
|
|
def answer_question(pdf_file, question):
    """Answer a question using the text of an uploaded PDF as context.

    Args:
        pdf_file: The uploaded PDF as delivered by the file input
            (``None`` when nothing was uploaded).
        question: The question typed by the user.

    Returns:
        The ``'answer'`` field of the question-answering pipeline result, or
        a short user-facing message when the upload is missing, the question
        is blank, or the PDF contains no extractable text.
    """
    # Guard clauses: the pipeline rejects empty questions/contexts with an
    # exception, which would surface in the UI as an opaque error.
    if pdf_file is None:
        return "Carregue um PDF primeiro."
    if not question or not question.strip():
        return "Digite uma pergunta."

    context = extract_text_from_pdf(pdf_file)
    if not context or not context.strip():
        return "Nenhum texto encontrado no PDF."

    result = nlp(question=question, context=context)
    return result["answer"]
|
|
|
|
|
# Web UI: one PDF upload and one question box in, one answer box out.
# Components come from the top-level gradio namespace; the gr.inputs /
# gr.outputs namespaces are deprecated in Gradio 3.x and removed in 4.x.
iface = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.File(label="Carregar PDF"),
        gr.Textbox(label="Pergunta"),
    ],
    outputs=gr.Textbox(label="Resposta"),
    title="QA sobre PDF",
    description="Carregue um PDF e faça perguntas sobre o conteúdo.",
)

# Launch the interface as soon as the module runs (kept at module level to
# preserve the script's original behavior).
iface.launch()