File size: 2,058 Bytes
7f29224
4fa0927
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7f29224
 
4fa0927
 
 
7f29224
 
 
4fa0927
 
 
 
 
 
 
 
7f29224
 
4fa0927
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import HuggingFaceHub

# Configuration
EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
LLM_REPO_ID = "google/flan-t5-large"  # Hugging Face language model repo used for generation

# Build a searchable vector index from a PDF file.
def load_and_process_pdf(pdf_path):
    """Load a PDF, split it into overlapping chunks, and index it in FAISS.

    Args:
        pdf_path: Filesystem path to the PDF to ingest.

    Returns:
        A FAISS vector store holding the embedded document chunks.
    """
    # Read every page of the PDF into LangChain documents.
    pages = PyPDFLoader(pdf_path).load()

    # Chunk the text; the 200-char overlap keeps context across boundaries.
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(pages)

    # Embed each chunk and store the vectors in FAISS for similarity search.
    embedder = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL)
    return FAISS.from_documents(chunks, embedder)

# Answer a question about an uploaded PDF using retrieval-augmented generation.
def generate_response(pdf_file, query):
    """Run a RAG pipeline over the uploaded PDF and return the LLM's answer.

    Args:
        pdf_file: The uploaded file from Gradio — either a file-like object
            with a ``.name`` attribute (older Gradio) or a plain filepath
            string (newer Gradio ``type="filepath"``). ``None`` if nothing
            was uploaded.
        query: The user's question about the document.

    Returns:
        The generated answer string, or an error message (in Portuguese,
        matching the UI language) when no PDF was provided.
    """
    if pdf_file is None:
        return "Erro: Nenhum arquivo PDF foi carregado."

    # Accept both a tempfile-like object (has .name) and a bare path string;
    # older code assumed .name and crashed on newer Gradio versions.
    pdf_path = getattr(pdf_file, "name", pdf_file)

    # Load and index the PDF (note: rebuilt on every query; fine for a demo,
    # but a cache keyed on the path would avoid repeated embedding work).
    db = load_and_process_pdf(pdf_path)

    # Configure the hosted language model.
    llm = HuggingFaceHub(repo_id=LLM_REPO_ID, model_kwargs={"temperature": 0.7, "max_length": 512})

    # Wire retrieval + generation: stuff the top-3 chunks into the prompt.
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True
    )

    # Execute the query and return only the answer text.
    result = qa_chain({"query": query})
    return result["result"]

# Gradio interface: a PDF upload plus a free-text question box.
iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.File(label="Upload PDF", type="file"),
        gr.Textbox(label="Sua Pergunta")
    ],
    outputs=gr.Textbox(label="Resposta Gerada"),
    title="Sistema de RAG com LangChain",
    description="Faça upload de um PDF e faça perguntas sobre o conteúdo."
)

# Guard the launch so importing this module (e.g. for testing) does not
# start a web server; share=True exposes a public temporary URL.
if __name__ == "__main__":
    iface.launch(share=True)