# NOTE(review): the three lines below were Hugging Face Spaces page-status text
# ("Spaces: Sleeping") captured by the extraction — not part of the program.
# Kept only as a comment so the file remains valid Python.
# -*- coding: utf-8 -*-
"""app

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1ZybFOpX1r-SAA-RslP5WJkQ9gdI6JCCj
"""
import os
import tempfile

import streamlit as st
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS

# Streamlit app: upload a PDF, index it with FAISS + sentence embeddings,
# then answer questions about it through a RetrievalQA chain backed by an
# OpenRouter-hosted LLM.
# NOTE(review): the original UI strings contained mojibake (e.g. "π", "β");
# plausible emojis were restored — confirm against the intended originals.
st.set_page_config(page_title="Chat com PDF", layout="centered")
st.title("📄 Chat com PDF usando LangChain")

uploaded_file = st.file_uploader("📤 Envie um arquivo PDF", type="pdf")

if uploaded_file is not None:
    # Persist the upload to disk: PyPDFLoader requires a real file path.
    # delete=False so the path outlives the `with` block; removed in `finally`.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    try:
        with st.spinner("🔄 Processando o PDF..."):
            # Load the PDF and split it into overlapping chunks for retrieval.
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(
                chunk_size=1000, chunk_overlap=200
            )
            docs = text_splitter.split_documents(documents)

            # Embed the chunks and index them in an in-memory FAISS store.
            embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            vectorstore = FAISS.from_documents(docs, embeddings)

            # LLM served via OpenRouter's OpenAI-compatible endpoint.
            # Fail with a clear message instead of an opaque KeyError when the
            # API key is missing.
            api_key = os.environ.get("OPENROUTER_API_KEY")
            if not api_key:
                st.error("Defina a variável de ambiente OPENROUTER_API_KEY.")
                st.stop()
            llm = ChatOpenAI(
                openai_api_base="https://openrouter.ai/api/v1",
                openai_api_key=api_key,
                model="deepseek/deepseek-r1-zero:free",
            )

            # Retrieval-augmented QA chain over the indexed document.
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                retriever=vectorstore.as_retriever(),
                return_source_documents=True,
            )

        # Question box; answering uses the vectorstore, not the temp file.
        pergunta = st.text_input("❓ Faça uma pergunta sobre o PDF:")
        if pergunta:
            resposta = qa_chain.invoke({"query": pergunta})
            st.success("✅ Resposta:")
            st.write(resposta["result"])

            with st.expander("📚 Fontes usadas"):
                for i, doc in enumerate(resposta["source_documents"]):
                    st.markdown(
                        f"**Fonte {i + 1}:**\n\n{doc.page_content[:500]}..."
                    )
    except Exception as e:  # app-level boundary: surface any failure in the UI
        st.error(f"Erro: {e}")
    finally:
        # The temp PDF was created with delete=False; remove it explicitly so
        # repeated Streamlit reruns don't accumulate files on disk.
        if os.path.exists(pdf_path):
            os.unlink(pdf_path)