# -*- coding: utf-8 -*-
"""app
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1ZybFOpX1r-SAA-RslP5WJkQ9gdI6JCCj
"""
import streamlit as st
import os
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import PyPDFLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA
import tempfile
st.set_page_config(page_title="Chat com PDF", layout="centered")
st.title("π Chat com PDF usando LangChain")

uploaded_file = st.file_uploader("π€ Envie um arquivo PDF", type="pdf")

if uploaded_file is not None:
    # PyPDFLoader needs a real filesystem path, so persist the uploaded
    # bytes to a temporary file first (delete=False: the path must survive
    # past this `with` block; we remove it ourselves in `finally` below).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
        tmp.write(uploaded_file.read())
        pdf_path = tmp.name

    with st.spinner("π Processando o PDF..."):
        try:
            # Load the PDF and split it into overlapping chunks so each
            # chunk fits comfortably in the embedding/LLM context window.
            loader = PyPDFLoader(pdf_path)
            documents = loader.load()
            text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
            docs = text_splitter.split_documents(documents)

            # Embed the chunks locally and index them for similarity search.
            embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
            vectorstore = FAISS.from_documents(docs, embeddings)

            # Fail with a clear message instead of a raw KeyError when the
            # API key is not configured.
            api_key = os.environ.get("OPENROUTER_API_KEY")
            if not api_key:
                st.error("Erro: defina a variável de ambiente OPENROUTER_API_KEY.")
                st.stop()

            # LLM served via the OpenRouter OpenAI-compatible endpoint.
            llm = ChatOpenAI(
                openai_api_base="https://openrouter.ai/api/v1",
                openai_api_key=api_key,
                model='deepseek/deepseek-r1-zero:free'
            )

            # Retrieval-augmented QA chain over the FAISS index; also
            # returns the source chunks so we can show provenance.
            qa_chain = RetrievalQA.from_chain_type(
                llm=llm,
                retriever=vectorstore.as_retriever(),
                return_source_documents=True
            )

            # Question input and answer display.
            pergunta = st.text_input("β FaΓ§a uma pergunta sobre o PDF:")
            if pergunta:
                resposta = qa_chain.invoke({"query": pergunta})
                st.success("β Resposta:")
                st.write(resposta['result'])

                with st.expander("π Fontes usadas"):
                    # Show a 500-char preview of each retrieved chunk.
                    for i, doc in enumerate(resposta['source_documents']):
                        st.markdown(f"**Fonte {i+1}:**\n\n{doc.page_content[:500]}...")
        except Exception as e:
            # Top-level UI boundary: surface any failure to the user.
            st.error(f"Erro: {str(e)}")
        finally:
            # Don't leak one temp PDF per Streamlit rerun.
            try:
                os.remove(pdf_path)
            except OSError:
                pass