# app.py — PDF question-answering chatbot (Hugging Face Space)
# NOTE: the original upload carried scraped page residue here
# ("trry / app.py ... raw / history blame / 1.78 kB"), which is not
# valid Python; it has been converted to this comment header.
import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
from huggingface_hub import login
# 1. Authentication (MUST HAVE)
# Fail fast with a clear message if HF_TOKEN is missing: passing token=None
# into login() otherwise surfaces as a confusing auth error much later,
# when the HuggingFaceHub LLM is first called.
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    raise RuntimeError(
        "HF_TOKEN environment variable is not set. "
        "Add it as a secret in the Space settings."
    )
login(token=_hf_token)
# 2. PDF Processing Function
def create_qa_system():
    """Build a RetrievalQA chain over the bundled ``data.pdf``.

    Pipeline: load the PDF, split it into overlapping character chunks,
    embed the chunks with a MiniLM sentence transformer, index them in a
    FAISS vector store, and wire a small hosted flan-t5 model on top as a
    "stuff"-style retrieval QA chain.

    Raises:
        gr.Error: if ``data.pdf`` is not present next to this script.
    """
    # Guard clause: the Space must ship data.pdf in its Files tab.
    if not os.path.exists("data.pdf"):
        raise gr.Error("❌ data.pdf not found! Upload it in Space's Files tab")

    # Load the PDF pages as LangChain documents.
    pages = PyMuPDFLoader("data.pdf").load()

    # Chunk the text so each piece fits comfortably in the LLM context.
    splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = splitter.split_documents(pages)

    # Embed and index the chunks for similarity search.
    embedder = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    vector_store = FAISS.from_documents(chunks, embedder)

    # Small hosted model — chosen to stay within the free inference tier.
    model = HuggingFaceHub(
        repo_id="google/flan-t5-base",  # Changed to smaller model
        model_kwargs={"temperature": 0.2, "max_length": 256},
    )

    # Retrieve the top-2 chunks and stuff them into the prompt.
    return RetrievalQA.from_chain_type(
        llm=model,
        chain_type="stuff",
        retriever=vector_store.as_retriever(search_kwargs={"k": 2}),
    )
# 3. Initialize system
# Built once at module import (Space startup); create_qa_system raises
# gr.Error if data.pdf is missing, so a misconfigured Space fails early.
qa = create_qa_system()
# 4. Chat interface
def chat(message, history):
    """Answer one user question against the indexed PDF.

    Args:
        message: The user's question (plain text).
        history: Prior chat turns supplied by gr.ChatInterface; unused here
            because RetrievalQA answers each query independently.

    Returns:
        The chain's answer string.
    """
    # Fix: Chain.__call__ (qa({...})) is deprecated in langchain >= 0.1
    # (which this file already requires via its langchain_community
    # imports); .invoke() is the supported, equivalent API.
    response = qa.invoke({"query": message})
    return response["result"]
# 5. Launch Gradio
# Name the interface before launching so it is easy to extend
# (e.g. demo.queue()) without restructuring this call.
demo = gr.ChatInterface(
    fn=chat,
    title="PDF Chatbot",
    description="Upload your PDF in Files tab ➡️ Ask questions!",
)
demo.launch()