# Hugging Face Space — UI status text captured along with the source:
# "Spaces: Sleeping" (not part of the program).
import os

import gradio as gr
from huggingface_hub import login
from langchain.chains import RetrievalQA
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyMuPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import HuggingFaceHub
from langchain_community.vectorstores import FAISS

# 1. Authentication (required for HuggingFaceHub inference calls).
# Only log in when a token is actually configured: login(token=None)
# raises in non-interactive environments, so a local run without
# HF_TOKEN would crash here instead of at the first inference call.
hf_token = os.environ.get("HF_TOKEN")
if hf_token:
    login(token=hf_token)
# 2. PDF Processing Function | |
# 2. PDF Processing Function
def create_qa_system(pdf_path="data.pdf"):
    """Build a RetrievalQA chain over the PDF at *pdf_path*.

    Loads the PDF, splits it into overlapping chunks, embeds the chunks
    into an in-memory FAISS index, and wires a small hosted LLM into a
    "stuff"-type question-answering chain.

    Args:
        pdf_path: Path to the source PDF. Defaults to "data.pdf" so
            existing callers are unaffected.

    Returns:
        A RetrievalQA chain that answers calls like {"query": ...}.

    Raises:
        gr.Error: If the PDF is missing — shown as a friendly message in
            the Gradio UI instead of a stack trace.
    """
    # File check
    if not os.path.exists(pdf_path):
        raise gr.Error(f"❌ {pdf_path} not found! Upload it in Space's Files tab")

    # Load every page of the PDF as LangChain documents.
    documents = PyMuPDFLoader(pdf_path).load()

    # Split into ~500-char chunks with 50-char overlap so retrieval
    # returns focused passages without cutting context mid-sentence.
    text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    # Embed chunks and index them in FAISS for similarity search.
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    db = FAISS.from_documents(texts, embeddings)

    # Initialize LLM — a small model compatible with the free tier.
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",
        model_kwargs={"temperature": 0.2, "max_length": 256},
    )

    # "stuff" chain: the top-k retrieved chunks are packed into a single
    # prompt; k=2 keeps the prompt within the small model's context.
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 2}),
    )
# 3. Initialize system — build the chain once at startup so every chat
# request reuses the same FAISS index and LLM handle.
qa = create_qa_system()


# 4. Chat interface
def chat(message, history):
    """Answer a user question via the QA chain.

    Args:
        message: The user's question.
        history: Prior chat turns (required by gr.ChatInterface; unused).

    Returns:
        The chain's answer text.
    """
    return qa({"query": message})["result"]
# 5. Launch Gradio — binding the interface to `demo` first follows the
# Hugging Face Spaces naming convention for the app object.
demo = gr.ChatInterface(
    chat,
    title="PDF Chatbot",
    description="Upload your PDF in Files tab ➡️ Ask questions!",
)
demo.launch()