Spaces:

random2222
/

trry

Sleeping

File size: 1,779 Bytes

import os
import gradio as gr
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFaceHub
from huggingface_hub import login

# 1. Authentication (MUST HAVE)
# The token comes from the `HF_TOKEN` environment variable. Fail fast with a
# clear message when it is missing: `login(token=None)` would otherwise fall
# back to an interactive prompt, which cannot be answered on a headless Space.
hf_token = os.environ.get("HF_TOKEN")
if not hf_token:
    raise RuntimeError(
        "HF_TOKEN is not set. Add it as a secret in the Space settings."
    )
login(token=hf_token)

# 2. PDF Processing Function
def create_qa_system(pdf_path="data.pdf"):
    """Build a RetrievalQA chain that answers questions about one PDF.

    The PDF is loaded with ``PyMuPDFLoader``, split into chunks
    (``chunk_size=500``, ``chunk_overlap=50``), embedded with
    ``sentence-transformers/all-MiniLM-L6-v2`` into a FAISS vector store, and
    combined with the ``google/flan-t5-base`` model accessed through
    ``HuggingFaceHub``.

    Args:
        pdf_path: Path of the PDF to index. Defaults to ``"data.pdf"``.

    Returns:
        A ``RetrievalQA`` chain of type ``"stuff"`` whose retriever fetches the
        2 nearest chunks for each query.

    Raises:
        gr.Error: If ``pdf_path`` does not exist, or if no text chunks could
            be produced from it.
    """
    # File check
    if not os.path.exists(pdf_path):
        raise gr.Error(f"❌ {pdf_path} not found! Upload it in Space's Files tab")

    # Load PDF
    documents = PyMuPDFLoader(pdf_path).load()

    # Split text. CharacterTextSplitter only cuts at its separator, and the
    # default ("\n\n") is rare in extracted PDF text, which would leave chunks
    # far larger than chunk_size. Single newlines give it places to cut.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=500,
        chunk_overlap=50,
    )
    texts = text_splitter.split_documents(documents)
    if not texts:
        # An empty list would otherwise fail inside FAISS with an opaque error.
        raise gr.Error(f"❌ No extractable text found in {pdf_path}")

    # Create embeddings
    embeddings = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )

    # Build vector store
    db = FAISS.from_documents(texts, embeddings)

    # Initialize LLM (Free-tier compatible).
    # HuggingFaceHub looks for HUGGINGFACEHUB_API_TOKEN, not HF_TOKEN, so hand
    # it the token explicitly; fall back to its own variable if HF_TOKEN is
    # absent.
    api_token = os.environ.get("HF_TOKEN") or os.environ.get(
        "HUGGINGFACEHUB_API_TOKEN"
    )
    llm = HuggingFaceHub(
        repo_id="google/flan-t5-base",  # Changed to smaller model
        huggingfacehub_api_token=api_token,
        model_kwargs={"temperature": 0.2, "max_length": 256},
    )

    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": 2}),
    )

# 3. Initialize system
# Build the RetrievalQA chain once at import time; the `chat` handler below
# reads this module-level name on every request. If data.pdf is missing,
# create_qa_system() raises here and the app does not start.
qa = create_qa_system()

# 4. Chat interface
def chat(message: str, history: list) -> str:
    """Answer one user message using the module-level ``qa`` chain.

    Args:
        message: The text the user submitted in the chat box.
        history: Earlier turns passed in by ``gr.ChatInterface``. Unused: each
            question is sent to the chain on its own.

    Returns:
        The ``"result"`` field of the chain's response, or a short prompt when
        the message is empty or whitespace-only.
    """
    if not message or not message.strip():
        # Nothing to retrieve against; skip the embedding and LLM round-trip.
        return "Please enter a question about the PDF."

    # `invoke` is the supported entry point; calling the chain object directly
    # (`qa({...})`) is deprecated in LangChain.
    response = qa.invoke({"query": message})
    return response["result"]

# 5. Launch Gradio
# Wrap the `chat` handler in Gradio's ready-made chat UI, then start serving.
demo = gr.ChatInterface(
    fn=chat,
    title="PDF Chatbot",
    description="Upload your PDF in Files tab ➡️ Ask questions!",
)
demo.launch()