File size: 2,960 Bytes
2d8c319
42d3ee2
 
c48b838
91b268b
42d3ee2
 
 
c48b838
42d3ee2
c48b838
42d3ee2
c48b838
 
 
 
 
 
 
 
42d3ee2
 
 
f8c1ecf
c48b838
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8c1ecf
c48b838
4d6816c
c48b838
 
 
 
 
 
2d88065
c48b838
 
 
 
 
 
 
 
0e5b4a4
c48b838
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import os
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer

def load_documents(file_path="study_materials"):
    """Load all PDF and TXT study materials from *file_path*.

    Args:
        file_path: Directory scanned (non-recursively) for documents.

    Returns:
        list: LangChain ``Document`` objects from every loadable file;
        empty if the directory contains no matching files.
    """
    documents = []
    # sorted() gives a deterministic load order regardless of filesystem.
    for filename in sorted(os.listdir(file_path)):
        path = os.path.join(file_path, filename)
        # Case-insensitive match so "NOTES.TXT" / "Paper.PDF" are not skipped.
        lower = filename.lower()
        if lower.endswith(".pdf"):
            documents.extend(PyMuPDFLoader(path).load())
        elif lower.endswith(".txt"):
            documents.extend(TextLoader(path).load())
    return documents

def create_qa_system():
    """Build the full RAG question-answering pipeline.

    Loads documents from the default ``study_materials`` folder, chunks
    them, embeds them into a FAISS index, and wires a FLAN-T5 pipeline
    into a LangChain ``RetrievalQA`` chain.

    Returns:
        RetrievalQA: chain returning both the answer and source documents.

    Raises:
        gr.Error: on any failure (no materials, model download error, ...),
        chained to the original exception so the traceback is preserved.
    """
    try:
        # Load documents
        documents = load_documents()
        if not documents:
            raise ValueError("📚 No study materials found")

        # Text splitting: 1100-char chunks with 200-char overlap keeps
        # paragraph context across chunk boundaries.
        text_splitter = CharacterTextSplitter(
            chunk_size=1100,
            chunk_overlap=200,
            separator="\n\n"
        )
        texts = text_splitter.split_documents(documents)

        # Embeddings (small, CPU-friendly sentence-transformer)
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Vector store
        db = FAISS.from_documents(texts, embeddings)

        # LLM setup with proper LangChain wrapper; device=-1 forces CPU.
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            max_length=600,
            temperature=0.7,
            tokenizer=tokenizer,
            do_sample=True,
            top_k=50,
            device=-1
        )

        # Wrap pipeline in LangChain component
        llm = HuggingFacePipeline(pipeline=pipe)

        # Create QA chain over the top-3 most similar chunks
        return RetrievalQA.from_llm(
            llm=llm,
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True
        )
    except Exception as e:
        # Chain with "from e" so the original traceback is not lost.
        raise gr.Error(f"Error: {str(e)}") from e

# Initialize system once at import time; model download / indexing happens here.
try:
    qa = create_qa_system()
except Exception as e:
    # Surface the failure in the console, then re-raise so the process
    # exits instead of serving a UI with no working QA chain.
    print(f"Startup failed: {str(e)}")
    raise

def ask_question(question, history):
    """Answer a user question via the global ``qa`` chain.

    Args:
        question: The user's query string.
        history: Chat history supplied by gr.ChatInterface (unused).

    Returns:
        str: The model's answer plus a de-duplicated source list, or a
        truncated error message if the chain fails.
    """
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        # Set comprehension de-duplicates; sorted() makes the displayed
        # source order deterministic (plain list(set) order is arbitrary).
        sources = sorted({doc.metadata['source'] for doc in result['source_documents']})
        return f"{answer}\n\n📚 Sources: {', '.join(sources)}"
    except Exception as e:
        # Truncate so a huge traceback doesn't flood the chat window.
        return f"Error: {str(e)[:150]}"

# Build the chat UI around ask_question, then start the local server.
demo = gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Upload PDF/TXT files in 'study_materials' folder and ask questions!",
    theme="soft"
)
demo.launch()