import os
import gc
import torch
import gradio as gr
from langchain_community.document_loaders import PyMuPDFLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from transformers import pipeline, AutoTokenizer
from huggingface_hub import login

# Limit intra-op parallelism to keep CPU usage predictable
torch.set_num_threads(2)

# HF token handling: log in only when a token is provided
if os.environ.get("HF_TOKEN"):
    login(token=os.environ["HF_TOKEN"])


def load_documents(file_path="study_materials"):
    """Load every PDF and TXT file found in the given directory."""
    documents = []
    for filename in os.listdir(file_path):
        path = os.path.join(file_path, filename)
        if filename.endswith(".pdf"):
            loader = PyMuPDFLoader(path)
            documents.extend(loader.load())
        elif filename.endswith(".txt"):
            loader = TextLoader(path)
            documents.extend(loader.load())
    return documents


def create_qa_system():
    try:
        # Load and chunk documents
        documents = load_documents()
        if not documents:
            raise ValueError("šŸ“š No study materials found")

        text_splitter = CharacterTextSplitter(
            chunk_size=1000,
            chunk_overlap=200,
            separator="\n\n",
        )
        texts = text_splitter.split_documents(documents)

        # Create embeddings
        embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-MiniLM-L6-v2"
        )

        # Vector store
        db = FAISS.from_documents(texts, embeddings)

        # LLM setup with CPU optimizations
        tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-large")
        pipe = pipeline(
            "text2text-generation",
            model="google/flan-t5-large",
            tokenizer=tokenizer,
            max_new_tokens=200,  # cap generated tokens (max_length is deprecated here)
            temperature=0.7,
            do_sample=True,
            top_k=50,
            device=-1,  # force CPU usage
            model_kwargs={
                # Stay in full precision: float16 inference is not reliably
                # supported on CPU and typically errors for T5 models.
                "low_cpu_mem_usage": True,
            },
        )

        # Free memory held by intermediate loading objects
        gc.collect()

        return RetrievalQA.from_llm(
            llm=HuggingFacePipeline(pipeline=pipe),
            retriever=db.as_retriever(search_kwargs={"k": 3}),
            return_source_documents=True,
        )
    except Exception as e:
        raise gr.Error(f"Error: {str(e)}")


# Initialize the QA system at startup so failures surface immediately
try:
    qa = create_qa_system()
except Exception as e:
    print(f"Startup failed: {str(e)}")
    raise


def ask_question(question, history):
    try:
        result = qa.invoke({"query": question})
        answer = result["result"]
        sources = {
            os.path.basename(doc.metadata["source"])
            for doc in result["source_documents"]
        }
        return f"{answer}\n\nšŸ“š Sources: {', '.join(sources)}"
    except Exception as e:
        return f"Error: {str(e)[:150]}"


gr.ChatInterface(
    ask_question,
    title="Study Assistant",
    description="Place PDF/TXT files in the 'study_materials' folder and ask questions!",
    theme="soft",
).launch()
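
# Usage sketch (an assumption, not part of the app itself: the script is saved
# as app.py and Gradio's default port is used). The documents are not uploaded
# through the UI; they must sit in a study_materials/ directory next to the
# script before launch, e.g.:
#
#   study_materials/
#     lecture1.pdf
#     notes.txt
#
# Then run `python app.py` and open the chat UI, which Gradio serves on
# http://127.0.0.1:7860 by default.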