import os
from huggingface_hub import login
import torch
import time
from transformers import AutoTokenizer, pipeline, AutoModelForCausalLM
from langdetect import detect
from langchain.chains import RetrievalQA
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import TextLoader, PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
import spaces
from transcription_diarization import process_video

# Get Hugging Face token from the Space secret named `hf_secret`
hf_token = os.environ.get('hf_secret')
if not hf_token:
    # Name the variable that is actually read above; the previous message
    # pointed at HF_TOKEN, which this script never looks up.
    raise ValueError("hf_secret not found in environment variables. Please set it in the Space secrets.")

# Login to Hugging Face
login(token=hf_token)

# Analysis Pipeline Classes
class LazyPipeline:
    """Create the Llama 3.1 text-generation pipeline on first use and cache it."""

    def __init__(self):
        # Remains None until get_pipeline() has built the pipeline once.
        self.pipeline = None

    # NOTE(review): spaces.GPU(duration=250) presumably requests a GPU slot for
    # up to 250 seconds while this method runs -- confirm against the spaces docs.
    @spaces.GPU(duration=250)
    def get_pipeline(self):
        """Return the cached pipeline, building it if it does not exist yet.

        The tokenizer and the float16 model for
        ``meta-llama/Meta-Llama-3.1-8B-Instruct`` are loaded with the
        module-level ``hf_token`` and wrapped in a sampling
        ``text-generation`` pipeline.
        """
        if self.pipeline is not None:
            return self.pipeline

        checkpoint = "meta-llama/Meta-Llama-3.1-8B-Instruct"
        tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_auth_token=hf_token)
        language_model = AutoModelForCausalLM.from_pretrained(
            checkpoint,
            torch_dtype=torch.float16,
            device_map="auto",
            use_auth_token=hf_token,
        )

        # Generation settings forwarded unchanged to the pipeline factory.
        sampling_options = {
            "max_new_tokens": 650,
            "temperature": 0.2,
            "top_p": 0.95,
            "top_k": 3,
            "repetition_penalty": 1.2,
            "do_sample": True,
        }
        self.pipeline = pipeline(
            "text-generation",
            model=language_model,
            tokenizer=tokenizer,
            **sampling_options,
        )
        return self.pipeline

class LazyLLM:
    """Wrap a LazyPipeline in a HuggingFacePipeline LLM, created on demand."""

    def __init__(self, lazy_pipeline):
        # Source of the underlying transformers pipeline.
        self.lazy_pipeline = lazy_pipeline
        # Cached HuggingFacePipeline; None until get_llm() is first called.
        self.llm = None

    @spaces.GPU(duration=150)
    def get_llm(self):
        """Return the cached LLM wrapper, constructing it on the first call."""
        if self.llm is not None:
            return self.llm

        self.llm = HuggingFacePipeline(pipeline=self.lazy_pipeline.get_pipeline())
        return self.llm

class LazyChains:
    """Build the three RetrievalQA chains on first use and cache them."""

    def __init__(self, lazy_llm):
        # Provider of the shared LLM used by every chain.
        self.lazy_llm = lazy_llm
        # Chains are created together by get_chains(); all start as None.
        self.attachments_chain = None
        self.bigfive_chain = None
        self.personalities_chain = None

    def create_prompt(self, task):
        """Return a PromptTemplate that prefixes *task* to the context/question layout."""
        layout = task + "\n\nContext: {context}\n\nTask: {question}\n\n-----------\n\nAnswer: "
        return PromptTemplate(template=layout, input_variables=["context", "question"])

    def _build_chain(self, llm, store, task):
        """Create one "stuff" RetrievalQA chain over *store* prompted with *task*."""
        return RetrievalQA.from_chain_type(
            llm=llm,
            chain_type="stuff",
            retriever=store.as_retriever(),
            chain_type_kwargs={"prompt": self.create_prompt(task)},
        )

    @spaces.GPU(duration=200)
    def get_chains(self):
        """Return ``(attachments_chain, bigfive_chain, personalities_chain)``.

        The chains are built the first time this is called, using the
        module-level vector stores and task texts, and reused afterwards.
        """
        # attachments_chain doubles as the "already built" marker for all three.
        if self.attachments_chain is None:
            shared_llm = self.lazy_llm.get_llm()
            self.attachments_chain = self._build_chain(shared_llm, attachments_db, attachments_task)
            self.bigfive_chain = self._build_chain(shared_llm, bigfive_db, bigfive_task)
            self.personalities_chain = self._build_chain(shared_llm, personalities_db, personalities_task)
        return self.attachments_chain, self.bigfive_chain, self.personalities_chain

# Module-level lazy holders, wired pipeline -> LLM -> chains. Constructing them
# only stores references; nothing is built until the corresponding get_*() call.
lazy_pipe = LazyPipeline()
lazy_llm = LazyLLM(lazy_pipe)
lazy_chains = LazyChains(lazy_llm)

# Load instruction files
def load_instructions(file_path):
    """Return the contents of the text file at *file_path*, stripped of
    leading and trailing whitespace.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read().strip()

# Read the three task prompt files, in this order, into their module-level names.
attachments_task, bigfive_task, personalities_task = map(
    load_instructions,
    (
        "tasks/Attachments_task.txt",
        "tasks/BigFive_task.txt",
        "tasks/Personalities_task.txt",
    ),
)

# Load knowledge files and create vector stores
def load_knowledge(file_path):
    """Load the text file at *file_path* and split it into chunks.

    Args:
        file_path: Path of the knowledge file to read.

    Returns:
        The documents produced by ``CharacterTextSplitter`` (chunk size 1000,
        no overlap) from the loaded file.
    """
    # Decode explicitly as UTF-8 rather than relying on the locale default,
    # so the same file loads identically on every platform.
    documents = TextLoader(file_path, encoding="utf-8").load()
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return splitter.split_documents(documents)

# One embedding model instance is shared by all three vector stores.
embeddings = HuggingFaceEmbeddings()


def _build_vector_store(knowledge_path):
    """Chunk the knowledge file at *knowledge_path* and index it in FAISS."""
    return FAISS.from_documents(load_knowledge(knowledge_path), embeddings)


attachments_db = _build_vector_store("knowledge/bartholomew_attachments_definitions - no int.txt")
bigfive_db = _build_vector_store("knowledge/bigfive_definitions.txt")
personalities_db = _build_vector_store("knowledge/personalities_definitions.txt")

def detect_language(text):
    """Return the language code detected for *text*, defaulting to ``"en"``.

    Args:
        text: The text to classify.

    Returns:
        Whatever ``detect`` returns for *text*, or ``"en"`` when *text* is
        blank, is not a string, or detection raises an error.
    """
    # Blank or non-string input has nothing to classify; skip the detector.
    if not isinstance(text, str) or not text.strip():
        return "en"
    try:
        return detect(text)
    except Exception:
        # Best-effort fallback. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return "en"  # default to English if detection fails

# Analysis functions
def analyze_content(content, safe_progress):
    """Run the attachments, Big Five and personalities chains over *content*.

    Args:
        content: Text passed to each chain as its ``query``.
        safe_progress: Callable invoked as ``safe_progress(value, desc=...)``
            before each chain runs.

    Returns:
        ``(attachments_answer, bigfive_answer, personalities_answer)``, each
        being the text after the last answer marker in the chain's result,
        with surrounding whitespace removed.
    """
    # Same separator the prompt template ends with; the answer follows it.
    answer_marker = "-----------\n\nAnswer:"

    def run_step(chain, fraction, message):
        # Report progress, query the chain, then keep only the answer part.
        safe_progress(fraction, desc=message)
        output = chain({"query": content})
        return output['result'].split(answer_marker)[-1].strip()

    attachments_chain, bigfive_chain, personalities_chain = lazy_chains.get_chains()

    attachments_answer = run_step(attachments_chain, 0.6, "Analyzing attachments...")
    bigfive_answer = run_step(bigfive_chain, 0.7, "Analyzing Big Five traits...")
    personalities_answer = run_step(personalities_chain, 0.8, "Analyzing personalities...")

    return attachments_answer, bigfive_answer, personalities_answer

# Main processing function
def process_input(input_file, progress=None):
    """Extract text from an uploaded file and run the three analyses on it.

    Args:
        input_file: Uploaded file object; only its ``name`` attribute (used
            as a filesystem path) is read.
        progress: Optional callable invoked as ``progress(value, desc=...)``.
            Exceptions it raises are printed and otherwise ignored.

    Returns:
        A 6-tuple ``(status, execution_info, detected_language,
        attachments_answer, bigfive_answer, personalities_answer)``. For an
        unsupported file extension, ``status`` is an error message and the
        remaining five items are ``None``.
    """
    start_time = time.time()

    def safe_progress(value, desc=""):
        # Progress reporting is best-effort: a failing callback must not
        # abort the analysis.
        if progress is not None:
            try:
                progress(value, desc=desc)
            except Exception as e:
                print(f"Progress update failed: {e}")

    safe_progress(0, desc="Processing file...")

    file_extension = os.path.splitext(input_file.name)[1].lower()

    if file_extension == '.txt':
        with open(input_file.name, 'r', encoding='utf-8') as file:
            content = file.read()
    elif file_extension == '.pdf':
        loader = PyPDFLoader(input_file.name)
        pages = loader.load_and_split()
        content = '\n'.join(page.page_content for page in pages)
    elif file_extension in ('.mp4', '.avi', '.mov'):
        safe_progress(0.2, desc="Processing video...")
        srt_path = process_video(input_file.name, hf_token, "en")
        try:
            with open(srt_path, 'r', encoding='utf-8') as file:
                content = file.read()
        finally:
            # Delete the generated transcript even when reading it fails, so
            # a failed run does not leave the file behind.
            if os.path.exists(srt_path):
                os.remove(srt_path)
    else:
        return "Unsupported file format. Please upload a TXT, PDF, or video file.", None, None, None, None, None

    detected_language = detect_language(content)

    safe_progress(0.4, desc="Analyzing content...")

    attachments_answer, bigfive_answer, personalities_answer = analyze_content(content, safe_progress)

    execution_time = time.time() - start_time
    execution_info = f"{execution_time:.2f} seconds"

    safe_progress(1.0, desc="Analysis complete!")

    print("Attachments output:", attachments_answer)
    print("Big Five output:", bigfive_answer)
    print("Personalities output:", personalities_answer)

    return ("Analysis complete!", execution_info, detected_language,
            attachments_answer, bigfive_answer, personalities_answer)