"""Gradio app: transcribe an uploaded video and analyze the transcript with RAG.

Flow, as implemented below:

1. Authenticate against the Hugging Face Hub with the token stored in the
   ``hf_secret`` environment variable.
2. Build a text-generation pipeline for Meta-Llama-3.1-8B-Instruct and wrap
   it for LangChain.
3. Load the task instructions and the knowledge files, index the knowledge
   files in FAISS, and build one RetrievalQA chain per knowledge base.
4. For every uploaded video, run ``diarization.process_video``, read the SRT
   it produced, and query the three chains with the combined prompt.
"""

import os
import shutil

# NOTE: the relative order of the non-stdlib imports is kept exactly as in the
# original script so that import-time side effects happen in the same order.
import gradio as gr
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from langchain.llms import HuggingFacePipeline
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from huggingface_hub import login
import diarization
import spaces

# Name of the environment variable (Space secret) holding the Hub token.
HF_TOKEN_ENV_VAR = 'hf_secret'

# Get Hugging Face token from Space secret
hf_token = os.environ.get(HF_TOKEN_ENV_VAR)
if not hf_token:
    # The message names the variable that is actually read, so whoever
    # configures the Space knows which secret to set.
    raise ValueError(
        f"{HF_TOKEN_ENV_VAR} not found in environment variables. "
        "Please set it in the Space secrets."
    )

# Login to Hugging Face
login(token=hf_token)


# Initialize the pipeline
@spaces.GPU(duration=120)
def initialize_pipeline():
    """Build the Llama 3.1 8B Instruct text-generation pipeline.

    Returns:
        A ``transformers`` text-generation pipeline using the float16 model,
        ``device_map="auto"``, and the sampling settings below.
    """
    # torch is imported lazily, inside the GPU-decorated function, exactly as
    # the original script did.
    import torch

    model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.5,
        top_p=0.95,
        repetition_penalty=1.15,
    )
    return pipe


# Create a LangChain wrapper around the pipeline
@spaces.GPU(duration=120)
def create_llm():
    """Return a LangChain ``HuggingFacePipeline`` wrapping the pipeline."""
    pipe = initialize_pipeline()
    return HuggingFacePipeline(pipeline=pipe)


llm = create_llm()


# Load instruction files
def load_instructions(file_path):
    """Return the full text of the instruction file at ``file_path``.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default locale encoding.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        return file.read()


general_task = load_instructions("tasks/general_task.txt")
attachments_task = load_instructions("tasks/Attachments_task.txt")
bigfive_task = load_instructions("tasks/BigFive_task.txt")
personalities_task = load_instructions("tasks/Personalities_task.txt")


# Load knowledge files
def load_knowledge(file_path):
    """Load a text file and split it into chunks for indexing.

    Args:
        file_path: Path of the knowledge text file.

    Returns:
        The documents produced by ``CharacterTextSplitter`` with
        ``chunk_size=1000`` and ``chunk_overlap=0``.
    """
    loader = TextLoader(file_path)
    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_documents(documents)
    return texts


attachments_knowledge = load_knowledge("knowledge/bartholomew_attachments_definitions.txt")
bigfive_knowledge = load_knowledge("knowledge/bigfive_definitions.txt")
personalities_knowledge = load_knowledge("knowledge/personalities_definitions.txt")

# Create vector stores
embeddings = HuggingFaceEmbeddings()
attachments_db = FAISS.from_documents(attachments_knowledge, embeddings)
bigfive_db = FAISS.from_documents(bigfive_knowledge, embeddings)
personalities_db = FAISS.from_documents(personalities_knowledge, embeddings)


def _build_retrieval_chain(vector_db):
    """Build a "stuff" RetrievalQA chain over ``vector_db`` using ``llm``."""
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_db.as_retriever(),
    )


# Create retrieval chains
@spaces.GPU(duration=120)
def create_chains():
    """Create the three RetrievalQA chains.

    Returns:
        A tuple ``(attachments_chain, bigfive_chain, personalities_chain)``.
    """
    return (
        _build_retrieval_chain(attachments_db),
        _build_retrieval_chain(bigfive_db),
        _build_retrieval_chain(personalities_db),
    )


attachments_chain, bigfive_chain, personalities_chain = create_chains()


# Function to process video file
@spaces.GPU(duration=600)
def process_video(video_file):
    """Transcribe an uploaded video and run the three analyses on it.

    Args:
        video_file: The value Gradio passes for the ``gr.File`` input. Both a
            plain filesystem path and an object exposing the path through a
            ``.name`` attribute are accepted.

    Returns:
        A tuple ``(final_result, output_file, srt_path)``: the combined
        analysis text, the path of the text file it was saved to, and the
        path of the SRT file that was read.

    Raises:
        gr.Error: If no file was uploaded.
    """
    if video_file is None:
        # Without this guard the handler fails with an opaque AttributeError.
        raise gr.Error("Please upload a video file before submitting.")

    # Path-like values are used directly; ``.name`` on a ``pathlib.Path``
    # would be only the basename, so it must not be consulted for those.
    if isinstance(video_file, (str, os.PathLike)):
        source_path = video_file
    else:
        source_path = video_file.name

    # Copy the uploaded video file to a temporary location
    temp_video_path = "temp_video.mp4"
    shutil.copy2(source_path, temp_video_path)

    # Process the video using the diarization script
    language = "en"
    diarization.process_video(temp_video_path, hf_token, language)

    # The SRT file is read from "<video name without extension>_combined.srt".
    # NOTE(review): this assumes diarization.process_video writes that file
    # next to the video; confirm against the diarization module.
    srt_path = os.path.splitext(temp_video_path)[0] + "_combined.srt"

    # Read the content of the SRT file
    with open(srt_path, 'r', encoding='utf-8') as file:
        srt_content = file.read()

    # Combine instructions and SRT content
    combined_prompt = f"{general_task}\n\n{attachments_task}\n\n{bigfive_task}\n\n{personalities_task}\n\nSRT Content:\n{srt_content}"

    # Process with LangChain
    attachments_result = attachments_chain.run(combined_prompt)
    bigfive_result = bigfive_chain.run(combined_prompt)
    personalities_result = personalities_chain.run(combined_prompt)

    # Combine results
    final_result = f"Attachments Analysis:\n{attachments_result}\n\nBig Five Analysis:\n{bigfive_result}\n\nPersonalities Analysis:\n{personalities_result}"

    # Save output to file. UTF-8 is explicit so non-ASCII characters in the
    # results cannot fail on a non-UTF-8 locale.
    output_file = "output.txt"
    with open(output_file, 'w', encoding='utf-8') as file:
        file.write(final_result)

    return final_result, output_file, srt_path


# Create Gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=gr.File(label="Upload Video File"),
    outputs=[
        gr.Textbox(label="Analysis Result"),
        gr.File(label="Output File"),
        gr.File(label="Generated SRT File"),
    ],
    title="Video Analysis with Meta-Llama-3.1-8B-Instruct",
    description="Upload a video file to analyze using RAG techniques with Meta-Llama-3.1-8B-Instruct.",
)

# Launch the app
iface.launch()