"""Streamlit app: upload a video, transcribe it with Whisper, then summarize or chat about it."""

import contextlib
import os
import tempfile
from typing import Optional

import streamlit as st
import yt_dlp
from moviepy.editor import VideoFileClip
from openai import OpenAI

# The OpenAI client picks up OPENAI_API_KEY from the environment
# (make sure it's set in the Hugging Face Spaces secrets).
client = OpenAI()

CHAT_MODEL = "gpt-4o-mini"
TRANSCRIPTION_MODEL = "whisper-1"


# ---------------------------
# Helper Functions
# ---------------------------
def download_video(youtube_url: str, output_path: str) -> Optional[str]:
    """Download a YouTube video with yt-dlp into the directory ``output_path``.

    Returns the path of the downloaded file, or ``None`` (after reporting the
    error through ``st.error``) when the download fails.

    NOTE: currently unused; the YouTube input mode is disabled in the sidebar.
    """
    ydl_opts = {
        'format': 'best',
        'outtmpl': os.path.join(output_path, '%(title)s.%(ext)s'),
        'noplaylist': True,
        # 'quiet' and 'verbose' contradict each other; keep the quiet setting.
        'quiet': True,
        'cookiesfrombrowser': ('chrome',),  # Extract cookies from Chrome
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(youtube_url, download=True)
            # Ask yt-dlp for the file name it actually used: the title is
            # sanitized and the 'best' format is not necessarily an .mp4,
            # so rebuilding "<title>.mp4" by hand can point at a missing file.
            return ydl.prepare_filename(info)
    except Exception as e:
        st.error(f"Error downloading video: {e}")
        return None


def extract_audio(video_path: str) -> Optional[str]:
    """Extract the audio track of ``video_path`` and save it as an MP3 next to it.

    Returns the MP3 path, or ``None`` (after reporting the error through
    ``st.error``) when the video cannot be read or has no audio track.
    """
    # Only the *output* file name is sanitized; the input must be opened under
    # its real path, otherwise the file is not found (and on Windows replacing
    # ":" would also destroy the drive prefix).
    directory, filename = os.path.split(video_path)
    stem = os.path.splitext(filename)[0]
    safe_stem = stem.replace("|", "_").replace(":", "_")
    audio_path = os.path.join(directory, f"{safe_stem}.mp3")

    clip = None
    try:
        clip = VideoFileClip(video_path)
        if clip.audio is None:
            st.error("Error extracting audio: the video has no audio track.")
            return None
        clip.audio.write_audiofile(audio_path, codec='mp3')
        return audio_path
    except Exception as e:
        st.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Release the reader even when extraction fails.
        if clip is not None:
            clip.close()


def transcribe_audio(audio_path: str) -> str:
    """Transcribe the audio file to text using OpenAI's Whisper API.

    Returns the transcript text, or ``""`` (after reporting the error through
    ``st.error``) when transcription fails.
    """
    try:
        with open(audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model=TRANSCRIPTION_MODEL,
                file=audio_file,
            )
        return transcript.text
    except Exception as e:
        st.error(f"Error transcribing audio: {e}")
        return ""


def generate_summary(transcript_text: str) -> str:
    """Generate a concise summary of the transcript using OpenAI.

    Returns the summary, or ``""`` (after reporting the error through
    ``st.error``) when the request fails.
    """
    prompt = (
        "Summarize the following video transcript in a concise manner, "
        "highlighting the key points that the user should know:"
        f"\n\n{transcript_text}. "
        "Feel free to use bullet points, bold, italics and headers to "
        "empasize key points where necessary"
    )
    messages = [
        {"role": "system", "content": "You are an helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        completion = client.chat.completions.create(model=CHAT_MODEL, messages=messages)
        # The message content may be None; treat that as an empty answer.
        return (completion.choices[0].message.content or "").strip()
    except Exception as e:
        st.error(f"Error generating summary: {e}")
        return ""


def get_chat_response(transcript_text: str, conversation_history: list, user_query: str) -> str:
    """Generate a chat response using the transcript as context.

    ``conversation_history`` must hold the messages exchanged *before*
    ``user_query``; the query itself is appended here together with the
    transcript. The list is not modified.

    Returns the answer, or ``""`` (after reporting the error through
    ``st.error``) when the request fails.
    """
    messages = conversation_history + [
        {
            "role": "user",
            "content": f"Based on the video transcript:\n\n{transcript_text}\n\nQuestion: {user_query}",
        }
    ]
    try:
        completion = client.chat.completions.create(model=CHAT_MODEL, messages=messages)
        return (completion.choices[0].message.content or "").strip()
    except Exception as e:
        st.error(f"Error generating chat response: {e}")
        return ""


def _fresh_chat_history() -> list:
    """Return a new chat history containing only the assistant greeting."""
    return [{"role": "assistant", "content": "Hi, how can I help you with the video content?"}]


def _transcribe_upload(video_bytes: bytes) -> str:
    """Write the uploaded bytes to a temp file, extract the audio and transcribe it.

    The temporary video and audio files are removed before returning.
    Returns the transcript, or ``""`` when any step fails.
    """
    video_path = None
    audio_path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
            video_path = tmp.name
            tmp.write(video_bytes)
        audio_path = extract_audio(video_path)
        if not audio_path:
            return ""
        return transcribe_audio(audio_path)
    finally:
        for path in (video_path, audio_path):
            if path:
                with contextlib.suppress(OSError):
                    os.remove(path)


# ---------------------------
# Sidebar: Input Options
# ---------------------------
st.sidebar.title("Upload a video")
# The trailing comma matters: ("Upload Video") is just a string, not a tuple.
# NOTE: a "YouTube URL" option (backed by download_video) is currently disabled.
input_mode = st.sidebar.radio("Select Input Type", ("Upload Video",))

uploaded_video = None
if input_mode == "Upload Video":
    uploaded_video = st.sidebar.file_uploader("Upload a video file (MP4)", type="mp4")

# ---------------------------
# Process Video Input
# ---------------------------
if uploaded_video is not None:
    st.sidebar.success("Video uploaded successfully!")
    # Streamlit re-runs this script on every interaction. Only transcribe when
    # the upload differs from the one already held in the session, so that
    # chatting or switching actions does not trigger a new Whisper call.
    upload_key = (uploaded_video.name, uploaded_video.size)
    if st.session_state.get("transcript_source") != upload_key:
        transcript_text = _transcribe_upload(uploaded_video.getvalue())
        if transcript_text:
            st.session_state.transcript_text = transcript_text
            st.session_state.transcript_source = upload_key
            # Cached results that belong to the previous video are stale now.
            st.session_state.pop("summary", None)
            st.session_state.chat_history = _fresh_chat_history()
    if st.session_state.get("transcript_source") == upload_key:
        st.sidebar.success("Audio transcribed successfully!")

# ---------------------------
# Sidebar: Action Selection
# ---------------------------
st.sidebar.title("Select Action")
action_mode = st.sidebar.radio("Choose Action", ("Summary", "Chat"))

# ---------------------------
# Session State Initialization for Chat
# ---------------------------
if "chat_history" not in st.session_state:
    st.session_state.chat_history = _fresh_chat_history()

# ---------------------------
# Main Display Area
# ---------------------------
st.title("Video Chat")
st.write('Tired of watching long boring videos? Summarize your videos in seconds or just chat!')

if not st.session_state.get("transcript_text"):
    st.info("Please provide a video input from the sidebar to begin.")
else:
    transcript_text = st.session_state.transcript_text
    if action_mode == "Summary":
        # Generate the summary once per transcript instead of on every rerun.
        if not st.session_state.get("summary"):
            with st.spinner("Generating summary..."):
                st.session_state.summary = generate_summary(transcript_text)
        st.write(st.session_state.summary)
    elif action_mode == "Chat":
        st.header("Chat with Your Study Companion")
        for msg in st.session_state.chat_history:
            st.chat_message(msg["role"]).write(msg["content"])
        user_query = st.chat_input("Ask a question about the video content:")
        if user_query:
            # Snapshot the history before recording the new question;
            # get_chat_response appends the question itself, so passing the
            # updated history would send it to the model twice.
            prior_history = list(st.session_state.chat_history)
            st.session_state.chat_history.append({"role": "user", "content": user_query})
            st.chat_message("user").write(user_query)
            with st.spinner("Processing your question..."):
                response = get_chat_response(transcript_text, prior_history, user_query)
            if response:
                st.session_state.chat_history.append({"role": "assistant", "content": response})
                st.chat_message("assistant").write(response)