"""
Video processing module for Study Companion.

Handles video upload, audio extraction, transcription, and chat functionality.
"""

import os
import tempfile
from typing import Optional

import streamlit as st
from moviepy import VideoFileClip
from openai import OpenAI

# Initialize OpenAI client; the key comes from the environment so it is never
# hard-coded into the source.
api_key = os.getenv("OPENAI_API_KEY")
client = OpenAI(api_key=api_key)

# Maximum accepted upload size (200 MB).
_MAX_UPLOAD_BYTES = 200 * 1024 * 1024


# ---------------------------
# Core Video Processing Functions
# ---------------------------

def extract_audio(video_path: str) -> Optional[str]:
    """Extract the audio track from a video file and save it as MP3.

    Args:
        video_path: Path to the source video file.

    Returns:
        Path to the written MP3 file, or None on failure (including videos
        that contain no audio track).
    """
    clip = None
    try:
        clip = VideoFileClip(video_path)
        # A video can legitimately have no audio stream; report it clearly
        # instead of crashing on `None.write_audiofile`.
        if clip.audio is None:
            st.error("Error extracting audio: the video has no audio track.")
            return None
        # splitext handles any extension (.mp4, .mkv, .webm, .mov, .avi, ...)
        # in one step, unlike a chain of str.replace calls which can also
        # corrupt paths that merely *contain* an extension substring.
        audio_path = os.path.splitext(video_path)[0] + ".mp3"
        clip.audio.write_audiofile(audio_path, codec='mp3', logger=None)
        return audio_path
    except Exception as e:
        st.error(f"Error extracting audio: {e}")
        return None
    finally:
        # Always release the decoder's file handles, even when extraction
        # fails partway through.
        if clip is not None:
            clip.close()


def transcribe_audio(audio_path: str) -> str:
    """Transcribe an audio file to text using OpenAI's Whisper API.

    Args:
        audio_path: Path to the audio file (e.g. the MP3 produced by
            extract_audio).

    Returns:
        The transcript text, or an empty string if transcription failed.
    """
    try:
        with open(audio_path, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file
            )
        return transcript.text
    except Exception as e:
        st.error(f"Error transcribing audio: {e}")
        return ""


def generate_video_summary(transcript_text: str) -> str:
    """Generate a concise, student-oriented summary of a video transcript.

    Args:
        transcript_text: Full transcript of the video.

    Returns:
        The model-generated summary (may use Markdown formatting).

    Raises:
        openai.OpenAIError: If the chat completion request fails.
    """
    prompt = (
        f"Summarize the following video transcript in a concise manner, "
        "highlighting the key points that a student should know.\n\n"
        "Feel free to use bullet points, bold, italics and headers to emphasize key points where necessary.\n\n"
        f"Transcript:\n\n{transcript_text}"
    )
    messages = [
        {"role": "system", "content": "You are an educational assistant that creates clear, structured summaries."},
        {"role": "user", "content": prompt}
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages
    )
    return completion.choices[0].message.content.strip()


def chat_with_video(transcript_text: str,
                    conversation_history: list, user_query: str) -> str:
    """Answer a user question using the video transcript as context.

    Args:
        transcript_text: Full transcript of the video.
        conversation_history: Prior chat messages in OpenAI message format
            (list of {"role": ..., "content": ...} dicts).
        user_query: The user's new question.

    Returns:
        The model's reply text.

    Raises:
        openai.OpenAIError: If the chat completion request fails.
    """
    # The transcript is re-sent with every question so each turn is grounded
    # in the video content, independent of what history contains.
    messages = conversation_history + [
        {"role": "user", "content": f"Based on the following video transcript:\n\n{transcript_text}\n\nQuestion: {user_query}"}
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=messages
    )
    return completion.choices[0].message.content.strip()


def _remove_quietly(path: str) -> None:
    """Delete *path*, ignoring filesystem errors (best-effort cleanup)."""
    try:
        os.unlink(path)
    except OSError:
        pass


def process_uploaded_video(uploaded_video) -> tuple:
    """Process an uploaded video file: extract audio and transcribe.

    Args:
        uploaded_video: A Streamlit UploadedFile holding the video.

    Returns:
        (transcript_text, video_path) on success, or (None, None) on error.
        On success the caller owns video_path and is responsible for
        deleting it; on error all temporary files are cleaned up here.
    """
    # Check file size (200MB limit) before writing anything to disk.
    if uploaded_video.size > _MAX_UPLOAD_BYTES:
        st.error("File size exceeds 200MB. Please upload a smaller video.")
        return None, None

    # Keep the upload's real container extension so the decoder gets a
    # correct format hint; fall back to .mp4 when no name is available.
    suffix = os.path.splitext(getattr(uploaded_video, "name", ""))[1] or ".mp4"

    # Save uploaded video to a temporary file (delete=False: it must outlive
    # this `with` block so moviepy can open it by path).
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
        tmp.write(uploaded_video.read())
        video_path = tmp.name

    # Extract audio
    with st.spinner("🎵 Extracting audio from video..."):
        audio_path = extract_audio(video_path)
        if not audio_path:
            _remove_quietly(video_path)  # don't leave the temp video behind
            return None, None

    # Transcribe audio
    with st.spinner("📝 Transcribing audio... This may take a few minutes."):
        transcript_text = transcribe_audio(audio_path)

    # The intermediate MP3 is no longer needed either way.
    _remove_quietly(audio_path)

    if not transcript_text:
        st.error("Failed to transcribe audio.")
        _remove_quietly(video_path)
        return None, None

    return transcript_text, video_path