import os

import gradio as gr
import whisper
from gtts import gTTS
from moviepy.editor import VideoFileClip  # moviepy 1.x; moviepy 2.x uses `from moviepy import VideoFileClip`
from openai import OpenAI

# Never hard-code API keys; the client reads OPENAI_API_KEY from the environment.
client = OpenAI()


def transcribe_video(video_path):
    # Extract the audio track from the video file as 16-bit PCM WAV.
    video = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    video.audio.write_audiofile(audio_path, codec="pcm_s16le")
    video.close()

    # Load the Whisper base model and transcribe the extracted audio.
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    transcription = result["text"]

    # Remove the temporary audio file.
    os.remove(audio_path)
    return transcription


def summarize_text(text):
    # text-davinci-003 and the legacy Completion endpoint are retired; use the
    # Chat Completions API from the openai>=1.0 SDK instead.
    response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
        ],
        max_tokens=150,
    )
    return response.choices[0].message.content.strip()


def text_to_speech(text, language="en"):
    # Synthesize the summary to an MP3 file with Google Text-to-Speech.
    tts = gTTS(text=text, lang=language)
    tts.save("summary_audio.mp3")
    return "summary_audio.mp3"


def process_video(video):
    # Transcribe the video, summarize the transcript, then voice the summary.
    transcription = transcribe_video(video)
    summary = summarize_text(transcription)
    audio_file = text_to_speech(summary)
    return transcription, summary, audio_file


# Create the Gradio interface.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
        gr.Audio(label="Summary Audio"),
    ],
    title="Video Transcription and Summarization",
    description="Upload a video file to transcribe and summarize its content.",
)

# Launch the interface.
iface.launch()
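# Setup notes (a sketch, not part of the original script; package names assume
# the usual PyPI distributions for these imports, and "app.py" is a
# hypothetical filename for this file):
#   pip install openai-whisper openai gradio gTTS moviepy
#   export OPENAI_API_KEY=...    # read by OpenAI() above
#   python app.py                # ffmpeg must also be available on PATH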