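# Video transcription and summarization app: extract the audio from an
# uploaded video, transcribe it with Whisper, summarize the transcript via
# the OpenAI API, and read the summary aloud with gTTS, all served through
# a Gradio UI.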
import whisper
import openai
import gradio as gr
from gtts import gTTS
from moviepy.editor import VideoFileClip
import os
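
# Assumed dependencies: pip install openai-whisper openai gradio gTTS moviepy
# (ffmpeg must also be installed, as both Whisper and moviepy rely on it)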
# Read the API key from the environment rather than hard-coding a secret
openai.api_key = os.getenv("OPENAI_API_KEY")
def transcribe_video(video_path):
    # Extract the audio track from the video file
    video = VideoFileClip(video_path)
    audio_path = "temp_audio.wav"
    video.audio.write_audiofile(audio_path, codec='pcm_s16le')
    video.close()

    # Load the Whisper model and transcribe the audio
    model = whisper.load_model("base")
    result = model.transcribe(audio_path)
    transcription = result["text"]

    # Remove the temporary audio file
    os.remove(audio_path)
    return transcription
def summarize_text(text):
    # text-davinci-003 and the legacy Completions endpoint are retired;
    # use the Chat Completions API instead (gpt-3.5-turbo is an assumed
    # replacement model here)
    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
        ],
        max_tokens=150
    )
    summary = response.choices[0].message.content.strip()
    return summary
def text_to_speech(text, language="en"):
    tts = gTTS(text=text, lang=language)
    tts.save("summary_audio.mp3")
    return "summary_audio.mp3"
def process_video(video):
    # Transcribe the video
    transcription = transcribe_video(video)
    # Summarize the transcription
    summary = summarize_text(transcription)
    # Convert the summary to speech
    audio_file = text_to_speech(summary)
    return transcription, summary, audio_file
# Create Gradio interface
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Video(label="Upload Video"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
        gr.Audio(label="Summary Audio")
    ],
    title="Video Transcription and Summarization",
    description="Upload a video file to transcribe and summarize its content."
)
# Launch the interface
iface.launch()
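# Running this script starts a local server (http://127.0.0.1:7860 by default);
# pass share=True to iface.launch() to get a temporary public link.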