Video0001 / app.py
import streamlit as st
import whisper
from TTS.api import TTS
from moviepy.editor import VideoFileClip, AudioFileClip
import os
from tempfile import NamedTemporaryFile

# Page config
st.set_page_config(page_title="AI Voiceover Generator", layout="centered")
st.title("🎤 AI Voiceover + Subtitle Enhancer")

# Load models
@st.cache_resource
def load_whisper_model():
    return whisper.load_model("small")

@st.cache_resource
def load_tts_model():
    return TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)

whisper_model = load_whisper_model()
tts = load_tts_model()

# Upload video
video_file = st.file_uploader("Upload a short video clip (MP4 preferred)", type=["mp4", "mov", "avi"])

if video_file:
    # Save the upload to a temporary file so moviepy and Whisper can read it from disk
    with NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        tmp_video.write(video_file.read())
        tmp_video_path = tmp_video.name

    st.video(tmp_video_path)

    # Extract the audio track for transcription
    video = VideoFileClip(tmp_video_path)
    if video.audio is None:
        st.error("This clip has no audio track, so there is nothing to transcribe.")
        st.stop()
    audio_path = tmp_video_path.replace(".mp4", ".wav")
    video.audio.write_audiofile(audio_path)

    # Transcribe
    st.info("Transcribing using Whisper...")
    result = whisper_model.transcribe(audio_path)
    st.subheader("📝 Detected Speech")
    st.write(result["text"])

    # User input for voiceover
    custom_text = st.text_area("Enter your custom voiceover text:", "Here’s my voiceover explaining the video...")

    if st.button("Generate AI Voiceover"):
        voice_output_path = audio_path.replace(".wav", "_ai_voice.wav")
        tts.tts_to_file(text=custom_text, file_path=voice_output_path)
        st.audio(voice_output_path)

        # Replace original audio with new one
        final_video = video.set_audio(AudioFileClip(voice_output_path))
        final_path = tmp_video_path.replace(".mp4", "_final.mp4")
        final_video.write_videofile(final_path, codec="libx264", audio_codec="aac")

        with open(final_path, "rb") as f:
            st.download_button(label="📥 Download Final Video", data=f, file_name="final_ai_video.mp4")
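
To run as a Space, the dependencies implied by the imports above also need to be declared. A minimal requirements.txt sketch (the moviepy pin is an assumption, since moviepy 2.x removed moviepy.editor; ffmpeg must also be available on the system, e.g. via a packages.txt entry containing ffmpeg):

# requirements.txt (sketch; pins are illustrative assumptions)
streamlit
openai-whisper
TTS
moviepy==1.0.3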