ZeeAI1 committed on
Commit
27247c4
·
verified ·
1 Parent(s): d7132f4

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +26 -0
  2. README.md +8 -5
  3. app.py +68 -0
  4. requirements.txt +6 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Image for the Streamlit voiceover app (Whisper + Coqui TTS) on Python 3.10.
FROM python:3.10-slim

# PIP_NO_CACHE_DIR: do not keep pip's download cache in the image.
# PYTHONDONTWRITEBYTECODE / PYTHONUNBUFFERED: no .pyc files, unbuffered logs.
# NUMBA_DISABLE_CACHE: presumably meant to stop Numba from writing its on-disk
# cache -- NOTE(review): confirm Numba actually honors this variable.
ENV PIP_NO_CACHE_DIR=true \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    NUMBA_DISABLE_CACHE=1

# System packages: git for the pip "git+https" installs below, ffmpeg and
# libsndfile1 for audio/video I/O, and a compiler toolchain for source builds.
# The apt package lists are removed in the same layer so they do not end up
# in the final image.
RUN apt-get update && apt-get install -y \
        git ffmpeg libsndfile1 build-essential python3-dev libffi-dev wget curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Dependency layers come before the application source so that editing
# app.py or README.md does not invalidate (and re-run) every pip install.
# The install order itself is unchanged.
RUN pip install --upgrade pip setuptools wheel
RUN pip install numpy==1.24.3
RUN pip install git+https://github.com/openai/whisper.git
COPY requirements.txt requirements.txt
RUN pip install -r requirements.txt
# Swap the stock librosa for the Numba-free fork before installing Coqui TTS.
RUN pip uninstall -y librosa
RUN pip install git+https://github.com/kamperh/librosa-no-numba.git
RUN pip install git+https://github.com/coqui-ai/TTS.git

# Application files last: these change most often.
COPY app.py app.py
COPY README.md README.md

# Serve the Streamlit app on port 7860 on all interfaces.
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Video0004
3
- emoji: 🏃
4
- colorFrom: purple
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
+ title: AI Voiceover App V2 (Docker)
3
+ emoji: 🧠
4
+ colorFrom: gray
5
+ colorTo: blue
6
  sdk: docker
7
  pinned: false
8
  ---
9
 
10
+ # 🧠 AI Voiceover V2: Replace One Speaker Only
11
+
12
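+ Runs Whisper transcription, Coqui TTS speech synthesis, and audio mixing inside a Docker container. The image installs a Numba-free build of Librosa and sets `NUMBA_DISABLE_CACHE=1` so the app can run on Hugging Face Spaces.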
13
+
app.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os

# Set before the audio/ML libraries below are imported.
os.environ["NUMBA_DISABLE_CACHE"] = "1"

from tempfile import NamedTemporaryFile

import streamlit as st
import torch
import torchaudio
import whisper
from moviepy.editor import VideoFileClip, AudioFileClip, CompositeAudioClip
from TTS.api import TTS
10
+
11
# Page-level configuration, followed by the visible heading.
st.set_page_config(
    page_title="AI Voiceover Generator V2",
    layout="centered",
)
st.title("🎤 AI Voiceover V2: Replace One Speaker Only")
13
+
14
@st.cache_resource
def load_whisper_model():
    """Return the Whisper "small" speech-recognition model.

    Decorated with ``st.cache_resource`` so the loaded model object is
    kept and handed back on later calls rather than loaded again.
    """
    model = whisper.load_model("small")
    return model
17
+
18
@st.cache_resource
def load_tts_model():
    """Return the Coqui TTS synthesizer used for the replacement voice.

    Loads ``tts_models/en/ljspeech/tacotron2-DDC`` with the progress bar
    off and GPU disabled. Decorated with ``st.cache_resource`` so the
    loaded object is reused on later calls.
    """
    synthesizer = TTS(
        model_name="tts_models/en/ljspeech/tacotron2-DDC",
        progress_bar=False,
        gpu=False,
    )
    return synthesizer
21
+
22
# Obtain both models up front; the loader functions are cached resources.
whisper_model = load_whisper_model()
tts = load_tts_model()

# Upload widget for the clip to process; yields a falsy value until a file
# has been provided.
video_file = st.file_uploader(
    "Upload a short video clip (MP4 preferred)",
    type=["mp4", "mov", "avi"],
)
26
+
27
def _mix_voiceover(original_audio, ai_audio, sr, ai_sr,
                   original_gain=0.4, ai_gain=0.6):
    """Blend the synthesized voice over the original soundtrack.

    Both tensors are indexed as (channels, samples). The synthesized audio
    is first resampled to the original track's rate, then zero-padded or
    truncated along the sample axis to the original's length, and finally
    combined as a weighted sum.

    Args:
        original_audio: Waveform of the video's own soundtrack.
        ai_audio: Waveform produced by the TTS model.
        sr: Sample rate of ``original_audio``.
        ai_sr: Sample rate of ``ai_audio``.
        original_gain: Weight applied to the original track.
        ai_gain: Weight applied to the synthesized track.

    Returns:
        The mixed waveform, with as many samples as ``original_audio``.
    """
    if ai_sr != sr:
        # Mixing at mismatched rates would play the voice at the wrong
        # speed and pitch.
        ai_audio = torchaudio.functional.resample(ai_audio, ai_sr, sr)

    target_len = original_audio.shape[1]
    shortfall = target_len - ai_audio.shape[1]
    if shortfall > 0:
        # torchaudio.functional has no ``pad``; pad the sample axis on the
        # right with zeros using torch's own functional API.
        ai_audio = torch.nn.functional.pad(ai_audio, (0, shortfall))
    else:
        ai_audio = ai_audio[:, :target_len]

    return (original_audio * original_gain) + (ai_audio * ai_gain)


# NOTE(review): Streamlit re-executes this script on every widget
# interaction, so the temp-file write, audio extraction and transcription
# below run again each time; consider caching them per upload.
if video_file:
    # Persist the upload to disk so moviepy/ffmpeg can open it by path.
    with NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        tmp_video.write(video_file.read())
        tmp_video_path = tmp_video.name

    st.video(tmp_video_path)

    # All derived files share the temp file's stem; splitext touches only
    # the extension, unlike a substring replace of ".mp4".
    base_path, _ = os.path.splitext(tmp_video_path)
    audio_path = base_path + ".wav"

    video = VideoFileClip(tmp_video_path)
    if video.audio is None:
        # A clip without an audio track cannot be transcribed or mixed.
        video.close()
        st.error("The uploaded video has no audio track.")
        st.stop()
    video.audio.write_audiofile(audio_path)

    st.info("Transcribing using Whisper...")
    result = whisper_model.transcribe(audio_path)
    st.subheader("📝 Detected Speech")
    st.write(result["text"])

    custom_text = st.text_area(
        "Enter your custom voiceover text to replace one speaker:",
        result["text"],
    )

    if st.button("Replace Only One Speaker's Voice"):
        if not custom_text.strip():
            st.warning("Please enter some voiceover text first.")
        else:
            # Synthesize the replacement voice and let the user preview it.
            ai_voice_path = base_path + "_ai_voice.wav"
            tts.tts_to_file(text=custom_text, file_path=ai_voice_path)
            st.audio(ai_voice_path)

            original_audio, sr = torchaudio.load(audio_path)
            ai_audio, ai_sr = torchaudio.load(ai_voice_path)

            mixed_audio = _mix_voiceover(original_audio, ai_audio, sr, ai_sr)
            mixed_path = base_path + "_mixed.wav"
            torchaudio.save(mixed_path, mixed_audio, sr)

            # Re-attach the mixed track and encode the final video.
            final_path = base_path + "_final_v2.mp4"
            mixed_clip = AudioFileClip(mixed_path)
            try:
                final_video = video.set_audio(mixed_clip)
                final_video.write_videofile(
                    final_path, codec="libx264", audio_codec="aac"
                )
            finally:
                mixed_clip.close()

            with open(final_path, "rb") as f:
                st.download_button(
                    label="📥 Download Final Video with Mixed Voiceover",
                    data=f,
                    file_name="final_ai_video_v2.mp4",
                )

    # Release the ffmpeg readers held by the source clip.
    video.close()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
# Python dependencies for app.py, installed with `pip install -r requirements.txt`.
streamlit
# app.py imports `moviepy.editor` and calls `clip.set_audio(...)`; both belong
# to the MoviePy 1.x API and are gone in MoviePy 2.x, so stay below 2.
moviepy<2
ffmpeg-python
torchaudio
torch
numba