# generate_subtitles.py
import random
import os
import torch
from moviepy import (
    VideoFileClip,
    TextClip,
    CompositeVideoClip,
    ImageClip,
    vfx
)
from moviepy.video.fx import FadeIn, Resize
import spaces

FONT_PATH = "DejaVuSans-Bold"

# Flashy color palette for the subtitles
SUBTITLE_COLORS = [
    "white", "yellow", "cyan", "deeppink", "gold", "lightgreen", "magenta", "orange"
]


def color_for_word(word: str) -> str:
    return random.choice(SUBTITLE_COLORS)


def chunk_text_by_words(segments, max_words=1):
    """
    Splits each Whisper segment into mini subtitles of at most max_words words
    for a more dynamic display.
    """
    print(f"✂️ Splitting into dynamic subtitles ({max_words} word(s) max)...")
    subs = []
    for seg in segments:
        words = seg['text'].strip().split()
        seg_duration = seg['end'] - seg['start']
        if not words or seg_duration <= 0:
            continue
        # Distribute the segment duration evenly across its words
        word_duration = seg_duration / len(words)
        for i in range(0, len(words), max_words):
            chunk_words = words[i:i + max_words]
            chunk_text = " ".join(chunk_words)
            start_time = seg['start'] + i * word_duration
            end_time = start_time + len(chunk_words) * word_duration
            subs.append({
                "start": start_time,
                "end": end_time,
                "text": chunk_text
            })
    print(f"🧩 {len(subs)} dynamic subtitles created.")
    return subs


def save_subtitles_to_srt(subtitles, output_path):
    """
    Saves the subtitles in .srt format.
    """
    def format_timestamp(seconds):
        # SRT timestamps use the HH:MM:SS,mmm format
        h = int(seconds // 3600)
        m = int((seconds % 3600) // 60)
        s = int(seconds % 60)
        ms = int((seconds - int(seconds)) * 1000)
        return f"{h:02}:{m:02}:{s:02},{ms:03}"

    with open(output_path, "w", encoding="utf-8") as f:
        for i, sub in enumerate(subtitles, 1):
            f.write(f"{i}\n")
            f.write(f"{format_timestamp(sub['start'])} --> {format_timestamp(sub['end'])}\n")
            f.write(f"{sub['text'].strip()}\n\n")


def transcribe_audio_to_subs(audio_path):
    """
    Transcribes the audio file to text (via Whisper), returns the list of
    start/end/text segments, and saves them as .srt.
    """
    print("🎙️ Transcribing with Whisper...")
    # Prevent Torch from detecting CUDA (force CPU inference)
    os.environ["CUDA_VISIBLE_DEVICES"] = ""
    import whisper
    model = whisper.load_model("medium", device="cpu")
    result = model.transcribe(audio_path)
    subtitles = [{
        "start": seg['start'],
        "end": seg['end'],
        "text": seg['text']
    } for seg in result['segments']]
    print(f"📝 {len(subtitles)} subtitles generated.")
    # Save the .srt next to the audio file
    base_name = os.path.splitext(audio_path)[0]
    srt_path = f"{base_name}.srt"
    save_subtitles_to_srt(subtitles, srt_path)
    print(f"💾 Subtitles saved to: {srt_path}")
    return subtitles


def format_subtitle_text(text, max_chars=50):
    """
    Wraps the text onto at most 2 lines (~50 characters per line)
    so it fills the vertical video without overflowing.
    """
    words = text.strip().split()
    lines = []
    current_line = ""
    for word in words:
        if len(current_line + " " + word) <= max_chars:
            current_line += (" " + word if current_line else word)
        else:
            lines.append(current_line.strip())
            current_line = word
    # Append the last line
    lines.append(current_line.strip())
    # Return at most 2 lines
    return "\n".join(lines[:2])


def create_animated_subtitle_clip(text, start, end, video_w, video_h):
    """
    Creates a TextClip with:
    - A random color
    - A fade-in / pop effect (progressive resize)
    - A fixed (adjustable) or slightly randomized vertical position
    """
    word = text.strip()
    color = color_for_word(word)
    # Base text clip
    txt_clip = TextClip(
        text=text,
        font=FONT_PATH,
        font_size=100,
        color=color,
        stroke_color="black",
        stroke_width=6,
        method="caption",
        size=(int(video_w * 0.8), None),  # 80% of the width, automatic height
        text_align="center",              # alignment inside the box
        horizontal_align="center",        # box centered horizontally
        vertical_align="center",          # box centered vertically
        interline=4,
        transparent=True
    )
    # Pick a vertical position slightly below the middle of the frame
    y_choices = [int(video_h * 0.45), int(video_h * 0.55), int(video_h * 0.6)]
    base_y = random.choice(y_choices)
    txt_clip = txt_clip.with_position(("center", base_y))
    txt_clip = txt_clip.with_start(start).with_end(end)
    # Apply a fade-in plus a small "pop" effect that grows the text by 7% over the chunk duration
    # 1) 0.2 s fade-in
    clip_fadein = FadeIn(duration=0.2).apply(txt_clip)
    # 2) progressive enlargement (1.0 → 1.07 over the duration)
    duration_subtitle = end - start

    def pop_effect(t):
        if duration_subtitle > 0:
            progress = t / duration_subtitle
            scale = 1.0 + 0.07 * (1 - (1 - progress) ** 3)  # cubic ease-out
        else:
            scale = 1.0
        return scale

    resize_effect = Resize(pop_effect)
    clip_pop = resize_effect.apply(clip_fadein)
    return clip_pop


def add_subtitles_to_video(video_path, subtitles, output_file="./assets/output/video_with_subs.mp4"):
    """
    Overlays the animated, colored subtitles on the video,
    resizes it to 1080x1920 if needed and exports the result.
    """
    print("🎬 Inserting Shorts-optimized subtitles...")
    video = VideoFileClip(video_path)
    # Force the vertical 1080×1920 format if the source does not match
    if (video.w, video.h) != (1080, 1920):
        print("📐 Resizing video to 1080×1920...")
        video = video.resized((1080, 1920))
    clips = [video]
    for sub in subtitles:
        start_time = sub['start']
        end_time = sub['end']
        text_chunk = sub['text']
        animated_sub_clip = create_animated_subtitle_clip(
            text_chunk, start_time, end_time, video_w=video.w, video_h=video.h
        )
        clips.append(animated_sub_clip)
    final = CompositeVideoClip(clips, size=(1080, 1920)).with_duration(video.duration)
    # Export as MP4 (H.264 + AAC, 30 fps)
    final.write_videofile(
        output_file,
        codec="libx264",
        audio_codec="aac",
        fps=30,
        threads=4,
        preset="medium",
        ffmpeg_params=["-pix_fmt", "yuv420p"]
    )
    print(f"✅ Shorts/TikTok video ready: {output_file}")