Spaces:

malvin-ai
/

light-ai-video-generator

Running on Zero

light-ai-video-generator / scripts /generate_subtitles.py

malvin noel

corrected font

4098c08 3 months ago

6.98 kB

	#generate_subtitles.py

	import random
	import os
	import torch
	from moviepy import (
	VideoFileClip,
	TextClip,
	CompositeVideoClip,
	ImageClip,
	vfx
	)
	from moviepy.video.fx import FadeIn, Resize
	import spaces



	# Font handed to TextClip when rendering subtitles.
	# NOTE(review): this is a font name rather than a file path — presumably it is
	# resolved by the text renderer's system font lookup; confirm on the target host.
	FONT_PATH = "DejaVuSans-Bold"



	# Palette of "flashy" colors a subtitle can be drawn in
	SUBTITLE_COLORS = [
	"white", "yellow", "cyan", "deeppink", "gold", "lightgreen", "magenta", "orange"
	]




	def color_for_word(word: str) -> str:
		"""Pick a subtitle color at random from ``SUBTITLE_COLORS``.

		Args:
			word: The subtitle text being colored. Currently unused: the choice
				does not depend on it.

		Returns:
			One color name taken from the module-level palette.
		"""
		palette = SUBTITLE_COLORS
		return random.choice(palette)





	def chunk_text_by_words(segments, max_words=1):
		"""Split each segment into short subtitles of at most ``max_words`` words.

		A segment's duration is shared equally between its words, so every chunk
		is timed proportionally to the number of words it contains. Segments with
		no words or a non-positive duration are skipped.

		Args:
			segments: Iterable of dicts with ``'start'``, ``'end'`` and ``'text'`` keys.
			max_words: Maximum number of words per subtitle (must be >= 1).

		Returns:
			A list of ``{"start", "end", "text"}`` dicts, in segment order.

		Raises:
			ValueError: If ``max_words`` is smaller than 1.
		"""
		# Fail early with a clear message instead of an opaque range() error (0)
		# or a silently empty result (negative values).
		if max_words < 1:
			raise ValueError(f"max_words must be >= 1, got {max_words}")

		# Report the chunk size actually in use rather than a hard-coded figure.
		print(f"✂️ Découpage en sous-titres dynamiques ({max_words} mots max)...")
		subs = []
		for seg in segments:
			words = seg['text'].strip().split()
			seg_duration = seg['end'] - seg['start']
			if not words or seg_duration <= 0:
				continue

			word_duration = seg_duration / len(words)

			for i in range(0, len(words), max_words):
				chunk_words = words[i:i + max_words]
				start_time = seg['start'] + i * word_duration
				subs.append({
					"start": start_time,
					"end": start_time + len(chunk_words) * word_duration,
					"text": " ".join(chunk_words),
				})

		print(f"🧩 {len(subs)} sous-titres créés (dynamiques).")
		return subs


	def save_subtitles_to_srt(subtitles, output_path):
		"""Write subtitles to ``output_path`` in SubRip (.srt) format.

		Args:
			subtitles: Iterable of dicts with ``'start'`` and ``'end'`` (seconds)
				and ``'text'`` keys.
			output_path: Destination file; it is created or overwritten, UTF-8 encoded.
		"""
		def format_timestamp(seconds):
			# Round once to whole milliseconds, then split with integer math.
			# Truncating the fractional part instead loses a millisecond to
			# float error (e.g. 1.001 s used to be written as "00:00:01,000").
			# Negative values are clamped to zero.
			total_ms = max(0, int(round(seconds * 1000)))
			total_s, ms = divmod(total_ms, 1000)
			total_m, s = divmod(total_s, 60)
			h, m = divmod(total_m, 60)
			return f"{h:02}:{m:02}:{s:02},{ms:03}"

		with open(output_path, "w", encoding="utf-8") as f:
			for i, sub in enumerate(subtitles, 1):
				# One SRT cue: index, time range, text, blank separator line
				f.write(f"{i}\n")
				f.write(f"{format_timestamp(sub['start'])} --> {format_timestamp(sub['end'])}\n")
				f.write(f"{sub['text'].strip()}\n\n")

	def transcribe_audio_to_subs(audio_path):
		"""Transcribe an audio file with Whisper and save the result as .srt.

		The "medium" Whisper model is loaded on the CPU, the audio is transcribed,
		and the resulting segments are written next to the audio file (same base
		name, ``.srt`` extension).

		Args:
			audio_path: Path of the audio file to transcribe.

		Returns:
			A list of ``{"start", "end", "text"}`` dicts, one per Whisper segment.
		"""
		print("🎙️ Transcription avec Whisper...")

		# Hide CUDA devices before whisper is imported so Torch does not pick
		# up a GPU (uses the module-level ``os`` import).
		os.environ["CUDA_VISIBLE_DEVICES"] = ""
		import whisper

		whisper_model = whisper.load_model("medium", device="cpu")
		transcription = whisper_model.transcribe(audio_path)

		# Keep only the timing and text of each segment
		wanted_keys = ("start", "end", "text")
		subtitles = [
			{key: segment[key] for key in wanted_keys}
			for segment in transcription['segments']
		]
		print(f"📝 {len(subtitles)} sous-titres générés.")

		# Save the .srt alongside the audio file
		srt_path = os.path.splitext(audio_path)[0] + ".srt"
		save_subtitles_to_srt(subtitles, srt_path)
		print(f"💾 Sous-titres enregistrés dans : {srt_path}")

		return subtitles

	def format_subtitle_text(text, max_chars=50):
		"""Wrap ``text`` onto at most two lines of roughly ``max_chars`` characters.

		Words are never split: a single word longer than ``max_chars`` is placed
		on a line of its own. Anything beyond the second line is dropped.

		Args:
			text: The subtitle text to wrap.
			max_chars: Target maximum number of characters per line.

		Returns:
			The first two wrapped lines joined by a newline ("" for blank input).
		"""
		lines = []
		current_line = ""

		for word in text.strip().split():
			# Only count the separating space when there is something to separate
			candidate = f"{current_line} {word}" if current_line else word
			if not current_line or len(candidate) <= max_chars:
				# An empty line always accepts the word, even an over-long one;
				# otherwise a blank first line would be emitted and, with the
				# two-line cap, real text would be lost.
				current_line = candidate
			else:
				lines.append(current_line)
				current_line = word

		# Flush the last line, if any
		if current_line:
			lines.append(current_line)

		# Keep two lines at most
		return "\n".join(lines[:2])


	def create_animated_subtitle_clip(text, start, end, video_w, video_h):
		"""Build one animated subtitle clip showing ``text`` from ``start`` to ``end``.

		The clip gets:
		- a random color from the palette,
		- a 0.2 s fade-in plus a progressive "pop" (scale 1.0 -> 1.07),
		- a horizontally centered position at one of three preset heights.

		Args:
			text: Subtitle text to render.
			start: Time (seconds) at which the subtitle appears.
			end: Time (seconds) at which the subtitle disappears.
			video_w: Width of the target video, in pixels.
			video_h: Height of the target video, in pixels.

		Returns:
			The text clip with the fade-in and resize effects applied.
		"""
		color = color_for_word(text.strip())

		# Base text clip: caption box 80% of the video width, automatic height
		box_width = int(video_w * 0.8)
		txt_clip = TextClip(
			text=text,
			font=FONT_PATH,
			font_size=100,
			color=color,
			stroke_color="black",
			stroke_width=6,
			method="caption",
			size=(box_width, None),
			text_align="center",  # text alignment inside the box
			horizontal_align="center",  # box centered horizontally
			vertical_align="center",  # box centered vertically
			interline=4,
			transparent=True,
		)

		# Vertical position drawn among 45%, 55% and 60% of the video height
		base_y = random.choice([int(video_h * ratio) for ratio in (0.45, 0.55, 0.6)])

		timed_clip = (
			txt_clip
			.with_position(("center", base_y))
			.with_start(start)
			.with_end(end)
		)

		duration_subtitle = end - start

		def pop_effect(t):
			# Scale factor at time t: ease-out cubic from 1.0 up to 1.07
			if not duration_subtitle > 0:
				return 1.0
			remaining = 1 - t / duration_subtitle
			return 1.0 + 0.07 * (1 - remaining ** 3)

		# 1) 0.2 s fade-in, 2) progressive enlargement over the subtitle duration
		faded_clip = FadeIn(duration=0.2).apply(timed_clip)
		return Resize(pop_effect).apply(faded_clip)


	def add_subtitles_to_video(video_path, subtitles, output_file="./assets/output/video_with_subs.mp4"):
		"""Overlay animated, colored subtitles on a video and export it as MP4.

		The video is resized to 1080x1920 when it has another size (a plain
		resize: the aspect ratio is not preserved), each subtitle becomes an
		animated text clip, and the composite is encoded as H.264 + AAC at 30 fps.

		Args:
			video_path: Path of the source video.
			subtitles: Iterable of dicts with ``'start'``, ``'end'`` and ``'text'`` keys.
			output_file: Destination path of the rendered video; its parent
				directory is created when missing.
		"""
		print("🎬 Insertion des sous-titres optimisés SHORTS...")

		target_size = (1080, 1920)
		source = VideoFileClip(video_path)
		final = None
		try:
			video = source
			# Force the vertical 1080x1920 format when the input differs
			if (video.w, video.h) != target_size:
				print("📐 Recadrage vidéo en 1080×1920...")
				# MoviePy v2 (the API used throughout this file) names this
				# method ``resized``; ``resize`` is the v1 spelling.
				video = video.resized(target_size)

			clips = [video]
			clips.extend(
				create_animated_subtitle_clip(
					sub['text'], sub['start'], sub['end'], video_w=video.w, video_h=video.h
				)
				for sub in subtitles
			)

			final = CompositeVideoClip(clips, size=target_size).with_duration(video.duration)

			# Make sure the output directory exists before the encoder opens the file
			output_dir = os.path.dirname(output_file)
			if output_dir:
				os.makedirs(output_dir, exist_ok=True)

			# Export as MP4 H.264 + AAC, 30 fps
			final.write_videofile(
				output_file,
				codec="libx264",
				audio_codec="aac",
				fps=30,
				threads=4,
				preset="medium",
				ffmpeg_params=["-pix_fmt", "yuv420p"],
			)
		finally:
			# Release the file readers even when rendering fails
			if final is not None:
				final.close()
			source.close()

		print(f"✅ Vidéo Shorts/TikTok prête : {output_file}")