Spaces:

malvin-ai
/

light-ai-video-generator

Running on Zero

malvin noel

change script

d269828 about 2 months ago

15.5 kB

	#app.py
	from __future__ import annotations
	import gradio as gr
	import os
	import shutil
	import datetime
	from typing import List, Optional

	# ──────────────────────────────────────────────────────────────────────────────
	# Import project‑specific helpers — unchanged from initial version
	# ──────────────────────────────────────────────────────────────────────────────
	from scripts.generate_scripts import generate_script, generate_title, generate_description
	from scripts.generate_voice import generate_voice
	from scripts.get_footage import get_video_montage_from_folder
	from scripts.edit_video import edit_video
	from scripts.generate_subtitles import (
	transcribe_audio_to_subs,
	chunk_text_by_words,
	add_subtitles_to_video,
	)

	import torch
	from transformers import (
	AutoTokenizer,
	AutoModelForCausalLM,
	BitsAndBytesConfig,
	)

	# ──────────────────────────────────────────────────────────────────────────────
	# Constants & utilities
	# ──────────────────────────────────────────────────────────────────────────────
	# Narration pace used to convert a target duration into a word budget (≃ 140 wpm).
	WORDS_PER_SECOND = 2.3
	# Working directories of the pipeline, all rooted under ./assets.
	ASSETS_DIRS = tuple(
	    f"./assets/{name}"
	    for name in ("audio", "backgrounds", "output", "video_music")
	)


	# ────────────────────────────────────────────────────────
	# CONFIGURATION
	# ────────────────────────────────────────────────────────
	# Hub model identifier; override with the MODEL_ID environment variable.
	MODEL_ID = os.getenv("MODEL_ID", "Qwen/Qwen3-4B")
	DTYPE = torch.float16  # half-precision weights

	print(f"🔄 Loading {MODEL_ID} (dtype = {DTYPE}) …")

	# NOTE(review): trust_remote_code=True lets the model repository run its own
	# Python code at load time — only point MODEL_ID at repositories you trust.
	tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

	model = AutoModelForCausalLM.from_pretrained(
	    MODEL_ID,
	    torch_dtype=DTYPE,
	    trust_remote_code=True,
	)
	model.to("cuda" if torch.cuda.is_available() else "cpu")  # single-device move

	# Read the device back from the parameters so it reflects where the weights really are.
	DEVICE = next(model.parameters()).device
	print(f"✅ Model ready on {DEVICE}.")

	# Create every working directory up front so the callbacks can assume they exist.
	for d in ASSETS_DIRS:
	    os.makedirs(d, exist_ok=True)

	def safe_copy(src: str, dst: str) -> str:
	    """Copy ``src`` to ``dst`` unless both already refer to the same file.

	    Args:
	        src: Path of the file to copy.
	        dst: Destination path (file or directory, as accepted by ``shutil.copy``).

	    Returns:
	        ``dst`` when a copy was made, ``src`` when both paths denote the same file.
	    """
	    # Cheap textual check first: identical absolute paths never need a copy.
	    if os.path.abspath(src) == os.path.abspath(dst):
	        return src
	    try:
	        shutil.copy(src, dst)
	    except shutil.SameFileError:
	        # Different spellings of one file (symlink, hard link): nothing to copy.
	        return src
	    return dst

	# Wrapper util to timestamp generated files so different runs don't overwrite each other

	def timestamped_filename(prefix: str, ext: str) -> str:
	    """Build an output path of the form ``./assets/output/<prefix>_<timestamp>.<ext>``.

	    The timestamp includes microseconds so that two files generated with the
	    same prefix within the same second do not overwrite each other.

	    Args:
	        prefix: Leading part of the file name (e.g. ``"voice"``).
	        ext: File extension without the leading dot (e.g. ``"mp3"``).

	    Returns:
	        The path as a string; the file itself is not created.
	    """
	    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	    return os.path.join("./assets/output", f"{prefix}_{ts}.{ext}")

	# ──────────────────────────────────────────────────────────────────────────────
	# Independent functional endpoints (Gradio callbacks)
	# ──────────────────────────────────────────────────────────────────────────────

	def cb_generate_script(
	    context: str,
	    instruction: str,
	    target_duration: int,
	    script_mode: str,
	    custom_script: Optional[str],
	):
	    """Produce the narration script, then derive a title and description from it.

	    In "Use my script" mode the user's text is used after trimming; otherwise a
	    prompt is built from the context, instruction and target duration and sent
	    to the language model.

	    Returns:
	        ``(script, title, description, script)`` — the trailing copy of the
	        script feeds the shared state component.

	    Raises:
	        gr.Error: "Use my script" is selected but the script field is blank.
	    """
	    # Word budget matching the requested duration at the configured pace.
	    approx_words = int(target_duration * WORDS_PER_SECOND)

	    if script_mode != "Use my script":
	        prompt = (
	            f"You are a video creation expert. Here is the context: {context.strip()}\n"
	            f"Instruction: {instruction.strip()}\n"
	            f"🔴 Strict target duration: {target_duration}s — ≈ {approx_words} words (must be respected)."
	        )
	        final_script = generate_script(model, tokenizer, prompt)
	    else:
	        trimmed = (custom_script or "").strip()
	        if not trimmed:
	            raise gr.Error("❌ You selected 'Use my script' but the script field is empty!")
	        final_script = trimmed

	    video_title = generate_title(model, tokenizer, final_script)
	    video_description = generate_description(model, tokenizer, final_script)
	    return final_script, video_title, video_description, final_script


	def cb_generate_voice(script: str):
	    """Synthesize narration for ``script`` into a timestamped MP3 file.

	    Returns:
	        The MP3 path twice: once for the audio widget, once for the shared state.

	    Raises:
	        gr.Error: The script is empty or whitespace-only.
	    """
	    has_text = bool(script) and bool(script.strip())
	    if not has_text:
	        raise gr.Error("❌ Script text is empty – generate or paste a script first.")

	    mp3_path = timestamped_filename("voice", "mp3")
	    generate_voice(script, mp3_path)
	    return mp3_path, mp3_path


	def accumulate_files(new: Optional[List[str]], state: Optional[List[str]]):
	    """Merge newly uploaded MP4 paths into the list of already selected videos.

	    Only entries that are strings, end in ``.mp4`` (case-insensitive), exist on
	    disk, and are not already selected are added.

	    Args:
	        new: Freshly uploaded file paths; ``None`` is treated as empty.
	        state: Previously accumulated paths; ``None`` is treated as empty.

	    Returns:
	        A new list with the previous paths followed by the accepted new ones.
	        The ``state`` argument itself is left untouched.
	    """
	    merged = list(state or [])  # copy so the caller's list is never mutated
	    for path in new or []:
	        if not isinstance(path, str):
	            continue
	        if path in merged or not path.lower().endswith(".mp4"):
	            continue
	        if os.path.isfile(path):
	            merged.append(path)
	    return merged


	def cb_create_montage(
	    accumulated_videos: List[str],
	    voice_path: str,
	    lum: float,
	    contrast: float,
	    gamma: float,
	    show_bar: bool,
	):
	    """Create the background-video montage synced to the narration audio.

	    The selected videos are copied into ``./assets/backgrounds`` (after removing
	    any MP4 left there by a previous run) and handed to
	    ``get_video_montage_from_folder`` together with the visual settings.

	    Returns:
	        The montage path twice: once for the preview widget, once for the
	        shared state.

	    Raises:
	        gr.Error: No background video was provided, or the narration file is
	            missing.
	    """
	    if not accumulated_videos:
	        raise gr.Error("❌ Please upload at least one background video (.mp4) before generating the montage.")
	    if not voice_path or not os.path.isfile(voice_path):
	        raise gr.Error("❌ A narration audio file (.mp3) is required – generate or upload one first.")

	    backgrounds_dir = "./assets/backgrounds"

	    # Clean previous backgrounds, then copy new ones
	    for name in os.listdir(backgrounds_dir):
	        if name.lower().endswith(".mp4"):
	            os.remove(os.path.join(backgrounds_dir, name))
	    for idx, video in enumerate(accumulated_videos):
	        safe_copy(video, os.path.join(backgrounds_dir, f"video_{idx:03d}.mp4"))

	    # Per the original author's note, the helper saves the montage itself and
	    # returns the path of the file it wrote, so no output name is chosen here.
	    montage_path = get_video_montage_from_folder(
	        folder_path=backgrounds_dir,
	        audio_path=voice_path,
	        output_dir="./assets/video_music",
	        lum=lum,
	        contrast=contrast,
	        gamma=gamma,
	        show_progress_bar=show_bar,
	    )
	    return montage_path, montage_path


	def cb_mix_audio(
	    montage_path: str,
	    voice_path: str,
	    music_file: Optional[str] = None,
	):
	    """Mix the montage video with the narration and, if given, background music.

	    Returns:
	        The path of the mixed video twice: once for the preview widget, once
	        for the shared state.

	    Raises:
	        gr.Error: The montage video or the narration audio file is missing.
	    """
	    montage_ok = bool(montage_path) and os.path.isfile(montage_path)
	    if not montage_ok:
	        raise gr.Error("❌ Please generate a montage video first.")

	    voice_ok = bool(voice_path) and os.path.isfile(voice_path)
	    if not voice_ok:
	        raise gr.Error("❌ Narration audio missing – generate or upload it.")

	    # Music is optional: anything that is not an existing file is ignored.
	    music_path = None
	    if music_file and os.path.isfile(music_file):
	        music_path = music_file

	    output_path = timestamped_filename("final_no_subs", "mp4")
	    edit_video(montage_path, voice_path, music_path, output_path)
	    return output_path, output_path


	def cb_add_subtitles(final_no_subs: str, voice_path: str):
	    """Transcribe the narration and overlay it as subtitles on the mixed video.

	    Returns:
	        Path of the newly written subtitled video.

	    Raises:
	        gr.Error: The mixed video or the narration audio file is missing.
	    """
	    # Validate inputs in order: the video first, then the narration.
	    required = (
	        (final_no_subs, "❌ Mixed video not found – run the 'Mix Audio/Video' step first."),
	        (voice_path, "❌ Narration audio missing."),
	    )
	    for path, message in required:
	        if not (path and os.path.isfile(path)):
	            raise gr.Error(message)

	    # Subtitles are displayed in chunks of at most three words.
	    subtitle_chunks = chunk_text_by_words(
	        transcribe_audio_to_subs(voice_path),
	        max_words=3,
	    )
	    output_path = timestamped_filename("final_with_subs", "mp4")
	    add_subtitles_to_video(final_no_subs, subtitle_chunks, output_path)
	    return output_path

	# ──────────────────────────────────────────────────────────────────────────────
	# Gradio UI – one tab per function
	# ──────────────────────────────────────────────────────────────────────────────

	demo = gr.Blocks(theme="gradio/soft")

	with demo:
	    gr.Markdown("# 🎬 Modular AI Video Toolkit")
	    gr.Markdown(
	        "Each tab exposes one single processing step so you can mix & match them as you like. 💡"
	    )

	    # Shared state across tabs: each step stores its output here so the next
	    # step can pick it up without the user re-uploading anything.
	    script_state = gr.State("")
	    voice_state = gr.State("")
	    montage_state = gr.State("")
	    final_no_subs_state = gr.State("")

	    # ───────────────────────── Script generation ─────────────────────────
	    with gr.Tab("1️⃣ Generate Script"):
	        # NOTE(review): Row membership reconstructed (context + instruction side
	        # by side) — confirm against the intended layout.
	        with gr.Row():
	            context_in = gr.Textbox(label="🧠 Context", lines=4)
	            instruction_in = gr.Textbox(label="🎯 Instruction", lines=4)
	        # Range 5–120 s, default 60 s, 1 s increments. `value` and `step` are
	        # passed by keyword so they cannot be swapped or rejected.
	        duration_slider = gr.Slider(5, 120, value=60, step=1, label="⏱️ Target duration (s)")
	        script_mode = gr.Radio(
	            [
	                "Generate script with AI",
	                "Use my script",
	            ],
	            value="Generate script with AI",
	            label="Script mode",
	        )
	        custom_script_in = gr.Textbox(label="✍️ My script", lines=8, interactive=False)

	        def _toggle(mode):
	            """Make the custom-script box editable only in 'Use my script' mode."""
	            return gr.update(interactive=(mode == "Use my script"))

	        script_mode.change(_toggle, inputs=script_mode, outputs=custom_script_in)

	        gen_script_btn = gr.Button("📝 Create Script", variant="primary")
	        script_out = gr.Textbox(label="Script", lines=8, interactive=False)
	        title_out = gr.Textbox(label="Title", lines=1, interactive=False)
	        desc_out = gr.Textbox(label="Description", lines=3, interactive=False)

	        gen_script_btn.click(
	            cb_generate_script,
	            [context_in, instruction_in, duration_slider, script_mode, custom_script_in],
	            [script_out, title_out, desc_out, script_state],
	        )

	    # ───────────────────────── Voice generation ─────────────────────────
	    with gr.Tab("2️⃣ Generate Voice"):
	        script_in_voice = gr.Textbox(label="Script (paste or use from previous step)", lines=8)
	        gen_voice_btn = gr.Button("🔈 Synthesize Voice", variant="primary")
	        voice_audio = gr.Audio(label="Generated voice", interactive=False)

	        gen_voice_btn.click(
	            cb_generate_voice,
	            inputs=[script_in_voice],
	            outputs=[voice_audio, voice_state],
	        )
	        # Auto-populate script textbox with state when it updates
	        script_state.change(lambda s: s, script_state, script_in_voice, queue=False)

	    # ───────────────────────── Montage creation ─────────────────────────
	    with gr.Tab("3️⃣ Create Montage"):
	        videos_dropzone = gr.Files(label="🎞️ Background videos (MP4)", file_types=[".mp4"], type="filepath")
	        videos_state = gr.State([])
	        # Each upload is merged into the running selection instead of replacing it.
	        videos_dropzone.upload(accumulate_files, [videos_dropzone, videos_state], videos_state, queue=False)
	        videos_display = gr.Textbox(label="Selected videos", interactive=False)
	        videos_state.change(
	            lambda s: "\n".join(os.path.basename(f) for f in s),
	            videos_state,
	            videos_display,
	            queue=False,
	        )

	        # NOTE(review): Accordion membership reconstructed (three sliders plus
	        # the progress-bar checkbox) — confirm against the intended layout.
	        with gr.Accordion("🎨 Visual settings", open=False):
	            lum_slider = gr.Slider(0, 20, 6, step=0.5, label="Brightness (0–20)")
	            contrast_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Contrast (0.5–2.0)")
	            gamma_slider = gr.Slider(0.5, 2.0, 1.0, step=0.05, label="Gamma (0.5–2.0)")
	            show_bar = gr.Checkbox(label="Show progress bar", value=True)

	        create_montage_btn = gr.Button("🎞️ Build Montage", variant="primary")
	        montage_video = gr.Video(label="Montage Preview")

	        create_montage_btn.click(
	            cb_create_montage,
	            [videos_state, voice_state, lum_slider, contrast_slider, gamma_slider, show_bar],
	            [montage_video, montage_state],
	        )

	    # ───────────────────────── Mixing (voice + music) ─────────────────────────
	    with gr.Tab("4️⃣ Mix Audio / Video"):
	        voice_in = gr.File(label="Narration MP3 (optional – leave empty to use state)", file_types=[".mp3"], type="filepath")
	        montage_in = gr.File(label="Montage MP4 (optional – leave empty to use state)", file_types=[".mp4"], type="filepath")
	        music_in = gr.File(label="Background music (MP3 – optional)", file_types=[".mp3"], type="filepath")

	        def _use_state(file, state):
	            """Prefer an explicitly uploaded file, else fall back to the stored state."""
	            return file if file else state

	        mix_btn = gr.Button("🎚️ Mix", variant="primary")
	        final_no_subs_vid = gr.Video(label="Mixed video (no subtitles)")

	        mix_btn.click(
	            lambda montage, voice, music, montage_state_val, voice_state_val: cb_mix_audio(
	                _use_state(montage, montage_state_val),
	                _use_state(voice, voice_state_val),
	                music,
	            ),
	            [montage_in, voice_in, music_in, montage_state, voice_state],
	            [final_no_subs_vid, final_no_subs_state],
	        )

	    # ───────────────────────── Subtitles ─────────────────────────
	    with gr.Tab("5️⃣ Add Subtitles"):
	        video_in_sub = gr.File(label="Video MP4 (optional – defaults to last mixed video)", type="filepath", file_types=[".mp4"])
	        voice_in_sub = gr.File(label="Narration MP3 (optional – defaults to last generated voice)", type="filepath", file_types=[".mp3"])
	        add_subs_btn = gr.Button("🔤 Add Subtitles", variant="primary")
	        final_subs_video = gr.Video(label="Final video with subtitles")

	        add_subs_btn.click(
	            # Uploaded files win over the values remembered from earlier steps.
	            lambda v_in, a_in, v_state, a_state: cb_add_subtitles(
	                v_in if v_in else v_state,
	                a_in if a_in else a_state,
	            ),
	            [video_in_sub, voice_in_sub, final_no_subs_state, voice_state],
	            final_subs_video,
	        )

	# Startup
	demo.launch()