initial add app.py
Browse files- .gitignore +12 -0
- app.py +411 -0
- packages.txt +1 -0
- requirements.txt +5 -0
- webui.bat +162 -0
.gitignore
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.vs
|
2 |
+
.vscode
|
3 |
+
# Byte-compiled / optimized / DLL files
|
4 |
+
__pycache__/
|
5 |
+
|
6 |
+
venv/
|
7 |
+
tmp/
|
8 |
+
sf2/
|
9 |
+
models/
|
10 |
+
output/
|
11 |
+
rendered_midi/
|
12 |
+
transcribed_/
|
app.py
ADDED
@@ -0,0 +1,411 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import librosa
|
3 |
+
import numpy as np
|
4 |
+
import re
|
5 |
+
import os
|
6 |
+
import time
|
7 |
+
import struct
|
8 |
+
import subprocess
|
9 |
+
import matplotlib.font_manager as fm
|
10 |
+
from typing import Tuple, List, Dict
|
11 |
+
from mutagen.flac import FLAC
|
12 |
+
from moviepy import CompositeVideoClip, TextClip, VideoClip, AudioFileClip
|
13 |
+
|
14 |
+
# --- Font Scanning and Management ---
|
15 |
+
def get_font_display_name(font_path: str) -> "Tuple[str | None, str | None]":
    """
    Read the localized "Full Font Name" (nameID=4) from a TTF/TTC file.

    The sfnt table directory is scanned for the 'name' table and every
    nameID 4 record in it is decoded. For a TTC (TrueType Collection) only
    the first font of the collection is inspected.

    Args:
        font_path: Path to the font file.

    Returns:
        A tuple (display_name, language_tag). language_tag is one of
        'zh', 'ja', 'ko', 'other' or 'en', and candidates are preferred in
        exactly that order. (None, None) is returned when the file cannot be
        read, is truncated, has no 'name' table, or has no decodable
        nameID 4 record.
    """
    # Language IDs are matched as Windows LCIDs; 0 is also accepted as English.
    chinese_ids = (1028, 2052, 3076, 4100, 5124)

    def decode_name_string(name_bytes: bytes, platform_id: int, encoding_id: int):
        """Decode one name record; returns None when the bytes cannot be decoded."""
        try:
            if platform_id in (0, 3):
                # Unicode platform and every Windows-platform encoding
                # (including Symbol, encoding 0) are UTF-16BE. Decoding those
                # as UTF-8 leaves a NUL between each character.
                return name_bytes.decode('utf_16_be').strip('\x00')
            if platform_id == 1 and encoding_id == 0:  # Macintosh, Roman
                return name_bytes.decode('mac_roman').strip('\x00')
            # Best-effort fallback for anything else
            return name_bytes.decode('utf_8', errors='ignore').strip('\x00')
        except UnicodeDecodeError:
            return None

    try:
        with open(font_path, 'rb') as f:
            data = f.read()

        def read_ushort(offset):
            return struct.unpack('>H', data[offset:offset + 2])[0]

        def read_ulong(offset):
            return struct.unpack('>I', data[offset:offset + 4])[0]

        font_offset = 0
        # TTC header: tag, version, numFonts, then one offset per font.
        if data[:4] == b'ttcf':
            if read_ulong(8) < 1:
                return None, None
            # For simplicity, only the first font in a TTC is parsed
            font_offset = read_ulong(12)

        num_tables = read_ushort(font_offset + 4)
        name_table_offset = -1
        # Locate the 'name' table in the 16-byte table directory entries
        for i in range(num_tables):
            entry_offset = font_offset + 12 + i * 16
            if data[entry_offset:entry_offset + 4] == b'name':
                name_table_offset = read_ulong(entry_offset + 8)
                break

        if name_table_offset == -1:
            return None, None

        count = read_ushort(name_table_offset + 2)
        string_offset = read_ushort(name_table_offset + 4)
        name_candidates = {}
        # Each name record is six big-endian unsigned shorts (12 bytes)
        for i in range(count):
            rec_offset = name_table_offset + 6 + i * 12
            platform_id, encoding_id, language_id, name_id, length, offset = struct.unpack(
                '>HHHHHH', data[rec_offset:rec_offset + 12]
            )
            if name_id != 4:  # Only the "Full Font Name" is of interest
                continue

            string_pos = name_table_offset + string_offset + offset
            value = decode_name_string(data[string_pos:string_pos + length], platform_id, encoding_id)
            if not value:
                continue

            # Later records overwrite earlier ones, except for "other"
            # where the first one seen is kept.
            if language_id in chinese_ids:
                name_candidates["zh"] = value
            elif language_id == 1041:
                name_candidates["ja"] = value
            elif language_id == 1042:
                name_candidates["ko"] = value
            elif language_id in (1033, 0):
                name_candidates["en"] = value
            elif "other" not in name_candidates:
                name_candidates["other"] = value

        # Return the best candidate based on language priority
        for tag in ("zh", "ja", "ko", "other", "en"):
            if name_candidates.get(tag):
                return name_candidates[tag], tag
        return None, None

    except (OSError, ValueError, TypeError, struct.error):
        # Unreadable, truncated or malformed file: the caller falls back to the filename.
        return None, None
|
90 |
+
|
91 |
+
def get_font_data() -> Tuple[Dict[str, str], List[str]]:
    """
    Scan system fonts and build the name catalogue used by the UI.

    Each font file is given a display name by get_font_display_name(); when
    that returns no name, a title-cased name derived from the filename is
    used instead and the entry is tagged 'fallback'.

    Returns:
        A tuple (font_map, sorted_display_names): font_map maps a display
        name to the path of the first file that produced it, and
        sorted_display_names lists the names ordered by language priority
        (zh, ja, ko, en, other, fallback) and then alphabetically.
    """
    font_map = {}
    found_names = []  # Stores (display_name, lang_tag)

    # Scan for both .ttf and .ttc files
    ttf_files = fm.findSystemFonts(fontpaths=None, fontext='ttf')
    ttc_files = fm.findSystemFonts(fontpaths=None, fontext='ttc')
    # Sorted so that, when two files share a display name, the same path
    # wins on every run (set iteration order is not stable across runs).
    all_font_files = sorted(set(ttf_files + ttc_files))

    for path in all_font_files:
        display_name, lang_tag = get_font_display_name(path)

        if display_name is None:
            # Create a fallback name from the filename
            stem = os.path.splitext(os.path.basename(path))[0]
            display_name = stem.replace('-', ' ').replace('_', ' ').title()
            lang_tag = 'fallback'

        if display_name and display_name not in font_map:
            font_map[display_name] = path
            found_names.append((display_name, lang_tag))

    # Define sort priority for languages
    sort_order = {'zh': 0, 'ja': 1, 'ko': 2, 'en': 3, 'other': 4, 'fallback': 5}

    # Sort by priority, then alphabetically
    found_names.sort(key=lambda item: (sort_order.get(item[1], 99), item[0]))

    sorted_display_names = [name for name, _ in found_names]
    return font_map, sorted_display_names
|
125 |
+
|
126 |
+
# Scan the system fonts once at import time. SYSTEM_FONTS_MAP (display name ->
# font path) and FONT_DISPLAY_NAMES (sorted display names) are module-level
# globals read by process_audio_to_video() and by the UI font dropdown.
print("Scanning system fonts and parsing names...")
SYSTEM_FONTS_MAP, FONT_DISPLAY_NAMES = get_font_data()
print(f"Scan complete. Found {len(FONT_DISPLAY_NAMES)} available fonts.")
|
129 |
+
|
130 |
+
|
131 |
+
# --- CUE Sheet Parsing Logic ---
|
132 |
+
def cue_time_to_seconds(time_str: str) -> float:
    """
    Convert a CUE timestamp "MM:SS:FF" into seconds.

    FF counts frames at 75 frames per second. A string that does not split
    into exactly three integer fields yields 0.0 instead of raising.
    """
    fields = time_str.split(':')
    try:
        mm, ss, ff = (int(part) for part in fields)
    except ValueError:
        # Wrong number of fields or a non-numeric field
        return 0.0
    return mm * 60 + ss + ff / 75.0
|
138 |
+
|
139 |
+
def parse_cue_sheet_manually(cue_data: str) -> List[Dict[str, object]]:
    """
    Extract per-track titles and start times from CUE sheet text.

    Only TRACK, TITLE "..." and INDEX 01 lines are interpreted. TITLE lines
    that appear before the first TRACK (album level) are ignored.

    Args:
        cue_data: The CUE sheet as a single string.

    Returns:
        One dict per track, in file order, with 'start_time' (seconds, float)
        and, when the track has a quoted TITLE, 'title'. Tracks without an
        INDEX 01 line are omitted. Tracks without a TITLE are kept so that
        the caller's own default title applies and the previous track's
        title does not run on through them.
    """
    # Anchored at the start of the stripped line so that e.g. a REM comment
    # containing the word TITLE is not mistaken for a title command.
    title_pattern = r'^TITLE\s+"(.*?)"'
    # Minutes are not limited to two digits: long single-file albums have
    # tracks that start at 100 minutes or later.
    index_pattern = r'^INDEX\s+01\s+(\d+:\d{2}:\d{2})'

    tracks = []
    current_track_info = None

    def keep_if_complete(track_info):
        """Append the finished track when it has a start time."""
        if track_info is not None and 'start_time' in track_info:
            tracks.append(track_info)

    for raw_line in cue_data.splitlines():
        line = raw_line.strip()

        if line.upper().startswith('TRACK'):
            # A new TRACK closes the previous one
            keep_if_complete(current_track_info)
            current_track_info = {}
            continue

        if current_track_info is None:
            continue

        title_match = re.search(title_pattern, line, re.IGNORECASE)
        if title_match:
            current_track_info['title'] = title_match.group(1)
            continue

        index_match = re.search(index_pattern, line, re.IGNORECASE)
        if index_match:
            current_track_info['start_time'] = cue_time_to_seconds(index_match.group(1))

    keep_if_complete(current_track_info)
    return tracks
|
161 |
+
|
162 |
+
|
163 |
+
# --- Framerate conversion using FFmpeg ---
|
164 |
+
def increase_video_framerate(input_path: str, output_path: str, target_fps: int = 24):
    """
    Convert a video to `target_fps` with FFmpeg.

    The video stream is passed through FFmpeg's `fps` filter and re-encoded
    with libx264 (preset 'fast', CRF 18); the audio stream is copied without
    re-encoding. An existing file at `output_path` is overwritten.

    Args:
        input_path (str): Path to the low-framerate video file.
        output_path (str): Path for the final, high-framerate video file.
        target_fps (int): The desired output framerate.

    Raises:
        gr.Error: If the `ffmpeg` executable cannot be found, or if FFmpeg
            exits with a non-zero status.
    """
    print(f"Increasing framerate of '{input_path}' to {target_fps} FPS...")

    command = [
        'ffmpeg',
        '-y',                          # Overwrite output file if exists
        '-i', input_path,
        '-map', '0',                   # Map all streams (video, audio, subtitles)
        '-vf', f'fps={target_fps}',    # Honour the requested framerate
        '-c:v', 'libx264',             # Re-encode video with H.264 codec
        '-preset', 'fast',             # Encoding speed/quality tradeoff
        '-crf', '18',                  # Quality (lower is better)
        '-c:a', 'copy',                # Copy audio without re-encoding
        output_path
    ]

    try:
        # capture_output keeps FFmpeg's log off the console unless it fails
        subprocess.run(command, check=True, capture_output=True, text=True)
    except FileNotFoundError as exc:
        # FFmpeg is not installed or not in the system's PATH
        raise gr.Error("FFmpeg not found. Please ensure FFmpeg is installed and accessible in your system's PATH.") from exc
    except subprocess.CalledProcessError as exc:
        # FFmpeg returned a non-zero exit code
        print("FFmpeg error output:\n", exc.stderr)
        raise gr.Error(f"FFmpeg failed to increase the framerate. See console for details. Error: {exc.stderr}") from exc

    print("Framerate increase successful.")
|
202 |
+
|
203 |
+
|
204 |
+
# --- Main Processing Function ---
|
205 |
+
def process_audio_to_video(
    audio_path: str, spec_fg_color: str, spec_bg_color: str,
    font_name: str, font_size: int, font_color: str,
    font_bg_color: str, font_bg_alpha: float,
    pos_h: str, pos_v: str
) -> str:
    """
    Render a bar-style mel-spectrogram video for an audio file.

    The video is first rendered at 1 FPS with MoviePy into a temporary file
    and then converted to 24 FPS by increase_video_framerate(). For .flac
    input with an embedded 'cuesheet' tag, each track title is overlaid for
    the duration of its track.

    Args:
        audio_path: Path of the uploaded audio file.
        spec_fg_color: Bar colour at the top of each bar ('#RGB', '#RRGGBB' or 'rgb(...)'/'rgba(...)').
        spec_bg_color: Background colour, same formats.
        font_name: Display name of the font; looked up in SYSTEM_FONTS_MAP.
        font_size: Title font size.
        font_color: Title text colour.
        font_bg_color: Title background colour.
        font_bg_alpha: Title background opacity, 0.0 to 1.0.
        pos_h: Horizontal title position ('left', 'center', 'right').
        pos_v: Vertical title position ('top', 'center', 'bottom').

    Returns:
        Path of the final 24 FPS video file.

    Raises:
        gr.Error: Missing audio or font selection, empty audio, unknown font,
            or an FFmpeg failure.
        ValueError: A colour string is in an unsupported format.
    """
    if not audio_path: raise gr.Error("Please upload an audio file first.")
    if not font_name: raise gr.Error("Please select a font from the list.")

    # Define paths for temporary and final files
    timestamp = int(time.time())
    temp_fps1_path = f"temp_{timestamp}_fps1.mp4"
    final_output_path = f"final_video_{timestamp}_fps24.mp4"

    WIDTH, HEIGHT, RENDER_FPS = 1280, 720, 1  # Render at 1 FPS
    PLAYBACK_FPS = 24  # Final playback framerate

    def parse_color_to_rgb(color_str: str) -> Tuple[int, int, int]:
        """
        Parse '#RGB', '#RRGGBB' or 'rgb(r, g, b)' / 'rgba(r, g, b, a)' into (R, G, B).

        Components of rgb()/rgba() may be decimals (e.g. "rgba(113.5, 128, 140, 1)");
        they are rounded and capped at 255.
        """
        color_str = color_str.strip()
        if color_str.startswith('#'):
            hex_val = color_str.lstrip('#')
            if len(hex_val) == 3:  # Handle shorthand hex like #FFF
                hex_val = "".join([c * 2 for c in hex_val])
            return tuple(int(hex_val[i:i + 2], 16) for i in (0, 2, 4))
        elif color_str.startswith('rgb'):
            # Match a whole number together with its optional fraction, so
            # "113.5" is one component instead of the two numbers 113 and 5.
            numbers = re.findall(r'\d+(?:\.\d+)?', color_str)
            if len(numbers) < 3:
                raise ValueError(f"Could not parse rgb color string: {color_str}")
            return tuple(min(255, int(round(float(n)))) for n in numbers[:3])
        else:
            raise ValueError(f"Unknown color format: {color_str}")

    fg_rgb, bg_rgb = parse_color_to_rgb(spec_fg_color), parse_color_to_rgb(spec_bg_color)
    grid_rgb = tuple(min(c + 40, 255) for c in bg_rgb)

    # Tracked so that the finally block can release them even after a failure
    video_clip = audio_clip = final_clip = None

    try:
        y, sr = librosa.load(audio_path, sr=None, mono=True)
        duration = librosa.get_duration(y=y, sr=sr)
        if duration <= 0:
            # frame_generator divides by duration
            raise gr.Error("The uploaded audio file is empty.")

        # Spectrogram calculation
        N_FFT, HOP_LENGTH, N_BANDS = 2048, 512, 32
        MIN_DB, MAX_DB = -80.0, 0.0
        S_mel = librosa.feature.melspectrogram(y=y, sr=sr, n_fft=N_FFT, hop_length=HOP_LENGTH, n_mels=N_BANDS, fmax=sr/2)
        S_mel_db = librosa.power_to_db(S_mel, ref=np.max)
        last_col = S_mel_db.shape[1] - 1

        def frame_generator(t):
            """Draw the frame for time t: background, 8 grid lines, then one gradient bar per band."""
            frame = np.full((HEIGHT, WIDTH, 3), bg_rgb, dtype=np.uint8)
            for i in range(1, 9):
                y_pos = int(i * (HEIGHT / 9))
                frame[y_pos-1:y_pos, :] = grid_rgb
            # Clamped so a t at or slightly past the end cannot index out of range
            time_idx = min(int((t / duration) * last_col), last_col)
            bar_width = WIDTH / N_BANDS
            for i in range(N_BANDS):
                energy_db = S_mel_db[i, time_idx]
                norm_height = np.clip((energy_db - MIN_DB) / (MAX_DB - MIN_DB), 0, 1)
                bar_height = int(norm_height * HEIGHT)
                if bar_height < 1: continue
                x_start, x_end = int(i * bar_width), int((i + 1) * bar_width - 2)
                y_start = HEIGHT - bar_height
                # Row by row, blend from fg_rgb at the top of the bar to bg_rgb at the bottom
                for k in range(bar_height):
                    y_pos, ratio = y_start + k, k / bar_height
                    r, g, b = (int(c1 * (1-ratio) + c2 * ratio) for c1, c2 in zip(fg_rgb, bg_rgb))
                    frame[y_pos, x_start:x_end] = (r, g, b)
            return frame

        video_clip = VideoClip(frame_function=frame_generator, duration=duration)
        audio_clip = AudioFileClip(audio_path)

        # CUE Sheet title overlay logic
        text_clips = []
        tracks = []
        if audio_path.lower().endswith('.flac'):
            try:
                audio = FLAC(audio_path)
                tracks = parse_cue_sheet_manually(audio.tags['cuesheet'][0])
                print(f"Successfully parsed {len(tracks)} tracks from CUE sheet...")
            except Exception as e:
                # Best effort: a missing or unreadable CUE sheet only disables the overlay
                print(f"Warning: Could not read or parse CUE sheet: {e}")

        if tracks:
            font_path = SYSTEM_FONTS_MAP.get(font_name)
            if not font_path: raise gr.Error(f"Font path for '{font_name}' not found!")

            font_bg_rgb = parse_color_to_rgb(font_bg_color)
            font_bg_rgba = (*font_bg_rgb, int(font_bg_alpha * 255))

            # rgb()/rgba() strings are converted to an RGB tuple; hex values
            # and colour names are passed to TextClip unchanged.
            if font_color.strip().startswith('rgb'):
                text_color = parse_color_to_rgb(font_color)
            else:
                text_color = font_color

            position = (pos_h.lower(), pos_v.lower())

            print(f"Using font: {font_name}, Size: {font_size}, Position: {position}")

            for i, track in enumerate(tracks):
                start_time = track.get('start_time', 0)
                title = track.get('title', 'Unknown Track')
                # A title stays up until the next track starts, or until the end of the audio
                end_time = tracks[i+1].get('start_time', duration) if i + 1 < len(tracks) else duration
                text_duration = end_time - start_time
                if text_duration <= 0: continue

                txt_clip = (TextClip(text=f"{i+1}. {title}", font_size=font_size, color=text_color, font=font_path, bg_color=font_bg_rgba)
                            .with_position(position)
                            .with_duration(text_duration)
                            .with_start(start_time))
                text_clips.append(txt_clip)

        final_clip = CompositeVideoClip([video_clip] + text_clips).with_audio(audio_clip)

        # Step 1: Render the slow, 1 FPS intermediate file
        print(f"Step 1/2: Rendering base video at {RENDER_FPS} FPS...")
        try:
            # Attempt to copy audio stream directly
            print("Attempting to copy audio stream directly...")
            final_clip.write_videofile(
                temp_fps1_path, codec="libx264", audio_codec="copy", fps=RENDER_FPS,
                logger='bar', threads=os.cpu_count(), preset='ultrafast'
            )
            print("Audio stream successfully copied!")
        except Exception:
            # Fallback to AAC encoding if copy fails
            print("Direct audio copy failed, falling back to AAC encoding...")
            final_clip.write_videofile(
                temp_fps1_path, codec="libx264", audio_codec="aac", fps=RENDER_FPS,
                logger='bar', threads=os.cpu_count(), preset='ultrafast'
            )
            print("AAC audio encoding complete.")

        # Step 2: Convert the 1 FPS file to the playback framerate with FFmpeg
        print(f"\nStep 2/2: Remuxing video to {PLAYBACK_FPS} FPS...")
        increase_video_framerate(temp_fps1_path, final_output_path, target_fps=PLAYBACK_FPS)

        return final_output_path

    finally:
        # Release clip resources whether or not rendering succeeded
        for clip in (final_clip, audio_clip, video_clip):
            if clip is None:
                continue
            try:
                clip.close()
            except Exception as close_err:
                print(f"Warning: failed to close clip: {close_err}")

        # Step 3: Clean up the temporary file regardless of success or failure
        if os.path.exists(temp_fps1_path):
            print(f"Cleaning up temporary file: {temp_fps1_path}")
            os.remove(temp_fps1_path)
|
356 |
+
|
357 |
+
# --- Gradio UI ---
|
358 |
+
# Two-column layout: all inputs on the left, the rendered video on the right.
with gr.Blocks(title="Spectrogram Video Generator") as iface:
    gr.Markdown("# Spectrogram Video Generator")
    with gr.Row():
        with gr.Column(scale=1):
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")

            with gr.Accordion("Visualizer Options", open=True):
                fg_color = gr.ColorPicker(value="#71808c", label="Spectrogram Bar Top Color")
                bg_color = gr.ColorPicker(value="#2C3E50", label="Background Color")

            with gr.Accordion("Text Overlay Options", open=True):
                # Explain up front when the overlay settings apply
                gr.Markdown(
                    "**Note:** These options only take effect if the input audio file has an embedded CUE sheet."
                )
                gr.Markdown("---")  # Separator line

                gr.Markdown("If your CUE sheet contains non-English characters, please select a compatible font.")
                # Prefer "Microsoft JhengHei", then "Arial", then the first
                # scanned font; None when no fonts were found.
                default_font = next(
                    (name for name in ("Microsoft JhengHei", "Arial") if name in FONT_DISPLAY_NAMES),
                    FONT_DISPLAY_NAMES[0] if FONT_DISPLAY_NAMES else None,
                )
                font_name_dd = gr.Dropdown(choices=FONT_DISPLAY_NAMES, value=default_font, label="Font Family")

                with gr.Row():
                    font_size_slider = gr.Slider(minimum=12, maximum=128, value=40, step=1, label="Font Size")
                    font_color_picker = gr.ColorPicker(value="#FFFFFF", label="Font Color")

                with gr.Row():
                    font_bg_color_picker = gr.ColorPicker(value="#000000", label="Text BG Color")
                    font_bg_alpha_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.6, step=0.05, label="Text BG Opacity")

                gr.Markdown("Text Position")
                with gr.Row():
                    pos_h_radio = gr.Radio(["left", "center", "right"], value="center", label="Horizontal Align")
                    pos_v_radio = gr.Radio(["top", "center", "bottom"], value="bottom", label="Vertical Align")

            submit_btn = gr.Button("Generate Video", variant="primary")

        with gr.Column(scale=2):
            video_output = gr.Video(label="Generated Video")

    # Input order must match the positional parameters of process_audio_to_video
    submit_btn.click(
        fn=process_audio_to_video,
        inputs=[
            audio_input,
            fg_color,
            bg_color,
            font_name_dd,
            font_size_slider,
            font_color_picker,
            font_bg_color_picker,
            font_bg_alpha_slider,
            pos_h_radio,
            pos_v_radio,
        ],
        outputs=video_output,
    )

if __name__ == "__main__":
    iface.launch(inbrowser=True)
|
packages.txt
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
ffmpeg
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
gradio
|
2 |
+
moviepy
|
3 |
+
mutagen
|
4 |
+
librosa
|
5 |
+
matplotlib
|
webui.bat
ADDED
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
@echo off

:: The original source of the webui.bat file is stable-diffusion-webui
:: Modified and enhanced by Gemini with features for venv management and requirements handling.
::
:: Flow: check python -> check pip -> create/locate venv -> optionally install
:: requirements -> launch the application. Output of the checks is captured in
:: tmp\stdout.txt / tmp\stderr.txt and printed by :show_stdout_stderr on failure.

:: --------- Configuration ---------
set COMMANDLINE_ARGS=
:: Define the name of the Launch application
set APPLICATION_NAME=app.py
:: Define the name of the virtual environment directory
set VENV_NAME=venv
:: Set to 1 to always attempt to update packages from requirements.txt on every launch
set ALWAYS_UPDATE_REQS=0
:: ---------------------------------


:: Set PYTHON executable if not already defined
if not defined PYTHON (set PYTHON=python)
:: Set VENV_DIR using VENV_NAME if not already defined
if not defined VENV_DIR (set "VENV_DIR=%~dp0%VENV_NAME%")

mkdir tmp 2>NUL

:: Check if Python is callable
%PYTHON% -c "" >tmp/stdout.txt 2>tmp/stderr.txt
if %ERRORLEVEL% == 0 goto :check_pip
echo Couldn't launch python
goto :show_stdout_stderr

:check_pip
:: Check if pip is available
%PYTHON% -mpip --help >tmp/stdout.txt 2>tmp/stderr.txt
if %ERRORLEVEL% == 0 goto :start_venv
:: If pip is not available and PIP_INSTALLER_LOCATION is set, try to install pip
if "%PIP_INSTALLER_LOCATION%" == "" goto :show_stdout_stderr
%PYTHON% "%PIP_INSTALLER_LOCATION%" >tmp/stdout.txt 2>tmp/stderr.txt
if %ERRORLEVEL% == 0 goto :start_venv
echo Couldn't install pip
goto :show_stdout_stderr

:start_venv
:: Skip venv creation/activation if VENV_DIR is explicitly set to "-"
if ["%VENV_DIR%"] == ["-"] goto :skip_venv_entirely
:: Skip venv creation/activation if SKIP_VENV is set to "1"
if ["%SKIP_VENV%"] == ["1"] goto :skip_venv_entirely

:: Check if the venv already exists by looking for Python.exe in its Scripts directory
dir "%VENV_DIR%\Scripts\Python.exe" >tmp/stdout.txt 2>tmp/stderr.txt
if %ERRORLEVEL% == 0 goto :activate_venv_and_maybe_update

:: Venv does not exist, create it
echo Virtual environment not found in "%VENV_DIR%". Creating a new one.
for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i"
echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME%
%PYTHON_FULLNAME% -m venv "%VENV_DIR%" >tmp/stdout.txt 2>tmp/stderr.txt
if %ERRORLEVEL% NEQ 0 (
    echo Unable to create venv in directory "%VENV_DIR%"
    goto :show_stdout_stderr
)
echo Venv created.

:: Install requirements for the first time if venv was just created
:: This section handles the initial installation of packages from requirements.txt
:: immediately after a new virtual environment is created.
:: NOTE: inside a parenthesised block %ERRORLEVEL% is expanded when the whole
:: block is parsed, i.e. before pip runs, so "if errorlevel 1" (evaluated at
:: run time) is used to detect a pip failure.
echo Checking for requirements.txt for initial setup in %~dp0
if exist "%~dp0requirements.txt" (
    echo Found requirements.txt, attempting to install for initial setup...
    call "%VENV_DIR%\Scripts\activate.bat"
    echo Installing packages from requirements.txt ^(initial setup^)...
    "%VENV_DIR%\Scripts\python.exe" -m pip install -r "%~dp0requirements.txt"
    if errorlevel 1 (
        echo Failed to install requirements during initial setup. Please check the output above.
        pause
        goto :show_stdout_stderr_custom_pip_initial
    )
    echo Initial requirements installed successfully.
    call "%VENV_DIR%\Scripts\deactivate.bat"
) else (
    echo No requirements.txt found for initial setup, skipping package installation.
)
goto :activate_venv_and_maybe_update


:activate_venv_and_maybe_update
:: This label is reached if the venv exists or was just created.
:: Set PYTHON to point to the venv's Python interpreter.
set PYTHON="%VENV_DIR%\Scripts\Python.exe"
echo Activating venv: %PYTHON%

:: Always update requirements if ALWAYS_UPDATE_REQS is 1
:: This section allows for updating packages from requirements.txt on every launch
:: if the ALWAYS_UPDATE_REQS variable is set to 1.
if defined ALWAYS_UPDATE_REQS (
    if "%ALWAYS_UPDATE_REQS%"=="1" (
        echo ALWAYS_UPDATE_REQS is enabled.
        if exist "%~dp0requirements.txt" (
            echo Attempting to update packages from requirements.txt...
            REM No need to call activate.bat here again, PYTHON is already set to the venv's python
            %PYTHON% -m pip install -r "%~dp0requirements.txt"
            REM Run-time check; %ERRORLEVEL% would be expanded at parse time here
            if errorlevel 1 (
                echo Failed to update requirements. Please check the output above.
                pause
                goto :endofscript
            )
            echo Requirements updated successfully.
        ) else (
            echo ALWAYS_UPDATE_REQS is enabled, but no requirements.txt found. Skipping update.
        )
    ) else (
        echo ALWAYS_UPDATE_REQS is not enabled or not set to 1. Skipping routine update.
    )
)

goto :launch

:skip_venv_entirely
:: This label is reached if venv usage is explicitly skipped.
echo Skipping venv.
goto :launch

:launch
:: Launch the main application
echo Launching Web UI with arguments: %COMMANDLINE_ARGS% %*
%PYTHON% %APPLICATION_NAME% %COMMANDLINE_ARGS% %*
echo Launch finished.
pause
exit /b

:show_stdout_stderr_custom_pip_initial
:: Custom error handler for failures during the initial pip install process.
echo.
echo exit code ^(pip initial install^): %errorlevel%
echo Errors during initial pip install. See output above.
echo.
echo Launch unsuccessful. Exiting.
pause
exit /b


:show_stdout_stderr
:: General error handler: displays stdout and stderr from the tmp directory.
echo.
echo exit code: %errorlevel%

for /f %%i in ("tmp\stdout.txt") do set size=%%~zi
if %size% equ 0 goto :show_stderr
echo.
echo stdout:
type tmp\stdout.txt

:show_stderr
for /f %%i in ("tmp\stderr.txt") do set size=%%~zi
if %size% equ 0 goto :endofscript
echo.
echo stderr:
type tmp\stderr.txt

:endofscript
echo.
echo Launch unsuccessful. Exiting.
pause
exit /b
|