Spaces:
Running
Running
Commit
·
c5fce8d
1
Parent(s):
46649d8
ud
Browse files- app.py +155 -93
- requirements.txt +4 -1
app.py
CHANGED
@@ -1,126 +1,188 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
import numpy as np
|
4 |
-
from PIL import Image
|
5 |
import json
|
6 |
-
import
|
7 |
-
from
|
|
|
|
|
|
|
8 |
|
9 |
-
def wrap_text(text, max_width=
|
10 |
words = text.split()
|
11 |
lines = []
|
12 |
-
|
13 |
for word in words:
|
14 |
-
if len(
|
15 |
-
|
16 |
else:
|
17 |
-
lines.append(
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
height, width, _ = images[0].shape
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
66 |
try:
|
67 |
scripts = json.loads(script_input)
|
68 |
durations = json.loads(duration_input)
|
69 |
except Exception as e:
|
70 |
-
return None, f"❌
|
71 |
|
72 |
if len(image_files) != len(scripts) or len(scripts) != len(durations):
|
73 |
-
return None, "❌
|
74 |
|
75 |
try:
|
76 |
os.makedirs("outputs", exist_ok=True)
|
77 |
audio_path = audio_file.name
|
78 |
-
|
79 |
-
|
80 |
-
images
|
81 |
-
for
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
print(f"
|
|
|
|
|
87 |
|
88 |
if not images:
|
89 |
-
return None, "❌
|
90 |
-
|
91 |
-
print(f"✅ đọc được {len(images)} ảnh hợp lệ.")
|
92 |
-
print("durations:", vd)
|
93 |
-
|
94 |
-
create_video_from_images(images, vs, vd, output_video)
|
95 |
-
|
96 |
-
# thêm audio
|
97 |
-
video_clip = VideoFileClip(output_video)
|
98 |
-
# cắt audio theo đúng độ dài video
|
99 |
-
audio_clip = AudioFileClip(audio_path).subclip(0, video_clip.duration)
|
100 |
-
final = video_clip.set_audio(audio_clip)
|
101 |
-
final.write_videofile(output_video, codec="libx264", audio_codec="aac", fps=24)
|
102 |
|
103 |
-
|
|
|
104 |
except Exception as e:
|
105 |
import traceback
|
106 |
-
return None, f"❌
|
107 |
|
108 |
-
# giao diện gradio
|
109 |
demo = gr.Interface(
|
110 |
fn=generate_video,
|
111 |
inputs=[
|
112 |
-
gr.File(file_types=["image"], label="
|
113 |
-
gr.Textbox(label="
|
114 |
-
gr.Textbox(label="
|
115 |
-
gr.File(file_types=["audio"], label="
|
|
|
116 |
],
|
117 |
outputs=[
|
118 |
-
gr.Video(label="
|
119 |
-
gr.Textbox(label="
|
120 |
],
|
121 |
-
title="
|
122 |
-
description="
|
123 |
)
|
124 |
|
125 |
if __name__ == "__main__":
|
126 |
-
demo.launch()
|
|
|
1 |
+
import ffmpeg
|
2 |
+
import os
|
3 |
import numpy as np
|
4 |
+
from PIL import Image
|
5 |
import json
|
6 |
+
import gradio as gr
|
7 |
+
from concurrent.futures import ThreadPoolExecutor
|
8 |
+
import tempfile
|
9 |
+
|
10 |
+
# os.environ["PATH"] += os.pathsep + r"D:\Downloads\ffmpeg-7.1.1-essentials_build\ffmpeg-7.1.1-essentials_build\bin"
|
11 |
|
12 |
+
def wrap_text(text, max_width=1060, font_size=20):
    """Greedily wrap *text* into newline-separated lines of bounded width.

    The rendered width of a line is only estimated: every character is
    counted as ``font_size`` pixels wide, so a line fits when
    ``len(line) * font_size <= max_width``.

    Args:
        text: Text to wrap. It is split on any whitespace, so existing line
            breaks and repeated spaces are not preserved.
        max_width: Maximum estimated line width, in pixels.
        font_size: Estimated width of one character, in pixels.

    Returns:
        The wrapped text with lines joined by ``"\\n"``; ``""`` when *text*
        contains no words. A single word wider than ``max_width`` is kept
        whole on its own line.
    """
    lines = []
    current_line = ""
    for word in text.split():
        # Measure the line exactly as it would be emitted: no separator
        # before the first word of a line.
        candidate = f"{current_line} {word}" if current_line else word
        if len(candidate) * font_size <= max_width:
            current_line = candidate
        else:
            # Only flush a non-empty line; otherwise an over-long first word
            # would produce a blank leading line.
            if current_line:
                lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
|
25 |
+
|
26 |
+
def calculate_text_height(text, font_size=20, line_spacing=10):
    """Return the pixel height of a block of newline-separated text.

    Each line contributes ``font_size`` pixels and each gap between two
    consecutive lines contributes ``line_spacing`` pixels.

    Args:
        text: Text whose lines are separated by ``"\\n"``.
        font_size: Height of one line, in pixels.
        line_spacing: Extra space between consecutive lines, in pixels.

    Returns:
        The total height. Text without any newline counts as one line.
    """
    # Every newline starts one more line, so n newlines mean n + 1 lines.
    line_count = text.count("\n") + 1
    gap_count = line_count - 1
    return line_count * font_size + gap_count * line_spacing
|
29 |
+
|
30 |
+
def create_single_video(args):
    """Render one still image into a silent MP4 clip with a zoom and caption.

    The image is written to a temporary PNG, looped by ffmpeg for ``dur``
    seconds, upscaled, slowly zoomed towards its centre (``zoompan``, zoom
    capped at 1.5) and overlaid with the wrapped caption in a boxed
    ``drawtext`` near the bottom of the frame.

    Args:
        args: A 7-tuple ``(img, script, dur, output_path, width, height,
            fps)``, packed so the function can be used with
            ``Executor.map``. ``img`` is handed to ``Image.fromarray``;
            ``script`` is the caption; ``dur`` is the clip length in
            seconds; ``output_path`` is the clip to write; ``width`` and
            ``height`` give the output frame size; ``fps`` is the frame rate.

    Returns:
        ``output_path`` once ffmpeg has written the clip.

    Raises:
        ffmpeg.Error: If ffmpeg exits with an error; its stderr is printed
            before the exception is re-raised.
    """
    img, script, dur, output_path, width, height, fps = args

    d_frames = int(dur * fps)
    wrapped_text = wrap_text(script, max_width=1060, font_size=20)
    # NOTE(review): only ':' and "'" are escaped here. Captions containing
    # '\\', '%' or ',' may still break the drawtext filter string; confirm
    # against the FFmpeg filtergraph escaping rules before relying on this.
    wrapped_text = wrapped_text.replace(":", "\\:").replace("'", "\\'")
    text_height = calculate_text_height(wrapped_text, font_size=20, line_spacing=10)
    # Keep the caption block 30 px above the bottom edge.
    y_position = height - text_height - 30

    # NOTE(review): the boxcolor alpha was unreadable in the reviewed copy of
    # this file (it appeared as an obfuscated e-mail placeholder); 0.5 is a
    # reconstruction and must be confirmed against the repository.
    # The chain deliberately has no trailing ',' so it does not end in an
    # empty filter.
    video_filter = (
        "scale=2400:-1,"
        f"zoompan=z='min(zoom+0.0002,1.5)':x='floor(iw/2-(iw/zoom/2))':y='floor(ih/2-(ih/zoom/2))':d={d_frames}:s={width}x{height}:fps={fps},"
        f"drawtext=text='{wrapped_text}':fontsize=20:fontcolor=white:x=(w-text_w)/2:y={y_position}:"
        "fontfile=fonts/Roboto-VariableFont_wdth\\,wght.ttf:box=1:boxcolor=black@0.5:boxborderw=10:line_spacing=10"
    )

    # mkstemp returns an open descriptor; close it immediately so the path
    # can be rewritten and later removed on every platform.
    fd, temp_img_path = tempfile.mkstemp(suffix=".png")
    os.close(fd)
    try:
        Image.fromarray(img).save(temp_img_path)

        output_dir = os.path.dirname(output_path)
        if output_dir:  # a bare filename has no directory to create
            os.makedirs(output_dir, exist_ok=True)

        stream = ffmpeg.input(temp_img_path, loop=1, t=dur)
        stream = stream.output(output_path, **{
            'vf': video_filter,
            't': dur,
            'pix_fmt': 'yuv420p',
            'crf': '20',
            'c:v': 'libx264',
            'an': None,  # emitted as a bare "-an": the clip carries no audio
        })
        stream.run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
    except ffmpeg.Error as e:
        print('FFmpeg Error:', e.stderr.decode('utf-8', errors='replace'))
        raise
    finally:
        if os.path.exists(temp_img_path):
            os.remove(temp_img_path)

    return output_path
|
67 |
+
|
68 |
+
def create_video_from_images(images, scripts, durations, audio_path, output_path, fps=60):
    """Build a captioned slideshow video from images and add an audio track.

    Each ``(image, script, duration)`` triple is rendered to its own clip by
    ``create_single_video`` (two at a time), the clips are joined with
    ffmpeg's concat demuxer into ``output_path``, and that silent video is
    then muxed with ``audio_path`` into a sibling ``*_with_audio`` file.

    Args:
        images: Non-empty sequence of image arrays whose ``shape`` unpacks to
            ``(height, width, channels)``. The first image fixes the output
            frame size.
        scripts: Caption text per image.
        durations: Clip length in seconds per image.
        audio_path: Path of the audio file to mux in.
        output_path: Path of the intermediate silent video.
        fps: Output frame rate.

    Returns:
        Path of the final video, i.e. ``output_path`` with ``_with_audio``
        inserted before its extension.

    Raises:
        ValueError: If ``images`` is empty.
        ffmpeg.Error: If any ffmpeg invocation fails.
    """
    if len(images) == 0:
        raise ValueError("images must contain at least one image")
    height, width, _ = images[0].shape

    # splitext only touches the real extension, so the result can never
    # equal output_path or rewrite a directory name containing ".mp4".
    root, ext = os.path.splitext(output_path)
    final_output_path = f"{root}_with_audio{ext}"

    # The context manager removes the per-clip files even when rendering or
    # concatenation fails.
    with tempfile.TemporaryDirectory() as temp_dir:
        tasks = [
            (img, script, dur, os.path.join(temp_dir, f"temp_{i}.mp4"), width, height, fps)
            for i, (img, script, dur) in enumerate(zip(images, scripts, durations))
        ]
        with ThreadPoolExecutor(max_workers=2) as executor:
            video_paths = list(executor.map(create_single_video, tasks))

        concat_file = os.path.join(temp_dir, "concat.txt")
        with open(concat_file, 'w', encoding='utf-8') as f:
            for path in video_paths:
                # Escape single quotes so they cannot close the quoted path.
                quoted = path.replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")

        # overwrite_output adds "-y": output_path is reused across requests
        # and ffmpeg would otherwise stop at its overwrite prompt.
        ffmpeg.input(concat_file, format='concat', safe=0).output(
            output_path, c='copy', an=None
        ).run(overwrite_output=True)

    video_input = ffmpeg.input(output_path)
    audio_input = ffmpeg.input(audio_path)
    # shortest=None is emitted as a bare "-shortest", ending the output with
    # the shorter of the two streams.
    ffmpeg.output(
        video_input, audio_input, final_output_path,
        vcodec='libx264', acodec='aac', shortest=None,
    ).run(overwrite_output=True)

    return final_output_path
|
101 |
+
|
102 |
+
def _uploaded_path(upload):
    """Return the filesystem path of an upload given as a path or file wrapper."""
    # File wrappers expose the path as ``.name``; plain path strings do not.
    return getattr(upload, "name", upload)


def generate_video(image_files, script_input, duration_input, audio_file, fps=60):
    """Gradio callback: turn uploaded images, captions and music into a video.

    Args:
        image_files: Uploaded images, each a path string or an object with a
            ``.name`` path. ``None`` is treated as no images.
        script_input: JSON array of caption strings, one per image.
        duration_input: JSON array of clip durations in seconds, one per
            image.
        audio_file: Uploaded audio, as a path string or an object with a
            ``.name`` path.
        fps: Output frame rate.

    Returns:
        ``(video_path, status_message)``. ``video_path`` is ``None`` whenever
        the status message reports an error; this function reports failures
        through the message instead of raising.
    """
    try:
        scripts = json.loads(script_input)
        durations = json.loads(duration_input)
        # A JSON object or string also has a length; reject it here so it is
        # not silently iterated as if it were the list of captions.
        if not isinstance(scripts, list) or not isinstance(durations, list):
            raise ValueError("scripts and durations must both be JSON arrays")
    except (TypeError, ValueError) as e:
        return None, f"❌ Lỗi khi phân tích đầu vào:\n{e}"

    image_files = image_files or []
    if len(image_files) != len(scripts) or len(scripts) != len(durations):
        return None, "❌ Số lượng ảnh, scripts và durations phải bằng nhau!"

    try:
        os.makedirs("outputs", exist_ok=True)
        if audio_file is None:
            raise ValueError("audio_file is required")
        audio_path = _uploaded_path(audio_file)
        output_video_path = os.path.join("outputs", "temp_output.mp4")

        # Keep captions and durations paired with the images that actually
        # loaded; dropping only the image would shift every later caption
        # onto the wrong picture.
        images, kept_scripts, kept_durations = [], [], []
        for idx, (img_file, script, dur) in enumerate(
            zip(image_files, scripts, durations), start=1
        ):
            img_path = _uploaded_path(img_file)
            try:
                with Image.open(img_path) as img_pil:
                    img = np.array(img_pil.convert("RGB"))
            except Exception as e:
                # Best effort: one unreadable image must not abort the batch.
                print(f"❌ Không đọc được ảnh {idx}: {img_path} - lỗi: {e}")
                continue
            images.append(img)
            kept_scripts.append(script)
            kept_durations.append(dur)
            print(f"✅ Ảnh {idx}: {img_path} - kích thước {img.shape}")

        if not images:
            return None, "❌ Không có ảnh nào hợp lệ để tạo video!"

        final_video_path = create_video_from_images(
            images, kept_scripts, kept_durations, audio_path, output_video_path, fps
        )
        return final_video_path, "✅ Video tạo thành công!"
    except Exception:
        # UI boundary: show the full traceback in the status box rather than
        # letting the request fail.
        import traceback
        return None, f"❌ Lỗi khi tạo video:\n{traceback.format_exc()}"
|
169 |
|
|
|
170 |
# Gradio UI: the five inputs are passed positionally to generate_video
# (images, scripts, durations, audio, fps) and its two return values fill
# the video player and the status box.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.File(file_types=["image"], label="Ảnh (nhiều)", file_count="multiple"),
        # Scripts are parsed with json.loads, so the example has to be valid
        # JSON: double-quoted strings, not a Python-style single-quoted list.
        gr.Textbox(label="Scripts (danh sách)", placeholder='["Chào bạn", "Video demo"]'),
        gr.Textbox(label="Durations (giây)", placeholder="[3, 4]"),
        gr.File(file_types=["audio"], label="Nhạc nền (.mp3 hoặc .wav)"),
        gr.Slider(minimum=1, maximum=120, step=1, label="FPS (frame/giây)", value=60),
    ],
    outputs=[
        gr.Video(label="Video kết quả"),
        gr.Textbox(label="Trạng thái", interactive=False),
    ],
    title="Tạo video từ ảnh, chữ và nhạc",
    description="Upload nhiều ảnh + đoạn chữ + nhạc nền để tạo video tự động."
)

if __name__ == "__main__":
    # Enable Gradio's request queue before starting the server.
    demo.queue().launch()
|
requirements.txt
CHANGED
@@ -2,4 +2,7 @@ moviepy==1.0.3
|
|
2 |
Pillow==9.5.0
|
3 |
imageio
|
4 |
imageio-ffmpeg
|
5 |
-
opencv-python
|
|
|
|
|
|
|
|
2 |
Pillow==9.5.0
|
3 |
imageio
|
4 |
imageio-ffmpeg
|
5 |
+
opencv-python
|
6 |
+
|
7 |
+
ffmpeg-python
|
8 |
+
numpy
|