Spaces:
Running
Running
Commit
·
46649d8
1
Parent(s):
23ca8b2
ud
Browse files
app.py
CHANGED
@@ -4,6 +4,7 @@ import numpy as np
|
|
4 |
from PIL import Image, ImageDraw, ImageFont
|
5 |
import json
|
6 |
import os
|
|
|
7 |
|
8 |
def wrap_text(text, max_width=40):
|
9 |
words = text.split()
|
@@ -19,25 +20,45 @@ def wrap_text(text, max_width=40):
|
|
19 |
return '\n'.join(lines)
|
20 |
|
21 |
def create_text_image(text, width, height):
|
22 |
-
#
|
23 |
-
img = Image.new('RGB', (width, height),
|
24 |
draw = ImageDraw.Draw(img)
|
25 |
font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
31 |
|
32 |
-
|
|
|
|
|
|
|
33 |
height, width, _ = images[0].shape
|
34 |
fourcc = cv2.VideoWriter_fourcc(*'mp4v')
|
35 |
out = cv2.VideoWriter(output_path, fourcc, 24, (width, height))
|
36 |
|
37 |
-
for img, txt, dur in zip(images, scripts, durations):
|
|
|
|
|
|
|
|
|
|
|
38 |
text_img = create_text_image(txt, width, height)
|
39 |
-
|
40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
41 |
|
42 |
out.release()
|
43 |
|
@@ -46,40 +67,59 @@ def generate_video(image_files, script_input, duration_input, audio_file):
|
|
46 |
scripts = json.loads(script_input)
|
47 |
durations = json.loads(duration_input)
|
48 |
except Exception as e:
|
49 |
-
return None, f"❌
|
50 |
|
51 |
if len(image_files) != len(scripts) or len(scripts) != len(durations):
|
52 |
-
return None, "❌
|
53 |
|
54 |
try:
|
55 |
-
image_paths = [img.name for img in image_files]
|
56 |
-
audio_path = audio_file.name
|
57 |
-
output_video_path = os.path.join("outputs", "final_output.mp4")
|
58 |
os.makedirs("outputs", exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
62 |
|
63 |
-
return
|
64 |
except Exception as e:
|
65 |
import traceback
|
66 |
-
return None, f"❌
|
67 |
|
68 |
-
#
|
69 |
demo = gr.Interface(
|
70 |
fn=generate_video,
|
71 |
inputs=[
|
72 |
-
gr.File(file_types=["image"], label="
|
73 |
-
gr.Textbox(label="
|
74 |
-
gr.Textbox(label="
|
75 |
-
gr.File(file_types=["audio"], label="
|
76 |
],
|
77 |
outputs=[
|
78 |
-
gr.Video(label="
|
79 |
-
gr.Textbox(label="
|
80 |
],
|
81 |
-
title="
|
82 |
-
description="
|
83 |
)
|
84 |
|
85 |
if __name__ == "__main__":
|
|
|
4 |
from PIL import Image, ImageDraw, ImageFont
|
5 |
import json
|
6 |
import os
|
7 |
+
from moviepy.editor import AudioFileClip, VideoFileClip
|
8 |
|
9 |
def wrap_text(text, max_width=40):
|
10 |
words = text.split()
|
|
|
20 |
return '\n'.join(lines)
|
21 |
|
22 |
def create_text_image(text, width, height):
    """Render ``text`` as a white caption on a black canvas.

    The caption is wrapped with ``wrap_text``, centred horizontally and
    anchored 20 px above the bottom edge of the canvas.

    Args:
        text: Caption text to draw.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        The rendered canvas as a NumPy array converted from RGB to BGR
        channel order, so it can be blended with OpenCV frames.

    Raises:
        OSError: Presumably raised by Pillow when "DejaVuSans-Bold.ttf"
            cannot be located -- confirm against the deployment image.
    """
    # Black RGB canvas for the caption.
    img = Image.new('RGB', (width, height), (0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)

    # Wrap the text using a character budget derived from the canvas width.
    wrapped_text = wrap_text(text, max_width=int(width / 18))

    # ImageDraw.multiline_textsize was removed in Pillow 10; measure with
    # multiline_textbbox when available and only fall back on old Pillow.
    if hasattr(draw, "multiline_textbbox"):
        left, top, right, bottom = draw.multiline_textbbox(
            (0, 0), wrapped_text, font=font, align="center"
        )
        text_w, text_h = right - left, bottom - top
    else:
        text_w, text_h = draw.multiline_textsize(wrapped_text, font=font)
        left = top = 0

    # Centre horizontally, keep a 20 px margin from the bottom. The bbox
    # origin offset is subtracted so the visible ink lands where intended.
    text_x = (width - text_w) // 2 - left
    text_y = height - text_h - 20 - top
    draw.multiline_text(
        (text_x, text_y), wrapped_text, font=font, fill="white", align="center"
    )

    # Convert to BGR for use with cv2.addWeighted.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
|
38 |
+
|
39 |
+
def create_video_from_images(images, scripts, durations, output_path, fps=24):
    """Write a slideshow video with a slow zoom-in and a caption per image.

    Each image is shown for its duration while being scaled from 100% up to
    (just under) 105%, centre-cropped back to the output size and blended
    with the caption image produced by ``create_text_image``.

    Args:
        images: Non-empty sequence of image arrays; the first image's shape
            (height, width, channels) fixes the output frame size.
        scripts: Caption text for each image.
        durations: Display time in seconds for each image. Entries whose
            frame count rounds down to zero or less are skipped with a
            printed warning.
        output_path: Destination path handed to ``cv2.VideoWriter``.
        fps: Frames per second of the output video (default 24).

    Raises:
        IndexError: If ``images`` is empty.
        ValueError: If a duration cannot be converted to ``float``.
    """
    height, width, _ = images[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))

    try:
        for idx, (img, txt, dur) in enumerate(zip(images, scripts, durations)):
            # float() guards against durations given as JSON strings:
            # "3" * 24 would be string repetition and yield a huge count.
            total_frames = int(float(dur) * fps)
            if total_frames <= 0:
                print(f"⚠️ ảnh {idx+1} bỏ qua do duration={dur}")
                continue

            # The caption is identical for every frame of this image.
            text_img = create_text_image(txt, width, height)
            for i in range(total_frames):
                zoom_scale = 1 + 0.05 * (i / total_frames)
                new_w = int(width * zoom_scale)
                new_h = int(height * zoom_scale)
                zoomed = cv2.resize(img, (new_w, new_h))

                # Centre-crop back to the original frame size.
                sx = (new_w - width) // 2
                sy = (new_h - height) // 2
                cropped = zoomed[sy:sy + height, sx:sx + width]

                frame = cv2.addWeighted(cropped, 1.0, text_img, 0.8, 0)
                out.write(frame)
    finally:
        # Always release the writer, even if a frame step raised.
        out.release()
|
64 |
|
|
|
67 |
scripts = json.loads(script_input)
|
68 |
durations = json.loads(duration_input)
|
69 |
except Exception as e:
|
70 |
+
return None, f"❌ lỗi parse script/duration:\n{e}"
|
71 |
|
72 |
if len(image_files) != len(scripts) or len(scripts) != len(durations):
|
73 |
+
return None, "❌ số lượng ảnh, scripts và durations phải bằng nhau!"
|
74 |
|
75 |
try:
|
|
|
|
|
|
|
76 |
os.makedirs("outputs", exist_ok=True)
|
77 |
+
audio_path = audio_file.name
|
78 |
+
output_video = os.path.join("outputs", "final_output.mp4")
|
79 |
+
|
80 |
+
images, vs, vd = [], [], []
|
81 |
+
for f, s, d in zip(image_files, scripts, durations):
|
82 |
+
img = cv2.imread(f.name)
|
83 |
+
if img is not None:
|
84 |
+
images.append(img); vs.append(s); vd.append(d)
|
85 |
+
else:
|
86 |
+
print(f"❌ không đọc được ảnh: {f.name}")
|
87 |
+
|
88 |
+
if not images:
|
89 |
+
return None, "❌ không có ảnh hợp lệ!"
|
90 |
+
|
91 |
+
print(f"✅ đọc được {len(images)} ảnh hợp lệ.")
|
92 |
+
print("durations:", vd)
|
93 |
+
|
94 |
+
create_video_from_images(images, vs, vd, output_video)
|
95 |
|
96 |
+
# thêm audio
|
97 |
+
video_clip = VideoFileClip(output_video)
|
98 |
+
# cắt audio theo đúng độ dài video
|
99 |
+
audio_clip = AudioFileClip(audio_path).subclip(0, video_clip.duration)
|
100 |
+
final = video_clip.set_audio(audio_clip)
|
101 |
+
final.write_videofile(output_video, codec="libx264", audio_codec="aac", fps=24)
|
102 |
|
103 |
+
return output_video, "✅ video tạo thành công!"
|
104 |
except Exception as e:
|
105 |
import traceback
|
106 |
+
return None, f"❌ lỗi khi tạo video:\n{traceback.format_exc()}"
|
107 |
|
108 |
+
# Gradio interface: wires generate_video to file/text inputs and returns the
# rendered video together with a status message.
# The placeholders use double-quoted JSON because the script and duration
# fields are parsed with json.loads, which rejects single-quoted strings.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.File(file_types=["image"], label="ảnh (nhiều)", file_count="multiple"),
        gr.Textbox(label="scripts (danh sách)", placeholder='["chào", "xin chào", ...]'),
        gr.Textbox(label="durations (giây)", placeholder="[3.5, 4.2, ...]"),
        gr.File(file_types=["audio"], label="nhạc nền (.mp3/.wav)"),
    ],
    outputs=[
        gr.Video(label="video kết quả"),
        gr.Textbox(label="trạng thái", interactive=False),
    ],
    title="tạo video từ ảnh, chữ và nhạc",
    description="upload nhiều ảnh + script + nhạc nền để tự động tạo video.",
)
|
124 |
|
125 |
if __name__ == "__main__":
|