Luongsosad committed on
Commit
c5fce8d
·
1 Parent(s): 46649d8
Files changed (2) hide show
  1. app.py +155 -93
  2. requirements.txt +4 -1
app.py CHANGED
@@ -1,126 +1,188 @@
1
- import gradio as gr
2
- import cv2
3
  import numpy as np
4
- from PIL import Image, ImageDraw, ImageFont
5
  import json
6
- import os
7
- from moviepy.editor import AudioFileClip, VideoFileClip
 
 
 
8
 
9
def wrap_text(text, max_width=40):
    """Greedily wrap ``text`` into lines of at most ``max_width`` characters.

    Words are split on whitespace and re-joined with single spaces. A word
    longer than ``max_width`` is never broken; it is placed alone on its own
    line.

    Args:
        text: The text to wrap.
        max_width: Maximum number of characters per line.

    Returns:
        The wrapped text with lines separated by ``"\\n"``. Empty or
        whitespace-only input yields ``""``.
    """
    lines = []
    line = ''
    for word in text.split():
        # Only count the joining space when there is already text on the
        # line; otherwise the first word is charged one character too many.
        candidate = f"{line} {word}" if line else word
        if len(candidate) <= max_width:
            line = candidate
        else:
            # Do not emit an empty line when the very first word is already
            # wider than max_width.
            if line:
                lines.append(line)
            line = word
    if line:
        lines.append(line)
    return '\n'.join(lines)
21
-
22
def create_text_image(text, width, height):
    """Render ``text`` as a white caption on a black canvas.

    The caption is wrapped to the frame width, centred horizontally and
    placed so that its bottom edge sits 20 px above the bottom of the frame.

    Args:
        text: Caption text.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        The canvas as a numpy array converted from RGB to BGR channel order,
        ready to be blended with OpenCV frames.

    Raises:
        OSError: If the font file ``DejaVuSans-Bold.ttf`` cannot be loaded.
    """
    # Black RGB canvas that the caption is drawn onto.
    img = Image.new('RGB', (width, height), (0, 0, 0))
    draw = ImageDraw.Draw(img)
    font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)

    # Wrap according to the frame width; never let the limit drop to zero
    # for very narrow frames.
    wrapped_text = wrap_text(text, max_width=max(1, int(width / 18)))

    # multiline_textsize() is deprecated and removed in Pillow 10;
    # multiline_textbbox() is the supported replacement.
    left, top, right, bottom = draw.multiline_textbbox(
        (0, 0), wrapped_text, font=font, align="center"
    )
    text_w = right - left
    text_h = bottom - top

    # Centre horizontally; keep the rendered text 20 px above the bottom.
    text_x = (width - text_w) // 2 - left
    text_y = height - text_h - 20 - top
    draw.multiline_text(
        (text_x, text_y), wrapped_text, font=font, fill="white", align="center"
    )

    # Convert to BGR so the result can be used with cv2.addWeighted.
    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
38
-
39
def create_video_from_images(images, scripts, durations, output_path):
    """Write a silent 24 fps slideshow with a slow zoom and captions.

    Each image is shown for its duration while being zoomed from 100 % to
    105 %, with the matching caption blended on top.

    Args:
        images: Non-empty list of BGR image arrays. The first image defines
            the output frame size; other images are resized to match.
        scripts: Caption text for each image.
        durations: Display time in seconds for each image.
        output_path: Destination path of the ``mp4v`` encoded video.

    Raises:
        ValueError: If ``images`` is empty.
        OSError: If the video writer cannot be opened for ``output_path``.
    """
    if not images:
        raise ValueError("create_video_from_images requires at least one image")

    fps = 24
    height, width, _ = images[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    if not out.isOpened():
        raise OSError(f"cannot open video writer for {output_path}")

    try:
        for idx, (img, txt, dur) in enumerate(zip(images, scripts, durations)):
            total_frames = int(dur * fps)
            if total_frames <= 0:
                print(f"⚠️ ảnh {idx+1} bỏ qua do duration={dur}")
                continue

            # Frames must match the writer size, otherwise the crop below is
            # too small and addWeighted fails on a shape mismatch.
            if img.shape[:2] != (height, width):
                img = cv2.resize(img, (width, height))

            text_img = create_text_image(txt, width, height)
            for i in range(total_frames):
                zoom_scale = 1 + 0.05 * (i / total_frames)
                new_w = int(width * zoom_scale)
                new_h = int(height * zoom_scale)
                zoomed = cv2.resize(img, (new_w, new_h))
                # Crop the centre back to the original frame size.
                sx = (new_w - width) // 2
                sy = (new_h - height) // 2
                cropped = zoomed[sy:sy + height, sx:sx + width]
                frame = cv2.addWeighted(cropped, 1.0, text_img, 0.8, 0)
                out.write(frame)
    finally:
        # Always finalise the container, even if a frame failed to render.
        out.release()
64
-
65
def generate_video(image_files, script_input, duration_input, audio_file):
    """Gradio callback: build a captioned slideshow with background audio.

    Args:
        image_files: Uploaded image file objects (each exposing ``.name``).
        script_input: JSON array of caption strings, one per image.
        duration_input: JSON array of durations in seconds, one per image.
        audio_file: Uploaded audio file object (exposing ``.name``).

    Returns:
        ``(video_path, status_message)``. ``video_path`` is ``None`` whenever
        the video could not be produced.
    """
    try:
        scripts = json.loads(script_input)
        durations = json.loads(duration_input)
        if not isinstance(scripts, list) or not isinstance(durations, list):
            raise ValueError("scripts and durations must be JSON arrays")
    except Exception as e:
        return None, f"❌ lỗi parse script/duration:\n{e}"

    # Gradio passes None when nothing was uploaded.
    image_files = image_files or []

    if len(image_files) != len(scripts) or len(scripts) != len(durations):
        return None, "❌ số lượng ảnh, scripts và durations phải bằng nhau!"

    try:
        os.makedirs("outputs", exist_ok=True)
        audio_path = audio_file.name
        # The silent intermediate must be a different file from the final
        # output: moviepy is still reading it while the result is written.
        silent_video = os.path.join("outputs", "silent_output.mp4")
        output_video = os.path.join("outputs", "final_output.mp4")

        # Keep captions and durations aligned with the images that loaded.
        images, vs, vd = [], [], []
        for f, s, d in zip(image_files, scripts, durations):
            img = cv2.imread(f.name)
            if img is None:
                print(f" không đọc được ảnh: {f.name}")
                continue
            images.append(img)
            vs.append(s)
            vd.append(d)

        if not images:
            return None, "❌ không có ảnh hợp lệ!"

        print(f"✅ đọc được {len(images)} ảnh hợp lệ.")
        print("durations:", vd)

        create_video_from_images(images, vs, vd, silent_video)

        # Add the audio track.
        video_clip = VideoFileClip(silent_video)
        audio_clip = None
        try:
            audio_clip = AudioFileClip(audio_path)
            # Trim the audio to the video length, but never ask for more
            # audio than the track actually contains.
            audio_end = min(audio_clip.duration, video_clip.duration)
            final = video_clip.set_audio(audio_clip.subclip(0, audio_end))
            final.write_videofile(output_video, codec="libx264", audio_codec="aac", fps=24)
        finally:
            # Release the readers moviepy keeps open for each clip.
            if audio_clip is not None:
                audio_clip.close()
            video_clip.close()

        return output_video, "✅ video tạo thành công!"
    except Exception:
        import traceback
        return None, f"❌ lỗi khi tạo video:\n{traceback.format_exc()}"
107
 
108
# Gradio interface: multiple images, captions, durations and background
# music in; the rendered video and a status message out.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.File(file_types=["image"], label="ảnh (nhiều)", file_count="multiple"),
        # The value is parsed with json.loads, so the example must be valid
        # JSON (double-quoted strings), not a Python list literal.
        gr.Textbox(label="scripts (danh sách)", placeholder='["chào", "xin chào", ...]'),
        gr.Textbox(label="durations (giây)", placeholder="[3.5, 4.2, ...]"),
        gr.File(file_types=["audio"], label="nhạc nền (.mp3/.wav)"),
    ],
    outputs=[
        gr.Video(label="video kết quả"),
        gr.Textbox(label="trạng thái", interactive=False),
    ],
    title="tạo video từ ảnh, chữ và nhạc",
    description="upload nhiều ảnh + script + nhạc nền để tự động tạo video.",
)

if __name__ == "__main__":
    demo.launch()
 
1
+ import ffmpeg
2
+ import os
3
  import numpy as np
4
+ from PIL import Image
5
  import json
6
+ import gradio as gr
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ import tempfile
9
+
10
+ # os.environ["PATH"] += os.pathsep + r"D:\Downloads\ffmpeg-7.1.1-essentials_build\ffmpeg-7.1.1-essentials_build\bin"
11
 
12
def wrap_text(text, max_width=1060, font_size=20):
    """Greedily wrap ``text`` so each line fits within ``max_width`` pixels.

    Line width is estimated as ``number_of_characters * font_size``, i.e.
    every character is assumed to be ``font_size`` pixels wide. Words are
    split on whitespace and re-joined with single spaces; a word that is
    wider than ``max_width`` is never broken and gets a line of its own.

    Args:
        text: The text to wrap.
        max_width: Maximum estimated line width in pixels.
        font_size: Estimated width of one character in pixels.

    Returns:
        The wrapped text with lines separated by ``"\\n"``. Empty or
        whitespace-only input yields ``""``.
    """
    lines = []
    current_line = ""
    for word in text.split():
        # Only count the joining space when the line already has content;
        # otherwise the first word is charged for a space that is never
        # rendered.
        candidate = f"{current_line} {word}" if current_line else word
        if len(candidate) * font_size <= max_width:
            current_line = candidate
        else:
            # Avoid emitting a blank first line when the very first word is
            # already wider than max_width.
            if current_line:
                lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
25
+
26
def calculate_text_height(text, font_size=20, line_spacing=10):
    """Estimate the pixel height of a block of newline-separated text.

    Every line contributes ``font_size`` pixels and every gap between two
    consecutive lines contributes ``line_spacing`` pixels.

    Args:
        text: Text whose lines are separated by ``"\\n"``.
        font_size: Height of one line in pixels.
        line_spacing: Extra pixels between consecutive lines.

    Returns:
        The estimated total height in pixels.
    """
    # N newline characters always delimit N + 1 lines.
    line_count = text.count("\n") + 1
    glyph_rows = line_count * font_size
    gaps = (line_count - 1) * line_spacing
    return glyph_rows + gaps
29
+
30
def create_single_video(args):
    """Render one still image into a silent H.264 clip with zoom and caption.

    The image is upscaled, slowly zoomed with ffmpeg's ``zoompan`` filter and
    overlaid with the wrapped caption near the bottom of the frame.

    Args:
        args: A 7-tuple ``(img, script, dur, output_path, width, height,
            fps)`` packed into a single argument so the function can be used
            with ``Executor.map``. ``img`` is an array accepted by
            ``PIL.Image.fromarray``, ``script`` the caption text, ``dur`` the
            clip length in seconds, and ``width``/``height``/``fps`` the
            output frame size and rate.

    Returns:
        ``output_path``, once the clip has been written.

    Raises:
        ffmpeg.Error: If ffmpeg exits with an error (stderr is printed first).
    """
    img, script, dur, output_path, width, height, fps = args

    font_size = 20
    line_spacing = 10
    # zoompan needs at least one output frame.
    d_frames = max(1, int(dur * fps))

    wrapped_text = wrap_text(script, max_width=1060, font_size=font_size)
    text_height = calculate_text_height(
        wrapped_text, font_size=font_size, line_spacing=line_spacing
    )
    y_position = height - text_height - 30

    def _filter_path(path):
        # Make a filesystem path safe inside a single-quoted filter option:
        # forward slashes avoid backslash escapes, and ':' (Windows drive
        # letters) must be escaped for the option parser.
        return path.replace("\\", "/").replace(":", "\\:")

    # Reserve the temp files and close the handles immediately so nothing is
    # left open while ffmpeg reads them.
    with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as tmp:
        temp_img_path = tmp.name
    # The caption goes through drawtext's `textfile` option instead of being
    # interpolated into the filter string: user text containing quotes,
    # commas, '%' or backslashes can then neither break nor extend the
    # filtergraph.
    with tempfile.NamedTemporaryFile(
        suffix='.txt', delete=False, mode='w', encoding='utf-8'
    ) as tmp:
        tmp.write(wrapped_text)
        temp_txt_path = tmp.name

    try:
        Image.fromarray(img).save(temp_img_path)

        out_dir = os.path.dirname(output_path)
        if out_dir:  # a bare file name has no directory to create
            os.makedirs(out_dir, exist_ok=True)

        filters = [
            "scale=2400:-1",
            "zoompan=z='min(zoom+0.0002,1.5)'"
            ":x='floor(iw/2-(iw/zoom/2))':y='floor(ih/2-(ih/zoom/2))'"
            f":d={d_frames}:s={width}x{height}:fps={fps}",
        ]
        if wrapped_text:
            filters.append(
                f"drawtext=textfile='{_filter_path(temp_txt_path)}'"
                # expansion=none prints the caption verbatim ('%' and '\\'
                # are not interpreted as drawtext escape sequences).
                ":expansion=none"
                f":fontsize={font_size}:fontcolor=white"
                f":x=(w-text_w)/2:y={y_position}"
                # The comma in the font file name must stay escaped so it is
                # not read as a filter separator.
                ":fontfile=fonts/Roboto-VariableFont_wdth\\,wght.ttf"
                # NOTE(review): the box colour was garbled in the source
                # listing; a half-transparent black box is assumed - confirm.
                ":box=1:boxcolor=black@0.5:boxborderw=10"
                f":line_spacing={line_spacing}"
            )

        stream = ffmpeg.input(temp_img_path, loop=1, t=dur)
        stream = stream.output(output_path, **{
            # Join without a trailing comma; an empty last filter is invalid.
            'vf': ",".join(filters),
            't': dur,
            'pix_fmt': 'yuv420p',
            'crf': '20',
            'c:v': 'libx264',
            'an': None,
        })
        stream.run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
    except ffmpeg.Error as e:
        stderr = e.stderr.decode('utf-8', errors='replace') if e.stderr else ''
        print('FFmpeg Error:', stderr)
        raise
    finally:
        for path in (temp_img_path, temp_txt_path):
            if os.path.exists(path):
                os.remove(path)

    return output_path
67
+
68
def create_video_from_images(images, scripts, durations, audio_path, output_path, fps=60):
    """Build a captioned slideshow from images and mux in an audio track.

    One clip per image is rendered (two at a time) with
    ``create_single_video``. The clips are concatenated into ``output_path``
    without re-encoding, and the audio is then added to produce the final
    file.

    Args:
        images: Non-empty list of image arrays. The first image defines the
            output frame size.
        scripts: Caption text for each image.
        durations: Clip length in seconds for each image.
        audio_path: Path of the audio file to add.
        output_path: Path of the intermediate silent video.
        fps: Output frame rate.

    Returns:
        Path of the final video: ``output_path`` with ``_with_audio``
        inserted before the file extension.

    Raises:
        ValueError: If ``images`` is empty.
        ffmpeg.Error: If any ffmpeg invocation fails.
    """
    if not images:
        raise ValueError("create_video_from_images requires at least one image")

    height, width, _ = images[0].shape
    # libx264 with yuv420p rejects odd frame dimensions; round down to even.
    width -= width % 2
    height -= height % 2

    root, ext = os.path.splitext(output_path)
    final_output_path = f"{root}_with_audio{ext}"

    # TemporaryDirectory removes the clips and the concat list even when one
    # of the ffmpeg steps raises.
    with tempfile.TemporaryDirectory() as temp_dir:
        tasks = [
            (img, script, dur, os.path.join(temp_dir, f"temp_{i}.mp4"), width, height, fps)
            for i, (img, script, dur) in enumerate(zip(images, scripts, durations))
        ]
        with ThreadPoolExecutor(max_workers=2) as executor:
            video_paths = list(executor.map(create_single_video, tasks))

        concat_file = os.path.join(temp_dir, "concat.txt")
        with open(concat_file, 'w', encoding='utf-8') as f:
            for path in video_paths:
                # A single quote inside a quoted concat entry must be written
                # as '\'' (close quote, escaped quote, reopen quote).
                escaped = path.replace("'", "'\\''")
                f.write(f"file '{escaped}'\n")

        # overwrite_output=True passes -y; without it a second run fails
        # because the output file from the previous run already exists.
        (
            ffmpeg
            .input(concat_file, format='concat', safe=0)
            .output(output_path, c='copy', an=None)
            .run(overwrite_output=True)
        )

        video_input = ffmpeg.input(output_path)
        audio_input = ffmpeg.input(audio_path)
        (
            ffmpeg
            .output(
                video_input,
                audio_input,
                final_output_path,
                vcodec='libx264',
                acodec='aac',
                shortest=None,
            )
            .run(overwrite_output=True)
        )

    return final_output_path
101
+
102
def generate_video(image_files, script_input, duration_input, audio_file, fps=60):
    """Gradio callback: build a captioned slideshow with background audio.

    Args:
        image_files: Uploaded images, as file objects exposing ``.name`` or
            as plain paths.
        script_input: JSON array of caption strings, one per image.
        duration_input: JSON array of durations in seconds, one per image.
        audio_file: Uploaded audio, as a file object exposing ``.name`` or as
            a plain path.
        fps: Output frame rate.

    Returns:
        ``(video_path, status_message)``. ``video_path`` is ``None`` whenever
        the video could not be produced.
    """
    try:
        scripts = json.loads(script_input)
        durations = json.loads(duration_input)
        if not isinstance(scripts, list) or not isinstance(durations, list):
            raise ValueError("scripts and durations must be JSON arrays")
    except Exception as e:
        return None, f"❌ Lỗi khi phân tích đầu vào:\n{e}"

    # Gradio passes None when nothing was uploaded.
    image_files = image_files or []

    if len(image_files) != len(scripts) or len(scripts) != len(durations):
        return None, "❌ Số lượng ảnh, scripts và durations phải bằng nhau!"

    try:
        if audio_file is None:
            raise ValueError("an audio file is required")

        os.makedirs("outputs", exist_ok=True)
        audio_path = getattr(audio_file, "name", audio_file)
        output_video_path = os.path.join("outputs", "temp_output.mp4")

        # Keep captions and durations paired with the images that actually
        # loaded; otherwise one unreadable image shifts every later caption
        # onto the wrong picture.
        images, kept_scripts, kept_durations = [], [], []
        for idx, (img_file, script, dur) in enumerate(
            zip(image_files, scripts, durations), start=1
        ):
            img_name = getattr(img_file, "name", img_file)
            try:
                with Image.open(img_name) as img_pil:
                    img = np.array(img_pil.convert("RGB"))
            except Exception as e:
                print(f"❌ Không đọc được ảnh {idx}: {img_name} - lỗi: {e}")
                continue
            images.append(img)
            kept_scripts.append(script)
            kept_durations.append(dur)
            print(f" Ảnh {idx}: {img_name} - kích thước {img.shape}")

        if not images:
            return None, "❌ Không có ảnh nào hợp lệ để tạo video!"

        final_video_path = create_video_from_images(
            images, kept_scripts, kept_durations, audio_path, output_video_path, fps
        )
        return final_video_path, "✅ Video tạo thành công!"
    except Exception:
        import traceback
        return None, f"❌ Lỗi khi tạo video:\n{traceback.format_exc()}"
169
 
 
170
# Gradio UI: images, captions, durations, background music and frame rate
# in; the rendered video and a status message out.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.File(file_types=["image"], label="Ảnh (nhiều)", file_count="multiple"),
        # The value is parsed with json.loads, so the example must be valid
        # JSON (double-quoted strings), not a Python list literal.
        gr.Textbox(label="Scripts (danh sách)", placeholder='["Chào bạn", "Video demo"]'),
        gr.Textbox(label="Durations (giây)", placeholder="[3, 4]"),
        gr.File(file_types=["audio"], label="Nhạc nền (.mp3 hoặc .wav)"),
        gr.Slider(minimum=1, maximum=120, step=1, label="FPS (frame/giây)", value=60),
    ],
    outputs=[
        gr.Video(label="Video kết quả"),
        gr.Textbox(label="Trạng thái", interactive=False),
    ],
    title="Tạo video từ ảnh, chữ và nhạc",
    description="Upload nhiều ảnh + đoạn chữ + nhạc nền để tạo video tự động.",
)

if __name__ == "__main__":
    # queue() is enabled before launching the app.
    demo.queue().launch()
requirements.txt CHANGED
@@ -2,4 +2,7 @@ moviepy==1.0.3
2
  Pillow==9.5.0
3
  imageio
4
  imageio-ffmpeg
5
- opencv-python
 
 
 
 
2
  Pillow==9.5.0
3
  imageio
4
  imageio-ffmpeg
5
+ opencv-python
6
+
7
+ ffmpeg-python
8
+ numpy