Spaces:

Luongsosad
/

video

Running

App Files Files Community

Luongsosad committed on May 13

Commit

46649d8

1 Parent(s): 23ca8b2

ud

Browse files

Files changed (1) hide show

app.py +68 -28

app.py CHANGED Viewed

@@ -4,6 +4,7 @@ import numpy as np
 from PIL import Image, ImageDraw, ImageFont
 import json
 import os
 def wrap_text(text, max_width=40):
     words = text.split()
@@ -19,25 +20,45 @@ def wrap_text(text, max_width=40):
     return '\n'.join(lines)
 def create_text_image(text, width, height):
-    # Tạo ảnh trắng bằng Pillow
-    img = Image.new('RGB', (width, height), color=(0, 0, 0))
     draw = ImageDraw.Draw(img)
     font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)
-    wrapped_text = wrap_text(text)
-    draw.text((10, 10), wrapped_text, font=font, fill="white")
-    return np.array(img)
-def create_video_from_images(images, scripts, durations, audio_path, output_path):
     height, width, _ = images[0].shape
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, 24, (width, height))
-    for img, txt, dur in zip(images, scripts, durations):
         text_img = create_text_image(txt, width, height)
-        final_img = cv2.addWeighted(img, 1, text_img, 0.5, 0)  # Chồng ảnh và văn bản
-        out.write(final_img)
     out.release()
@@ -46,40 +67,59 @@ def generate_video(image_files, script_input, duration_input, audio_file):
         scripts = json.loads(script_input)
         durations = json.loads(duration_input)
     except Exception as e:
-        return None, f"❌ Lỗi khi phân tích đầu vào:\n{e}"
     if len(image_files) != len(scripts) or len(scripts) != len(durations):
-        return None, "❌ Số lượng ảnh, scripts và durations phải bằng nhau!"
     try:
-        image_paths = [img.name for img in image_files]
-        audio_path = audio_file.name
-        output_video_path = os.path.join("outputs", "final_output.mp4")
         os.makedirs("outputs", exist_ok=True)
-        images = [cv2.imread(img_path) for img_path in image_paths]
-        create_video_from_images(images, scripts, durations, audio_path, output_video_path)
-        return output_video_path, "✅ Video tạo thành công!"
     except Exception as e:
         import traceback
-        return None, f"❌ Lỗi khi tạo video:\n{traceback.format_exc()}"
-# Gradio Interface
 demo = gr.Interface(
     fn=generate_video,
     inputs=[
-        gr.File(file_types=["image"], label="Ảnh (nhiều)", file_count="multiple"),
-        gr.Textbox(label="Scripts (danh sách)", placeholder="['Chào bạn', 'Video demo']"),
-        gr.Textbox(label="Durations (giây)", placeholder="[3, 4]"),
-        gr.File(file_types=["audio"], label="Nhạc nền (.mp3 hoặc .wav)")
     ],
     outputs=[
-        gr.Video(label="Video kết quả"),
-        gr.Textbox(label="Trạng thái", interactive=False)
     ],
-    title="Tạo video từ ảnh, chữ và nhạc",
-    description="Upload nhiều ảnh + đoạn chữ + nhạc nền để tạo video tự động."
 )
 if __name__ == "__main__":

 from PIL import Image, ImageDraw, ImageFont
 import json
 import os
+from moviepy.editor import AudioFileClip, VideoFileClip
 def wrap_text(text, max_width=40):
     words = text.split()
     return '\n'.join(lines)
 def create_text_image(text, width, height):
+    # tạo ảnh chữ nền đen (rgb)
+    img = Image.new('RGB', (width, height), (0, 0, 0))
     draw = ImageDraw.Draw(img)
     font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)
+    # xuống dòng theo chiều rộng
+    wrapped_text = wrap_text(text, max_width=int(width/18))
+    text_size = draw.multiline_textsize(wrapped_text, font=font)
+    # căn giữa ngang, dưới cách lề 20px
+    text_x = (width - text_size[0])//2
+    text_y = height - text_size[1] - 20
+    draw.multiline_text((text_x, text_y), wrapped_text, font=font, fill="white", align="center")
+    # chuyển sang bgr để cv2.addWeighted
+    return cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
+def create_video_from_images(images, scripts, durations, output_path):
     height, width, _ = images[0].shape
     fourcc = cv2.VideoWriter_fourcc(*'mp4v')
     out = cv2.VideoWriter(output_path, fourcc, 24, (width, height))
+    for idx, (img, txt, dur) in enumerate(zip(images, scripts, durations)):
+        total_frames = int(dur * 24)
+        if total_frames <= 0:
+            print(f"⚠️ ảnh {idx+1} bỏ qua do duration={dur}")
+            continue
         text_img = create_text_image(txt, width, height)
+        for i in range(total_frames):
+            zoom_scale = 1 + 0.05 * (i / total_frames)
+            new_w = int(width * zoom_scale)
+            new_h = int(height * zoom_scale)
+            zoomed = cv2.resize(img, (new_w, new_h))
+            # crop về size gốc
+            sx = (new_w - width)//2
+            sy = (new_h - height)//2
+            cropped = zoomed[sy:sy+height, sx:sx+width]
+            frame = cv2.addWeighted(cropped, 1.0, text_img, 0.8, 0)
+            out.write(frame)
     out.release()
         scripts = json.loads(script_input)
         durations = json.loads(duration_input)
     except Exception as e:
+        return None, f"❌ lỗi parse script/duration:\n{e}"
     if len(image_files) != len(scripts) or len(scripts) != len(durations):
+        return None, "❌ số lượng ảnh, scripts và durations phải bằng nhau!"
     try:
         os.makedirs("outputs", exist_ok=True)
+        audio_path = audio_file.name
+        output_video = os.path.join("outputs", "final_output.mp4")
+        images, vs, vd = [], [], []
+        for f, s, d in zip(image_files, scripts, durations):
+            img = cv2.imread(f.name)
+            if img is not None:
+                images.append(img); vs.append(s); vd.append(d)
+            else:
+                print(f"❌ không đọc được ảnh: {f.name}")
+        if not images:
+            return None, "❌ không có ảnh hợp lệ!"
+        print(f"✅ đọc được {len(images)} ảnh hợp lệ.")
+        print("durations:", vd)
+        create_video_from_images(images, vs, vd, output_video)
+        # thêm audio
+        video_clip = VideoFileClip(output_video)
+        # cắt audio theo đúng độ dài video
+        audio_clip = AudioFileClip(audio_path).subclip(0, video_clip.duration)
+        final = video_clip.set_audio(audio_clip)
+        final.write_videofile(output_video, codec="libx264", audio_codec="aac", fps=24)
+        return output_video, "✅ video tạo thành công!"
     except Exception as e:
         import traceback
+        return None, f"❌ lỗi khi tạo video:\n{traceback.format_exc()}"
+# giao diện gradio
 demo = gr.Interface(
     fn=generate_video,
     inputs=[
+        gr.File(file_types=["image"], label="ảnh (nhiều)", file_count="multiple"),
+        gr.Textbox(label="scripts (danh sách)", placeholder="['chào', 'xin chào', ...]"),
+        gr.Textbox(label="durations (giây)", placeholder="[3.5, 4.2, ...]"),
+        gr.File(file_types=["audio"], label="nhạc nền (.mp3/.wav)")
     ],
     outputs=[
+        gr.Video(label="video kết quả"),
+        gr.Textbox(label="trạng thái", interactive=False)
     ],
+    title="tạo video từ ảnh, chữ và nhạc",
+    description="upload nhiều ảnh + script + nhạc nền để tự động tạo video."
 )
 if __name__ == "__main__":