Spaces:

Luongsosad
/

video

Running

App Files Files Community

Luongsosad committed on May 14

Commit

c5fce8d

1 Parent(s): 46649d8

ud

Browse files

Files changed (2) hide show

app.py +155 -93
requirements.txt +4 -1

app.py CHANGED Viewed

@@ -1,126 +1,188 @@
-import gradio as gr
-import cv2
 import numpy as np
-from PIL import Image, ImageDraw, ImageFont
 import json
-import os
-from moviepy.editor import AudioFileClip, VideoFileClip
def wrap_text(text, max_width=40):
    """Greedily wrap *text* so that no line exceeds *max_width* characters.

    Words are split on whitespace and re-joined with single spaces. A word
    that is longer than *max_width* is never broken; it is placed on a line
    of its own.

    Args:
        text: The caption to wrap.
        max_width: Maximum number of characters per line.

    Returns:
        The wrapped text with lines separated by ``"\\n"``; an empty string
        when *text* contains no words.
    """
    lines = []
    line = ''
    for word in text.split():
        # Only insert the separating space when the line already has content,
        # so the first word is not charged for a phantom leading space.
        candidate = f"{line} {word}" if line else word
        if len(candidate) <= max_width:
            line = candidate
        else:
            # Skip the flush when nothing has been collected yet; otherwise an
            # over-long first word would produce a leading blank line.
            if line:
                lines.append(line)
            line = word
    if line:
        lines.append(line)
    return '\n'.join(lines)
def create_text_image(text, width, height):
    """Draw *text* as a white caption on a black canvas and return it as BGR.

    The caption is wrapped with ``wrap_text`` (line limit ``int(width/18)``),
    centred horizontally and placed so its bottom edge sits 20 px above the
    bottom of the canvas.

    Args:
        text: Caption to render.
        width: Canvas width in pixels.
        height: Canvas height in pixels.

    Returns:
        The rendered canvas converted from RGB to BGR with ``cv2.cvtColor``.
    """
    caption_font = ImageFont.truetype("DejaVuSans-Bold.ttf", 30)

    # Black RGB canvas to draw on.
    canvas = Image.new('RGB', (width, height), (0, 0, 0))
    pen = ImageDraw.Draw(canvas)

    # Wrap according to the canvas width, then measure the wrapped block.
    caption = wrap_text(text, max_width=int(width/18))
    caption_w, caption_h = pen.multiline_textsize(caption, font=caption_font)

    # Horizontally centred, 20 px above the bottom edge.
    origin = ((width - caption_w)//2, height - caption_h - 20)
    pen.multiline_text(origin, caption, font=caption_font, fill="white", align="center")

    # Convert to BGR so the result can be blended with cv2.addWeighted.
    return cv2.cvtColor(np.array(canvas), cv2.COLOR_RGB2BGR)
def create_video_from_images(images, scripts, durations, output_path):
    """Write a silent 24 fps slideshow with a slow zoom and captions.

    The frame size is taken from the first image. Each image is shown for its
    duration while being zoomed from 100 % to 105 % (centre-cropped back to
    the frame size) and blended with its caption image.

    Args:
        images: Non-empty sequence of BGR image arrays.
        scripts: Caption text for each image.
        durations: Display time in seconds for each image; entries that yield
            no frames are skipped with a warning.
        output_path: Destination file for the ``mp4v``-encoded video.
    """
    fps = 24
    height, width, _ = images[0].shape
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        for idx, (img, txt, dur) in enumerate(zip(images, scripts, durations)):
            total_frames = int(dur * fps)
            if total_frames <= 0:
                print(f"⚠️ ảnh {idx+1} bỏ qua do duration={dur}")
                continue
            # Every frame must match the writer's frame size and the caption
            # image; bring differently sized pictures to the first one's size.
            if img.shape[:2] != (height, width):
                img = cv2.resize(img, (width, height))
            text_img = create_text_image(txt, width, height)
            for i in range(total_frames):
                zoom_scale = 1 + 0.05 * (i / total_frames)
                new_w = int(width * zoom_scale)
                new_h = int(height * zoom_scale)
                zoomed = cv2.resize(img, (new_w, new_h))
                # Centre-crop back to the original frame size.
                sx = (new_w - width)//2
                sy = (new_h - height)//2
                cropped = zoomed[sy:sy+height, sx:sx+width]
                frame = cv2.addWeighted(cropped, 1.0, text_img, 0.8, 0)
                out.write(frame)
    finally:
        # Release the writer even when a frame fails, so the file is closed.
        out.release()
def generate_video(image_files, script_input, duration_input, audio_file):
    """Gradio callback: build a captioned slideshow and add background audio.

    Args:
        image_files: Uploaded image file objects (each exposing ``.name``);
            ``None`` is treated as an empty list.
        script_input: JSON array of caption strings, one per image.
        duration_input: JSON array of durations in seconds, one per image.
        audio_file: Uploaded audio file object exposing ``.name``.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is ``None`` whenever
        an error is reported in ``status_message``.
    """
    try:
        scripts = json.loads(script_input)
        durations = json.loads(duration_input)
        # Valid JSON that is not an array (e.g. "5") would break len() below.
        if not isinstance(scripts, list) or not isinstance(durations, list):
            raise ValueError("scripts and durations must both be JSON arrays")
    except (TypeError, ValueError) as e:
        return None, f"❌ lỗi parse script/duration:\n{e}"

    image_files = image_files or []
    if len(image_files) != len(scripts) or len(scripts) != len(durations):
        return None, "❌ số lượng ảnh, scripts và durations phải bằng nhau!"

    try:
        os.makedirs("outputs", exist_ok=True)
        audio_path = audio_file.name
        output_video = os.path.join("outputs", "final_output.mp4")
        # The silent intermediate needs its own path: the final encode reads
        # it while writing, so both cannot be the same file.
        silent_video = os.path.join("outputs", "final_output_silent.mp4")

        # Keep images, captions and durations aligned when an image is unreadable.
        images, vs, vd = [], [], []
        for f, s, d in zip(image_files, scripts, durations):
            img = cv2.imread(f.name)
            if img is not None:
                images.append(img)
                vs.append(s)
                vd.append(d)
            else:
                print(f"❌ không đọc được ảnh: {f.name}")
        if not images:
            return None, "❌ không có ảnh hợp lệ!"
        print(f"✅ đọc được {len(images)} ảnh hợp lệ.")
        print("durations:", vd)

        create_video_from_images(images, vs, vd, silent_video)

        # Add the audio track, trimmed to the video length.
        video_clip = VideoFileClip(silent_video)
        try:
            audio_clip = AudioFileClip(audio_path)
            try:
                # Never request audio beyond the end of the audio file.
                end = min(video_clip.duration, audio_clip.duration)
                final = video_clip.set_audio(audio_clip.subclip(0, end))
                final.write_videofile(output_video, codec="libx264", audio_codec="aac", fps=24)
            finally:
                audio_clip.close()
        finally:
            video_clip.close()
        return output_video, "✅ video tạo thành công!"
    except Exception:
        import traceback
        return None, f"❌ lỗi khi tạo video:\n{traceback.format_exc()}"
# Gradio interface: wires generate_video to the upload widgets.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.File(file_types=["image"], label="ảnh (nhiều)", file_count="multiple"),
        # The text is parsed with json.loads, so the example uses JSON
        # (double-quoted) strings rather than Python-style single quotes.
        gr.Textbox(label="scripts (danh sách)", placeholder='["chào", "xin chào", ...]'),
        gr.Textbox(label="durations (giây)", placeholder="[3.5, 4.2, ...]"),
        gr.File(file_types=["audio"], label="nhạc nền (.mp3/.wav)")
    ],
    outputs=[
        gr.Video(label="video kết quả"),
        gr.Textbox(label="trạng thái", interactive=False)
    ],
    title="tạo video từ ảnh, chữ và nhạc",
    description="upload nhiều ảnh + script + nhạc nền để tự động tạo video."
)
if __name__ == "__main__":
    demo.launch()

+import ffmpeg
+import os
 import numpy as np
+from PIL import Image
 import json
+import gradio as gr
+from concurrent.futures import ThreadPoolExecutor
+import tempfile
+# os.environ["PATH"] += os.pathsep + r"D:\Downloads\ffmpeg-7.1.1-essentials_build\ffmpeg-7.1.1-essentials_build\bin"
def wrap_text(text, max_width=1060, font_size=20):
    """Greedily wrap *text* to an estimated pixel width.

    The rendered width of a line is approximated as
    ``len(line) * font_size``; a word is added to the current line while that
    estimate stays within *max_width*. Words are never broken, so a word that
    is too wide on its own gets a line to itself.

    Args:
        text: The caption to wrap.
        max_width: Maximum estimated line width in pixels.
        font_size: Font size used for the per-character width estimate.

    Returns:
        The wrapped text with lines separated by ``"\\n"``; an empty string
        when *text* contains no words.
    """
    lines = []
    current_line = ""
    for word in text.split():
        # Only insert the separating space when the line already has content,
        # so the first word is not charged for a phantom leading space.
        candidate = f"{current_line} {word}" if current_line else word
        if len(candidate) * font_size <= max_width:
            current_line = candidate
        else:
            # Skip the flush when nothing has been collected yet; otherwise an
            # over-wide first word would produce a leading blank line.
            if current_line:
                lines.append(current_line)
            current_line = word
    if current_line:
        lines.append(current_line)
    return "\n".join(lines)
def calculate_text_height(text, font_size=20, line_spacing=10):
    """Estimate the pixel height of a newline-separated block of text.

    Each line contributes *font_size* pixels and each gap between two
    consecutive lines contributes *line_spacing* pixels.
    """
    # One more line than there are newline characters.
    line_count = text.count("\n") + 1
    gap_count = line_count - 1
    return line_count * font_size + gap_count * line_spacing
def create_single_video(args):
    """Render one still image into a silent, slowly zooming, captioned clip.

    Args:
        args: A 7-tuple ``(img, script, dur, output_path, width, height, fps)``
            where ``img`` is an array accepted by ``Image.fromarray``,
            ``script`` is the caption, ``dur`` is the clip length in seconds,
            ``output_path`` is the clip destination and ``width``/``height``/
            ``fps`` describe the output video.

    Returns:
        ``output_path``.

    Raises:
        ValueError: If ``dur * fps`` yields no frames.
        ffmpeg.Error: If ffmpeg fails; its stderr is printed before re-raising.
    """
    img, script, dur, output_path, width, height, fps = args
    d_frames = int(dur * fps)
    if d_frames <= 0:
        raise ValueError(
            f"duration and fps must yield at least one frame (dur={dur!r}, fps={fps!r})"
        )

    def _filter_path(path):
        # Filter options are ':'-separated and '\' is ffmpeg's escape
        # character: use forward slashes, escape ':' and quote the value.
        return "'" + path.replace("\\", "/").replace(":", "\\:") + "'"

    wrapped_text = wrap_text(script, max_width=1060, font_size=20)
    text_height = calculate_text_height(wrapped_text, font_size=20, line_spacing=10)
    y_position = height - text_height - 30

    # mkstemp hands back an open descriptor; close it right away so nothing
    # leaks and the paths can be reopened by name.
    img_fd, temp_img_path = tempfile.mkstemp(suffix='.png')
    os.close(img_fd)
    txt_fd, temp_txt_path = tempfile.mkstemp(suffix='.txt')
    os.close(txt_fd)
    try:
        Image.fromarray(img).save(temp_img_path)
        # The caption is passed through a file (with text expansion disabled)
        # so quotes, colons, commas, backslashes and '%' in user text cannot
        # break or alter the filtergraph.
        with open(temp_txt_path, 'w', encoding='utf-8') as caption_file:
            caption_file.write(wrapped_text)

        out_dir = os.path.dirname(output_path)
        if out_dir:  # makedirs('') raises for a bare file name
            os.makedirs(out_dir, exist_ok=True)

        video_filter = ",".join([
            "scale=2400:-1",
            "zoompan=z='min(zoom+0.0002,1.5)'"
            ":x='floor(iw/2-(iw/zoom/2))'"
            ":y='floor(ih/2-(ih/zoom/2))'"
            f":d={d_frames}:s={width}x{height}:fps={fps}",
            f"drawtext=textfile={_filter_path(temp_txt_path)}:expansion=none"
            f":fontsize=20:fontcolor=white:x=(w-text_w)/2:y={y_position}"
            # '\\,' keeps the literal backslash that escapes the comma in the
            # font file name for ffmpeg.
            ":fontfile=fonts/Roboto-VariableFont_wdth\\,wght.ttf"
            # NOTE(review): the boxcolor value was mangled to an obfuscated
            # e-mail placeholder in the published source; 'black@0.5' is an
            # assumed reconstruction - confirm the intended colour/opacity.
            ":box=1:boxcolor=black@0.5:boxborderw=10:line_spacing=10",
        ])

        stream = ffmpeg.input(temp_img_path, loop=1, t=dur)
        stream = stream.output(output_path, **{
            'vf': video_filter,
            't': dur,
            'pix_fmt': 'yuv420p',
            'crf': '20',
            'c:v': 'libx264',
            'an': None,
        })
        # overwrite_output avoids ffmpeg's interactive "overwrite?" prompt.
        stream.run(capture_stdout=True, capture_stderr=True, overwrite_output=True)
    except ffmpeg.Error as e:
        print('FFmpeg Error:', e.stderr.decode('utf-8', errors='replace'))
        raise
    finally:
        for temp_path in (temp_img_path, temp_txt_path):
            if os.path.exists(temp_path):
                os.remove(temp_path)
    return output_path
def create_video_from_images(images, scripts, durations, audio_path, output_path, fps=60):
    """Build per-image clips, concatenate them and add an audio track.

    Each image is rendered by ``create_single_video`` (two at a time) into a
    temporary directory. The clips are concatenated without re-encoding into
    *output_path*, and that result is muxed with *audio_path* into a second
    file whose name is *output_path* with ``_with_audio`` inserted before the
    extension.

    Args:
        images: Non-empty sequence of image arrays; the first one defines the
            output frame size.
        scripts: Caption for each image.
        durations: Clip length in seconds for each image.
        audio_path: Audio file to add to the final video.
        output_path: Destination of the silent, concatenated video.
        fps: Frame rate of the generated clips.

    Returns:
        Path of the final video with audio.

    Raises:
        ValueError: If *images* is empty.
    """
    import shutil  # local import: only needed for temp-dir cleanup

    if len(images) == 0:
        raise ValueError("images must contain at least one image")

    height, width, _ = images[0].shape
    # libx264 with yuv420p only accepts even frame dimensions.
    width -= width % 2
    height -= height % 2

    temp_dir = tempfile.mkdtemp()
    try:
        tasks = [
            (img, script, dur, os.path.join(temp_dir, f"temp_{i}.mp4"), width, height, fps)
            for i, (img, script, dur) in enumerate(zip(images, scripts, durations))
        ]
        with ThreadPoolExecutor(max_workers=2) as executor:
            video_paths = list(executor.map(create_single_video, tasks))

        concat_file = os.path.join(temp_dir, "concat.txt")
        with open(concat_file, 'w', encoding='utf-8') as f:
            for path in video_paths:
                # A single quote inside a quoted concat entry is written as '\''.
                quoted = path.replace("'", "'\\''")
                f.write(f"file '{quoted}'\n")

        # overwrite_output: the target may be left over from a previous run.
        ffmpeg.input(concat_file, format='concat', safe=0).output(
            output_path, c='copy', an=None
        ).run(overwrite_output=True)

        # splitext guarantees a name different from output_path, whatever
        # the extension is.
        root, ext = os.path.splitext(output_path)
        final_output_path = f"{root}_with_audio{ext}"
        video_input = ffmpeg.input(output_path)
        audio_input = ffmpeg.input(audio_path)
        ffmpeg.output(
            video_input, audio_input, final_output_path,
            vcodec='libx264', acodec='aac', shortest=None,
        ).run(overwrite_output=True)
    finally:
        # Remove the clips, the concat list and the directory, also on failure.
        shutil.rmtree(temp_dir, ignore_errors=True)
    return final_output_path
def generate_video(image_files, script_input, duration_input, audio_file, fps=60):
    """Gradio callback: turn images, captions and music into a video.

    Args:
        image_files: Uploaded image files; ``None`` is treated as empty.
        script_input: JSON array of caption strings, one per image.
        duration_input: JSON array of durations in seconds, one per image.
        audio_file: Uploaded audio file object exposing ``.name``.
        fps: Frame rate of the generated video.

    Returns:
        ``(video_path, status_message)``; ``video_path`` is ``None`` whenever
        an error is reported in ``status_message``.
    """
    try:
        scripts = json.loads(script_input)
        durations = json.loads(duration_input)
        # Valid JSON that is not an array (e.g. "5") would break len() below.
        if not isinstance(scripts, list) or not isinstance(durations, list):
            raise ValueError("scripts and durations must both be JSON arrays")
    except (TypeError, ValueError) as e:
        return None, f"❌ Lỗi khi phân tích đầu vào:\n{e}"

    image_files = image_files or []
    if len(image_files) != len(scripts) or len(scripts) != len(durations):
        return None, "❌ Số lượng ảnh, scripts và durations phải bằng nhau!"

    try:
        os.makedirs("outputs", exist_ok=True)
        audio_path = audio_file.name
        output_video_path = os.path.join("outputs", "temp_output.mp4")

        # Filter captions and durations together with the images so that an
        # unreadable image does not shift later captions onto the wrong picture.
        images, kept_scripts, kept_durations = [], [], []
        for idx, (img_file, script, dur) in enumerate(
            zip(image_files, scripts, durations), start=1
        ):
            label = getattr(img_file, "name", img_file)
            try:
                with Image.open(img_file) as source:
                    img = np.array(source.convert("RGB"))
            except Exception as e:
                print(f"❌ Không đọc được ảnh {idx}: {label} - lỗi: {e}")
                continue
            images.append(img)
            kept_scripts.append(script)
            kept_durations.append(dur)
            print(f"✅ Ảnh {idx}: {label} - kích thước {img.shape}")

        if not images:
            return None, "❌ Không có ảnh nào hợp lệ để tạo video!"

        final_video_path = create_video_from_images(
            images, kept_scripts, kept_durations, audio_path, output_video_path, fps
        )
        return final_video_path, "✅ Video tạo thành công!"
    except Exception:
        import traceback
        return None, f"❌ Lỗi khi tạo video:\n{traceback.format_exc()}"
 demo = gr.Interface(
     fn=generate_video,
     inputs=[
+        gr.File(file_types=["image"], label="Ảnh (nhiều)", file_count="multiple"),
+        gr.Textbox(label="Scripts (danh sách)", placeholder="['Chào bạn', 'Video demo']"),
+        gr.Textbox(label="Durations (giây)", placeholder="[3, 4]"),
+        gr.File(file_types=["audio"], label="Nhạc nền (.mp3 hoặc .wav)"),
+        gr.Slider(minimum=1, maximum=120, step=1, label="FPS (frame/giây)", value=60),
     ],
     outputs=[
+        gr.Video(label="Video kết quả"),
+        gr.Textbox(label="Trạng thái", interactive=False),
     ],
+    title="Tạo video từ ảnh, chữ và nhạc",
+    description="Upload nhiều ảnh + đoạn chữ + nhạc nền để tạo video tự động."
 )
 if __name__ == "__main__":
+    demo.queue().launch()

requirements.txt CHANGED Viewed

@@ -2,4 +2,7 @@ moviepy==1.0.3
 Pillow==9.5.0
 imageio
 imageio-ffmpeg
-opencv-python

 Pillow==9.5.0
 imageio
 imageio-ffmpeg
+opencv-python
+ffmpeg-python
+numpy