File size: 2,748 Bytes
8b4daa9
 
 
 
 
 
 
 
 
 
 
fda4fd0
8b4daa9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import json
import random
import os
from diffusers import StableDiffusionPipeline
import torch

# Load the diffusion pipeline once at import time so every call reuses it.
# Half precision is only used on CUDA: on CPU, float16 kernels are missing
# for some ops (depending on the PyTorch version) or very slow, so the
# pipeline falls back to float32 there.
_device = "cuda" if torch.cuda.is_available() else "cpu"
_dtype = torch.float16 if _device == "cuda" else torch.float32

pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=_dtype,
)
pipe = pipe.to(_device)


def generate_keyframe_prompt(segment: dict) -> dict:
    """
    Build the Stable Diffusion prompt pair for one script segment.

    The prompt is assembled from the segment's optional "description",
    "speaker" and "narration" entries, followed by a fixed set of style,
    resolution, lighting and setting hints. Missing or empty entries are
    skipped; a speaker without narration contributes nothing.

    Args:
        segment: Mapping describing one segment. Only the keys
            "description", "speaker" and "narration" are read.

    Returns:
        A dict with two string entries: "prompt" (the space-joined prompt
        parts) and "negative_prompt" (a fixed list of things to avoid).
    """
    description = segment.get("description", "")
    speaker = segment.get("speaker", "")
    narration = segment.get("narration", "")

    prompt_parts = []

    if description:
        prompt_parts.append(f"Scene: {description}.")

    # The speaker is only mentioned when there is a line for them to say.
    if speaker and narration:
        prompt_parts.append(f"Character '{speaker}' speaking: \"{narration}\".")
    elif narration:
        prompt_parts.append(f"Narration: \"{narration}\".")

    # Fixed hints appended to every prompt, independent of the segment.
    prompt_parts.extend([
        "Style: Simple, cartoonish, line art, sketch, low detail, illustrative, minimal background, focus on main subject.",
        "Resolution: lowres, 256x256.",
        "Lighting: Nighttime museum, dim lighting.",
        "Setting: Museum interior, exhibits.",
    ])

    negative_prompt = "blurry, distorted, ugly, tiling, poorly drawn, out of frame, disfigured, deformed, bad anatomy, watermark, text, signature, high detail, realistic, photorealistic, complex"

    return {
        "prompt": " ".join(prompt_parts).strip(),
        "negative_prompt": negative_prompt,
    }


def generate_all_keyframe_images(script_data, output_dir="keyframes", images_per_segment=3):
    """
    Generate keyframe images for every segment with Stable Diffusion and
    save them as PNG files in the given output directory.

    For each segment the prompt pair is built with
    generate_keyframe_prompt, and the module-level pipeline is called once
    per image (20 inference steps, guidance scale 7.5, 256x256 output).
    Files are named "segment_<id>_v<k>.png" with k starting at 1.

    Args:
        script_data: Iterable of segment dicts. Each segment's
            "segment_id" entry (if any) is used to label its files.
        output_dir: Directory the images are written to; created if it
            does not exist.
        images_per_segment: Number of images to generate per segment.
            Defaults to 3.

    Returns:
        A list with one dict per segment containing "segment_id",
        "prompt", "negative_prompt" and "frame_images" (the saved paths).
    """
    os.makedirs(output_dir, exist_ok=True)
    keyframe_outputs = []

    for index, segment in enumerate(script_data):
        sd_prompts = generate_keyframe_prompt(segment)
        prompt = sd_prompts["prompt"]
        negative_prompt = sd_prompts["negative_prompt"]
        segment_id = segment.get("segment_id")

        # Segments without an id would all map to "segment_None_*" and
        # overwrite each other's files, so fall back to the position in
        # script_data for the file name only.
        file_label = segment_id if segment_id is not None else f"idx{index}"

        frame_images = []
        for variant in range(1, images_per_segment + 1):
            result = pipe(
                prompt,
                negative_prompt=negative_prompt,
                num_inference_steps=20,
                guidance_scale=7.5,
                height=256,
                width=256,
            )
            image = result.images[0]
            image_path = os.path.join(output_dir, f"segment_{file_label}_v{variant}.png")
            image.save(image_path)
            frame_images.append(image_path)

        keyframe_outputs.append({
            "segment_id": segment_id,
            "prompt": prompt,
            "negative_prompt": negative_prompt,
            "frame_images": frame_images,
        })

        print(f"✓ Generated {len(frame_images)} images for Segment {segment_id}")

    return keyframe_outputs