File size: 4,889 Bytes
7ae35c9
5a834fc
ac8612f
 
 
 
 
438c407
6125413
ac8612f
 
 
 
 
 
 
859e5f2
ac8612f
 
 
 
 
 
 
 
 
859e5f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438c407
859e5f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438c407
859e5f2
 
438c407
859e5f2
 
 
 
 
 
 
ac8612f
859e5f2
ac8612f
859e5f2
 
 
 
 
 
 
 
ac8612f
 
859e5f2
ac8612f
 
6125413
 
859e5f2
ac8612f
 
 
6125413
3638d85
 
859e5f2
6125413
859e5f2
 
 
 
 
 
 
 
 
 
 
9bea5a2
3638d85
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
import gradio as gr
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from TTS.api import TTS
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import re

# Text generation: the tokenizer and the causal LM are loaded from the same
# checkpoint id so that the vocabulary always matches the model weights.
_TEXT_MODEL_NAME = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(_TEXT_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(_TEXT_MODEL_NAME)

# Speech synthesis: a single TTS instance is created at import time and reused
# by every request.
_TTS_MODEL_NAME = "tts_models/en/ljspeech/tacotron2-DDC"
tts = TTS(model_name=_TTS_MODEL_NAME)

def generate_text(prompt, max_length=200):
    """Continue *prompt* with the module-level causal language model.

    Args:
        prompt: Text the model should continue.
        max_length: Upper bound on the total sequence length in tokens
            (prompt tokens included), forwarded to ``model.generate``.

    Returns:
        The decoded first generated sequence with special tokens removed.
        For a decoder-only model this sequence starts with the prompt itself,
        followed by the continuation.
    """
    # Calling the tokenizer (instead of ``encode``) also yields the attention
    # mask, so ``generate`` does not have to guess which positions are padding.
    encoded = tokenizer(prompt, return_tensors="pt")
    output = model.generate(
        encoded["input_ids"],
        attention_mask=encoded["attention_mask"],
        max_length=max_length,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        # GPT-2 defines no pad token; reuse EOS explicitly rather than relying
        # on the implicit fallback (and its warning) inside ``generate``.
        pad_token_id=tokenizer.eos_token_id,
    )
    return tokenizer.decode(output[0], skip_special_tokens=True)

def generate_speech(text, output_path="generated_speech.wav"):
    """Synthesize *text* to a WAV file with the module-level TTS model.

    Args:
        text: The text to be spoken.
        output_path: Where to write the audio. Defaults to
            ``"generated_speech.wav"`` in the working directory; pass a
            distinct path when several results must coexist, since each call
            overwrites the target file.

    Returns:
        The path the audio was written to (``output_path``).
    """
    tts.tts_to_file(text=text, file_path=output_path)
    return output_path

def parse_script(script):
    """Split a ``Character: Action`` script into scenes.

    Every non-blank line containing a colon is one action: the text before the
    first colon is the character, the rest is what they say or do (both
    stripped). A non-blank line without a colon closes the scene collected so
    far; blank lines are ignored.

    Returns:
        A list of dicts, each with ``"characters"`` (set of names) and
        ``"actions"`` (list of ``(character, action)`` tuples in script
        order). Scenes without any action are never emitted.
    """
    scenes = []
    cast, beats = set(), []

    for raw_line in script.split('\n'):
        if not raw_line.strip():
            continue

        name, colon, remainder = raw_line.partition(':')
        if colon:
            name = name.strip()
            cast.add(name)
            beats.append((name, remainder.strip()))
            continue

        # A colon-less line acts as a scene separator.
        if beats:
            scenes.append({"characters": cast, "actions": beats})
            cast, beats = set(), []

    # Flush the trailing scene, if any.
    if beats:
        scenes.append({"characters": cast, "actions": beats})

    return scenes

def create_stick_figure(ax, x, y, color):
    """Draw a stick figure on *ax* with its shoulders at ``(x, y)``.

    The figure is a filled head circle plus five line segments (torso, two
    arms, two legs), all in *color*. Coordinates are in the data units of
    *ax*. Nothing is returned; the artists are added to *ax* directly.
    """
    head = plt.Circle((x, y+0.1), 0.1, fc=color)

    # (xs, ys) endpoints of each limb, listed in drawing order.
    segments = (
        ([x, x], [y-0.3, y]),            # torso
        ([x-0.2, x], [y, y-0.1]),        # left arm
        ([x, x+0.2], [y-0.1, y]),        # right arm
        ([x-0.1, x], [y-0.5, y-0.3]),    # left leg
        ([x, x+0.1], [y-0.3, y-0.5]),    # right leg
    )

    ax.add_artist(head)
    for xs, ys in segments:
        ax.add_artist(plt.Line2D(xs, ys, color=color))

def _reset_axes(ax):
    """Clear *ax* and restore the fixed, frameless unit-square canvas."""
    ax.clear()
    ax.set_xlim(0, 1)
    ax.set_ylim(0, 1)
    ax.axis('off')


def _scene_layout(scene):
    """Map each character of *scene* to its ``(x, y, color)`` drawing slot.

    Characters are sorted so the layout is the same on every run (set
    iteration order is not stable across interpreter runs). They are spread
    horizontally inside a margin because a stick figure's arms reach 0.2 units
    to either side and would be clipped at the axes edge otherwise.
    """
    characters = sorted(scene["characters"])
    count = len(characters)
    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    palette = plt.get_cmap('Set3')(np.linspace(0, 1, count))
    margin = 0.2

    layout = {}
    for index, name in enumerate(characters):
        if count > 1:
            x = margin + (1 - 2 * margin) * index / (count - 1)
        else:
            x = 0.5
        layout[name] = (x, 0.5, palette[index])
    return layout


def _draw_scene_frame(ax, scene, layout, frame=None):
    """Redraw *ax* for one frame of *scene*.

    With ``frame=None`` only the idle characters are drawn. Otherwise the
    action at ``frame % len(actions)`` is shown: its text is written near the
    top of the axes and the speaking character is offset vertically.
    """
    _reset_axes(ax)

    actions = scene["actions"]
    speaker, text = None, None
    if frame is not None and actions:
        speaker, text = actions[frame % len(actions)]

    for name, (x, y, color) in layout.items():
        if name == speaker:
            y += 0.05 * np.sin(frame * 0.5)  # Make the speaking character bounce
        create_stick_figure(ax, x, y, color)

    if text is not None:
        ax.text(0.5, 0.9, text, ha='center', va='center', wrap=True)


def animate_scene(scene, ax):
    """Build a ``FuncAnimation`` that plays *scene* on *ax*.

    Args:
        scene: A dict with ``"characters"`` and ``"actions"`` as produced by
            ``parse_script``.
        ax: The axes to draw on; the animation is attached to ``ax.figure``.

    Returns:
        A ``matplotlib.animation.FuncAnimation`` with five frames per action,
        one frame per second. The actions are cycled through, so each one is
        shown five times over the course of the animation.
    """
    layout = _scene_layout(scene)

    def init():
        _draw_scene_frame(ax, scene, layout)
        return []

    def animate(frame):
        _draw_scene_frame(ax, scene, layout, frame)
        return []

    # The figure is taken from the axes (there is no global ``fig``). Blitting
    # is disabled because every frame clears and rebuilds the whole axes, so
    # there are no persistent artists to blit.
    return animation.FuncAnimation(
        ax.figure,
        animate,
        init_func=init,
        frames=len(scene["actions"]) * 5,
        interval=1000,
        blit=False,
    )


def create_character_animation(script):
    """Render *script* as a stick-figure GIF and return the file path.

    The script is split into scenes with ``parse_script``; the scenes are
    played back to back in a single animation (five frames per action, one
    frame per second) and written to ``character_animation.gif`` with the
    pillow writer. If the script contains no dialogue lines, a single blank
    frame is written so that callers still receive a valid image.

    Returns:
        The string ``'character_animation.gif'``.
    """
    scenes = parse_script(script)

    # Flatten all scenes into one frame list. FuncAnimation objects cannot be
    # concatenated after the fact, so the combined timeline is built up front
    # and played by a single animation.
    timeline = []
    for scene in scenes:
        layout = _scene_layout(scene)
        timeline.extend(
            (scene, layout, frame)
            for frame in range(len(scene["actions"]) * 5)
        )

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        def init():
            _reset_axes(ax)
            return []

        def draw(step):
            if step is None:
                # Placeholder frame for a script without any dialogue.
                _reset_axes(ax)
            else:
                scene, layout, frame = step
                _draw_scene_frame(ax, scene, layout, frame)
            return []

        combined_animation = animation.FuncAnimation(
            fig,
            draw,
            init_func=init,
            # The pillow writer fails on zero frames, hence the placeholder.
            frames=timeline or [None],
            interval=1000,
            blit=False,
            repeat_delay=1000,
        )

        # Save animation as gif
        combined_animation.save('character_animation.gif', writer='pillow')
    finally:
        # pyplot keeps every figure alive until it is closed; without this
        # each request would leak one figure.
        plt.close(fig)

    return 'character_animation.gif'

def generate_comedy_animation(prompt):
    """Generate a comedy script about *prompt* plus its animation and audio.

    Args:
        prompt: The topic of the script, as typed by the user.

    Returns:
        A tuple ``(script, animation_path, speech_path)``: the generated
        script text, the path of the rendered GIF, and the path of the
        synthesized speech file.
    """
    instruction = f"Write a short comedy script with two characters about {prompt}. Use the format 'Character: Action' for each line of dialogue or action."
    generated = generate_text(instruction)

    # A causal LM returns the prompt followed by its continuation. The
    # instruction itself contains "Character: Action", so leaving it in would
    # turn it into a bogus dialogue line in the animation and have it read
    # aloud. Strip it only when it is an exact prefix, and keep the full text
    # if nothing else was generated.
    script = generated
    if generated.startswith(instruction):
        script = generated[len(instruction):].strip() or generated

    animation_path = create_character_animation(script)
    speech_path = generate_speech(script)
    return script, animation_path, speech_path

# Gradio Interface
# One page: a prompt box and a button that runs generate_comedy_animation and
# routes its three return values to the script, animation and audio widgets.
with gr.Blocks() as app:
    gr.Markdown("## Character-based Animation Generator")
    
    # Input: the topic passed to generate_comedy_animation as `prompt`.
    comedy_prompt = gr.Textbox(label="Enter comedy prompt")
    comedy_generate_btn = gr.Button("Generate Comedy Animation")
    # Outputs, created in the order generate_comedy_animation returns them:
    # script text, animation file path, speech file path.
    comedy_script = gr.Textbox(label="Generated Comedy Script")
    comedy_animation = gr.Image(label="Comedy Animation")
    comedy_audio = gr.Audio(label="Comedy Speech")

    # The order of `outputs` must match the order of the returned tuple.
    comedy_generate_btn.click(
        generate_comedy_animation,
        inputs=comedy_prompt,
        outputs=[comedy_script, comedy_animation, comedy_audio]
    )

# NOTE(review): launch() is called unconditionally, so merely importing this
# module starts the app; there is no `if __name__ == "__main__"` guard.
app.launch()