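# Spaces: Runtime error
# NOTE(review): the line above appears to be hosting-page status text captured
# together with the file; it is not part of the program.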
import gradio as gr | |
import torch | |
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM | |
from TTS.api import TTS | |
import numpy as np | |
from PIL import Image | |
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler | |
from torchvision.io import write_video | |
import os | |
# Text generation: GPT-2 tokenizer and causal language model.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# Text-to-speech: Tacotron2-DDC model from the "ljspeech" English model set.
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC")

# Image generation: Stable Diffusion v1.4 with the DPM-Solver multistep
# scheduler, executed on the CPU.
# The weights are loaded in float32 because the pipeline is moved to "cpu"
# below; diffusers warns that pipelines loaded with float16 cannot run on
# the CPU device.
pipe = StableDiffusionPipeline.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    torch_dtype=torch.float32,
)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe = pipe.to("cpu")
def generate_text(prompt, max_length=200):
    """Generate text from *prompt* with the module-level GPT-2 model.

    Args:
        prompt: Text that is tokenized and handed to ``model.generate``.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    prompt_ids = tokenizer.encode(prompt, return_tensors="pt")
    generation_options = {
        # Every prompt token is real input, so the mask is all ones.
        "attention_mask": torch.ones_like(prompt_ids),
        "max_length": max_length,
        "num_return_sequences": 1,
        "no_repeat_ngram_size": 2,
        # The end-of-sequence token id is passed as the padding token id.
        "pad_token_id": tokenizer.eos_token_id,
    }
    sequences = model.generate(prompt_ids, **generation_options)
    first_sequence = sequences[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
def generate_speech(text, output_path="generated_speech.wav"):
    """Synthesize *text* to an audio file with the module-level TTS model.

    Args:
        text: Text to be spoken.
        output_path: File the audio is written to. Defaults to
            ``"generated_speech.wav"``; callers that need several audio
            files at once can pass distinct paths.

    Returns:
        ``output_path``, unchanged.
    """
    tts.tts_to_file(text=text, file_path=output_path)
    return output_path
def generate_video_frames(prompt, num_frames=30):
    """Render one image per frame with the module-level diffusion pipeline.

    Each frame uses *prompt* followed by ", frame K of N" so that the
    prompts differ from frame to frame.

    Args:
        prompt: Base text prompt for every frame.
        num_frames: Number of images to generate.

    Returns:
        A list with one NumPy array per generated image.
    """
    per_frame_prompts = (
        f"{prompt}, frame {number} of {num_frames}"
        for number in range(1, num_frames + 1)
    )
    return [np.array(pipe(text).images[0]) for text in per_frame_prompts]
def create_video_from_frames(frames, output_path="output_video.mp4", fps=10):
    """Encode a sequence of image frames into a video file.

    Args:
        frames: Non-empty sequence of equally sized images, each array-like
            in (height, width, channels) layout.
        output_path: Destination file for the encoded video.
        fps: Frames per second of the encoded video.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``frames`` is empty or the stacked frames are not a
            4-D (frames, height, width, channels) array.
    """
    if len(frames) == 0:
        raise ValueError("frames must contain at least one image")

    stacked = np.stack([np.asarray(frame) for frame in frames])
    if stacked.ndim != 4:
        raise ValueError(
            "each frame must be a (height, width, channels) array; "
            f"got stacked shape {stacked.shape}"
        )

    # write_video takes the frames as a (T, H, W, C) tensor, which is the
    # layout the stacked frames already have, so no permute is applied.
    frames_tensor = torch.from_numpy(stacked)
    write_video(output_path, frames_tensor, fps=fps)
    return output_path
def generate_comedy_animation(prompt):
    """Produce a comedy script, a video and a spoken version of the script.

    Args:
        prompt: Topic of the comedy scene.

    Returns:
        A ``(script, video_path, speech_path)`` tuple: the generated text,
        the path returned by ``create_video_from_frames`` and the path
        returned by ``generate_speech``.
    """
    script_request = (
        f"Write a short comedy script with two characters about {prompt}. "
        "Use the format 'Character: Action' for each line of dialogue or action."
    )
    script = generate_text(script_request)

    scene_description = f"A comedic scene with two characters: {prompt}"
    rendered_frames = generate_video_frames(scene_description)
    animation_file = create_video_from_frames(rendered_frames)

    # The narration reads the generated script aloud.
    narration_file = generate_speech(script)
    return script, animation_file, narration_file
def generate_kids_music_animation(theme):
    """Produce children's song lyrics, a video and a spoken version.

    Args:
        theme: Subject of the children's song.

    Returns:
        A ``(lyrics, video_path, speech_path)`` tuple: the generated text,
        the path returned by ``create_video_from_frames`` and the path
        returned by ``generate_speech``.
    """
    lyrics_request = (
        f"Write short and simple lyrics for a children's song about {theme}. "
        "Each line should be on a new line. "
        "Don't include 'Verse' or 'Chorus' labels."
    )
    lyrics = generate_text(lyrics_request)

    scene_description = f"A colorful, animated music video for children about {theme}"
    rendered_frames = generate_video_frames(scene_description)
    animation_file = create_video_from_frames(rendered_frames)

    # The audio track is the lyrics passed through the speech generator.
    vocals_file = generate_speech(lyrics)
    return lyrics, animation_file, vocals_file
# Gradio interface: one tab per generator. Each tab has a text input, a
# trigger button and three outputs (text, video, audio).
with gr.Blocks() as app:
    gr.Markdown("## AI-Generated Video and Audio Content")

    with gr.Tab("Comedy Animation"):
        comedy_prompt = gr.Textbox(label="Enter comedy prompt")
        comedy_generate_btn = gr.Button("Generate Comedy Animation")
        comedy_script = gr.Textbox(label="Generated Comedy Script")
        comedy_animation = gr.Video(label="Comedy Animation")
        comedy_audio = gr.Audio(label="Comedy Speech")
        # Output order matches the (script, video_path, speech_path) tuple
        # returned by generate_comedy_animation.
        comedy_generate_btn.click(
            fn=generate_comedy_animation,
            inputs=comedy_prompt,
            outputs=[comedy_script, comedy_animation, comedy_audio],
        )

    with gr.Tab("Kids Music Animation"):
        music_theme = gr.Textbox(label="Enter music theme for kids")
        music_generate_btn = gr.Button("Generate Kids Music Animation")
        music_lyrics = gr.Textbox(label="Generated Lyrics")
        music_animation = gr.Video(label="Music Animation")
        music_audio = gr.Audio(label="Music Audio")
        # Output order matches the (lyrics, video_path, speech_path) tuple
        # returned by generate_kids_music_animation.
        music_generate_btn.click(
            fn=generate_kids_music_animation,
            inputs=music_theme,
            outputs=[music_lyrics, music_animation, music_audio],
        )

# Start the web server for the interface.
app.launch()