Spaces:
Runtime error
Runtime error
File size: 6,266 Bytes
7ae35c9 dbe4319 5a834fc 4786e02 9bea5a2 7a59ca0 9bea5a2 4c35324 9bea5a2 7d90627 3638d85 9bea5a2 e395658 9bea5a2 3638d85 e395658 9bea5a2 3f22c4a 394d306 9bea5a2 96f47bd 6700b95 96f47bd 9bea5a2 e95a1cd 7d90627 e95a1cd b9d9615 3f22c4a 5a834fc e95a1cd 9bea5a2 3638d85 9bea5a2 5a834fc e95a1cd 3f22c4a 7d90627 9bea5a2 4c35324 e95a1cd 4c35324 e95a1cd 3638d85 9bea5a2 e95a1cd 5a834fc e95a1cd 9bea5a2 3638d85 3f22c4a 3638d85 9bea5a2 3638d85 9bea5a2 3638d85 3f22c4a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
import gradio as gr
from transformers import pipeline
from diffusers import StableDiffusionPipeline
import torch
from PIL import Image, ImageDraw, ImageFont
import scipy.io.wavfile
from TTS.api import TTS
from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips, VideoFileClip
import os
from groq import Groq
from deepgram import Deepgram
import asyncio
import aiohttp
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Initialize Clients
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# The Deepgram v2 SDK constructor takes the API key (or an options dict) as its
# single positional argument; it does not accept an `api_key=` keyword.
deepgram_client = Deepgram(os.getenv("DEEPGRAM_API_KEY"))
# Use Groq-hosted Mixtral (mixtral-8x7b-32768) for text generation
async def generate_comedy_script(prompt):
    """Generate a short comedy script for ``prompt`` through the Groq chat API.

    ``groq_client`` is built from the synchronous ``Groq`` class, so
    ``chat.completions.create`` returns a finished completion object rather
    than an awaitable; awaiting it directly raises ``TypeError``. The blocking
    request is therefore run in a worker thread and that thread is awaited.

    Args:
        prompt: Topic text sent as the user message.

    Returns:
        The message content of the first completion choice.
    """
    chat_completion = await asyncio.to_thread(
        groq_client.chat.completions.create,
        messages=[
            {
                "role": "system",
                "content": "You are a comedy writer. Generate a short, funny script based on the given prompt."
            },
            {
                "role": "user",
                "content": prompt
            }
        ],
        model="mixtral-8x7b-32768",
        max_tokens=200,
    )
    return chat_completion.choices[0].message.content
# Coqui TTS model used for text-to-speech (CPU only)
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)
# MusicGen text-to-audio pipeline used for background music (CPU only)
music_generator = pipeline(
    task="text-to-audio",
    model="facebook/musicgen-small",
    device="cpu",
)
# Fluently Anime (Stable Diffusion) pipeline used for anime image generation (CPU only)
model_id = "fluently/Fluently-anime"
anime_image_generator = StableDiffusionPipeline.from_pretrained(model_id)
anime_image_generator = anime_image_generator.to("cpu")
# Convert Text to Speech using Coqui TTS (unchanged)
def text_to_speech(script):
    """Synthesize ``script`` to speech and return the path of the WAV file.

    The audio is always written to ``output.wav`` relative to the current
    working directory, so each call overwrites the previous result.
    """
    wav_path = 'output.wav'
    tts.tts_to_file(file_path=wav_path, text=script)
    return wav_path
# Create Anime Images Using Fluently Anime (unchanged)
def create_images_from_script(script):
    """Render one image per ``'. '``-separated fragment of ``script``.

    Each fragment is used as the prompt for ``anime_image_generator``; the
    first returned image is saved as ``/tmp/anime_image_<index>.png``.

    Returns:
        The saved image paths, in fragment order.
    """
    saved_paths = []
    for index, sentence in enumerate(script.split('. ')):
        picture = anime_image_generator(sentence).images[0]
        target = f'/tmp/anime_image_{index}.png'
        picture.save(target)
        saved_paths += [target]
    return saved_paths
# Generate Fun Music Track using MusicGen (unchanged)
def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Generate music for ``prompt`` and write it to ``output_music_file``.

    The pipeline result is read through its ``"audio"`` and
    ``"sampling_rate"`` entries and written out with ``scipy.io.wavfile``.

    Returns:
        ``output_music_file``, unchanged.
    """
    generated = music_generator(prompt)
    scipy.io.wavfile.write(
        output_music_file,
        data=generated["audio"],
        rate=generated["sampling_rate"],
    )
    return output_music_file
# Create Video from Generated Anime Images (unchanged)
def generate_text_video(script, fps=24, video_path="/tmp/final_video.mp4"):
    """Render the images generated for ``script`` into an H.264 video file.

    Args:
        script: Text passed to ``create_images_from_script``.
        fps: Frame rate given to ``ImageSequenceClip`` (default 24, the
            previously hard-coded value).
        video_path: Destination file (default is the previously hard-coded
            ``/tmp/final_video.mp4``).

    Returns:
        ``video_path``.
    """
    # ImageSequenceClip is not among the names imported from moviepy.editor at
    # module level, so the bare reference raised NameError; import it here.
    from moviepy.editor import ImageSequenceClip

    image_paths = create_images_from_script(script)
    # NOTE(review): with an fps-driven sequence each image appears to last
    # 1/fps seconds, so the clip is very short at fps=24 — confirm intent.
    video_clip = ImageSequenceClip(image_paths, fps=fps)
    video_clip.write_videofile(video_path, codec='libx264')
    return video_path
# Combine Audio and Video (unchanged)
def combine_audio_video(video_file, audio_file):
    """Open ``video_file`` and attach the audio from ``audio_file`` to it.

    Returns:
        The clip produced by ``set_audio``; nothing is written to disk here.
    """
    return VideoFileClip(video_file).set_audio(AudioFileClip(audio_file))
# Main Function to Generate Comedy Animation
async def generate_comedy_and_animation(prompt):
    """Produce a comedy script, its narration audio, and a music-backed video.

    The result feeds a Textbox, an Audio and a Video component. The video
    value must be a file path, so the combined clip is rendered to disk
    instead of returning the in-memory clip object.

    Args:
        prompt: Comedy prompt entered by the user.

    Returns:
        A ``(script, audio_file, video_path)`` tuple where ``audio_file`` is
        the narration WAV path and ``video_path`` is the rendered MP4 path.
    """
    script = await generate_comedy_script(prompt)
    # The model and rendering steps are blocking; run them in worker threads
    # so this coroutine does not stall the event loop while they execute.
    audio_file = await asyncio.to_thread(text_to_speech, script)
    video_file = await asyncio.to_thread(generate_text_video, script)
    fun_music = await asyncio.to_thread(generate_fun_music, prompt)
    final_clip = combine_audio_video(video_file, fun_music)
    # Must differ from the file generate_text_video wrote, which is being read.
    final_path = "/tmp/comedy_animation.mp4"
    try:
        await asyncio.to_thread(
            final_clip.write_videofile,
            final_path,
            codec="libx264",
            audio_codec="aac",
        )
    finally:
        # Release the underlying video/audio readers once rendering is over.
        final_clip.close()
    return script, audio_file, final_path
# Generate Kids Content (unchanged)
def generate_kids_content(theme):
    """Create a music track for ``theme`` plus a five-second captioned video.

    Five identical 800x400 blue frames captioned ``Kids Music: <theme>`` are
    saved under ``/tmp``, shown for one second each, concatenated, paired
    with the generated music and written to ``/tmp/kids_animation.mp4``.

    Returns:
        A ``(music_file, video_path)`` tuple.
    """
    soundtrack = generate_fun_music(theme, output_music_file="kids_music.wav")
    animation_path = "/tmp/kids_animation.mp4"
    slides = []
    for index in range(5):
        canvas = Image.new('RGB', (800, 400), color=(0, 0, 255))
        ImageDraw.Draw(canvas).text(
            (10, 180),
            f"Kids Music: {theme}",
            font=ImageFont.load_default(),
            fill=(255, 255, 0),
        )
        slide_file = f'/tmp/kids_temp_{index}.png'
        canvas.save(slide_file)
        one_second = ImageClip(slide_file).set_duration(1)
        slides.append(one_second.set_position(('center', 'center')))
    montage = concatenate_videoclips(slides, method="compose")
    montage = montage.set_audio(AudioFileClip(soundtrack))
    montage.write_videofile(animation_path, fps=24)
    return soundtrack, animation_path
# New function for speech-to-text
async def transcribe_audio(audio_file):
    """Transcribe the audio file at path ``audio_file`` with Deepgram.

    The file is opened in binary mode and submitted as ``audio/wav`` with
    smart formatting on the ``general`` model.

    Returns:
        The transcript of the first alternative of the first channel.
    """
    request_options = {'smart_format': True, 'model': 'general'}
    with open(audio_file, 'rb') as audio:
        payload = {'buffer': audio, 'mimetype': 'audio/wav'}
        # The request must complete while the file handle is still open.
        result = await deepgram_client.transcription.prerecorded(payload, request_options)
    first_alternative = result['results']['channels'][0]['alternatives'][0]
    return first_alternative['transcript']
# Gradio Interface
with gr.Blocks() as app:
    gr.Markdown("## AI Comedy and Kids Content Generator")
    # Comedy Animation Tab
    with gr.Tab("Generate Comedy Animation"):
        prompt_input = gr.Textbox(label="Comedy Prompt")
        generate_btn = gr.Button("Generate Comedy Script and Animation")
        comedy_script = gr.Textbox(label="Generated Script")
        comedy_audio = gr.Audio(label="Generated Audio")
        comedy_video = gr.Video(label="Generated Animation")
        generate_btn.click(
            generate_comedy_and_animation,
            inputs=prompt_input,
            outputs=[comedy_script, comedy_audio, comedy_video]
        )
    # Kids Music Animation Tab
    with gr.Tab("Generate Kids Music Animation"):
        theme_input = gr.Textbox(label="Kids Music Theme")
        generate_music_btn = gr.Button("Generate Kids Music and Animation")
        kids_music_audio = gr.Audio(label="Generated Music")
        kids_music_video = gr.Video(label="Generated Kids Animation")
        generate_music_btn.click(
            generate_kids_content,
            inputs=theme_input,
            outputs=[kids_music_audio, kids_music_video]
        )
    # Speech-to-Text Tab
    with gr.Tab("Speech-to-Text"):
        # transcribe_audio opens its argument as a file, so the component must
        # hand over a path rather than the default (sample_rate, array) tuple.
        audio_input = gr.Audio(label="Upload Audio", type="filepath")
        transcribe_btn = gr.Button("Transcribe Audio")
        transcription_output = gr.Textbox(label="Transcription")
        transcribe_btn.click(
            transcribe_audio,
            inputs=audio_input,
            outputs=transcription_output
        )
app.launch()
|