Manasa1 committed on
Commit
ce142bb
·
verified ·
1 Parent(s): f020f7d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -30
app.py CHANGED
@@ -4,31 +4,34 @@ from diffusers import StableDiffusionPipeline
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
- from TTS.api import TTS # Coqui TTS (open source)
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
9
  import os
10
  from groq import Groq
 
11
 
12
- # Load and Initialize Models
13
- # Use Groq for text generation
14
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
15
 
16
- # Use Stable Diffusion (open-source) for image generation
17
- image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float16).to("cpu")
18
 
19
- # Use Coqui TTS (open-source) for text-to-speech
20
- tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
21
 
 
 
 
 
 
 
 
22
def generate_comedy_script(prompt):
    """Generate a comedy script about *prompt* using Groq's Llama3 chat API.

    Args:
        prompt: Topic the comedy script should be about.

    Returns:
        The generated script text (content of the first completion choice).
    """
    response = client.chat.completions.create(
        messages=[
            # f-string so the topic is actually interpolated — the original
            # plain string sent the literal text "{prompt}" to the model;
            # also fixes the "scipt" -> "script" typo.
            {"role": "user", "content": f"write a comedy script on {prompt}"}
        ],
        model="llama3-8b-8192",
    )
    script = response.choices[0].message.content
    return script
30
 
31
- # Convert Text to Speech using Coqui TTS
32
  def text_to_speech(script):
33
  output_audio = 'output.wav'
34
  tts.tts_to_file(text=script, file_path=output_audio)
@@ -45,21 +48,29 @@ def create_images_from_script(script):
45
  image_paths.append(img_path)
46
  return image_paths
47
 
 
48
def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Synthesize a short music clip for *prompt* and save it as a WAV file.

    Returns the path of the written audio file.
    """
    # Open-source MusicGen text-to-audio pipeline, run on CPU.
    generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")
    result = generator(prompt)

    # Persist the raw samples at the sampling rate the model reports.
    scipy.io.wavfile.write(
        output_music_file,
        rate=result["sampling_rate"],
        data=result["audio"],
    )
    return output_music_file
56
 
57
-
58
- # Create Video from Generated Images Using ToonCrafter
59
def generate_text_video(script):
    """Render the script's generated images into a video at /tmp/final_video.mp4.

    Args:
        script: Script text; images are generated from it one per segment.

    Returns:
        Path of the written video file.
    """
    image_paths = create_images_from_script(script)
    # BUG FIX: the original called `tooncrafter.generate_animation(...)`, but
    # `tooncrafter` is never imported or defined anywhere in this file, so
    # this function raised NameError at runtime. Build the video with
    # moviepy instead, which is already imported at module level.
    clips = [
        ImageClip(path).set_duration(3).set_position(('center', 'center'))
        for path in image_paths
    ]
    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile("/tmp/final_video.mp4", fps=24)
    return "/tmp/final_video.mp4"
64
 
65
  # Combine Audio and Video
@@ -79,8 +90,8 @@ def generate_comedy_and_animation(prompt):
79
  final_video = combine_audio_video(video_file, fun_music)
80
  return script, audio_file, final_video
81
 
82
- # Generate Kids Music Animation
83
- def generate_kids_animation_with_music(theme, output_video_file="kids_animation.mp4"):
84
  music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
85
  clips = []
86
  for i in range(5):
@@ -93,13 +104,8 @@ def generate_kids_animation_with_music(theme, output_video_file="kids_animation.
93
  clips.append(ImageClip(frame_path).set_duration(1).set_position(('center', 'center')))
94
  final_video = CompositeVideoClip(clips, size=(800, 400))
95
  final_video = final_video.set_audio(AudioFileClip(music_file))
96
- final_video.write_videofile(output_video_file, fps=24)
97
- return music_file, output_video_file
98
-
99
- # Main Function to Generate Kids Content
100
def generate_kids_content(theme):
    """Generate kids content for *theme*: a music track plus its animation video.

    Returns:
        Tuple of (music file path, video file path).
    """
    audio_path, clip_path = generate_kids_animation_with_music(theme)
    return audio_path, clip_path
103
 
104
  # Gradio Interface
105
  with gr.Blocks() as app:
@@ -141,3 +147,4 @@ app.launch()
141
 
142
 
143
 
 
 
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
+ from TTS.api import TTS
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
9
  import os
10
  from groq import Groq
11
+ from deepgram import Deepgram
12
 
13
+ # Initialize Clients
14
+ groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
15
+ deepgram_client = Deepgram(api_key=os.environ.get("DEEGRAM_API_KEY"))
16
 
17
+ # Use DistilGPT-2 for text generation
18
+ script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)
19
 
20
+ # Use Whisper for text-to-speech
21
+ tts = TTS(model_name="whisper", progress_bar=False, gpu=False)
22
 
23
+ # Use MusicLM for music generation
24
+ music_generator = pipeline("text-to-audio", model="musiclm", device="cpu")
25
+
26
+ # Use Stable Diffusion for image generation
27
+ image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32).to("cpu")
28
+
29
+ # Generate Comedy Script using DistilGPT-2
30
def generate_comedy_script(prompt):
    """Generate a comedy script from *prompt* with the DistilGPT-2 text pipeline."""
    # The pipeline returns a list of candidate generations; keep the first.
    outputs = script_generator(prompt)
    script = outputs[0]['generated_text']
    return script
33
 
34
+ # Convert Text to Speech using Whisper
35
  def text_to_speech(script):
36
  output_audio = 'output.wav'
37
  tts.tts_to_file(text=script, file_path=output_audio)
 
48
  image_paths.append(img_path)
49
  return image_paths
50
 
51
+ # Generate Fun Music Track using MusicLM
52
def generate_fun_music(prompt, output_music_file="fun_music.wav"):
    """Generate music for *prompt* via the module-level text-to-audio pipeline.

    The synthesized samples are written to *output_music_file* as a WAV file.

    Returns:
        Path of the written audio file.
    """
    result = music_generator(prompt)

    # Save the samples at the sampling rate reported by the model.
    scipy.io.wavfile.write(
        output_music_file,
        rate=result["sampling_rate"],
        data=result["audio"],
    )
    return output_music_file
64
 
65
+ # Create Video from Generated Images
 
66
def generate_text_video(script):
    """Build /tmp/final_video.mp4 from the script's generated images; return its path."""
    # One 3-second, centered clip per generated image, concatenated in order.
    clips = [
        ImageClip(path).set_duration(3).set_position(('center', 'center'))
        for path in create_images_from_script(script)
    ]
    final_video = concatenate_videoclips(clips, method="compose")
    final_video.write_videofile("/tmp/final_video.mp4", fps=24)
    return "/tmp/final_video.mp4"
75
 
76
  # Combine Audio and Video
 
90
  final_video = combine_audio_video(video_file, fun_music)
91
  return script, audio_file, final_video
92
 
93
+ # Generate Kids Content
94
+ def generate_kids_content(theme):
95
  music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
96
  clips = []
97
  for i in range(5):
 
104
  clips.append(ImageClip(frame_path).set_duration(1).set_position(('center', 'center')))
105
  final_video = CompositeVideoClip(clips, size=(800, 400))
106
  final_video = final_video.set_audio(AudioFileClip(music_file))
107
+ final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
108
+ return music_file, "/tmp/kids_animation.mp4"
 
 
 
 
 
109
 
110
  # Gradio Interface
111
  with gr.Blocks() as app:
 
147
 
148
 
149
 
150
+