Manasa1 committed
Commit 3f22c4a · verified · 1 Parent(s): a29d631

Update app.py

Files changed (1)
  1. app.py +19 -17
app.py CHANGED
@@ -5,15 +5,14 @@ import torch
  from PIL import Image, ImageDraw, ImageFont
  import scipy.io.wavfile
  from TTS.api import TTS  # Coqui TTS (open source)
- from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
+ from moviepy.editor import ImageSequenceClip, ImageClip, VideoFileClip, AudioFileClip, concatenate_videoclips
  import os
- from groq import Groq
- from deepgram import Deepgram
  import subprocess

  # Initialize Clients
- groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
- deepgram_client = Deepgram(api_key=os.environ.get("DEEGRAM_API_KEY"))
+ # Replace with your actual API keys or methods of getting them
+ # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
+ # deepgram_client = Deepgram(api_key=os.environ.get("DEEPGRAM_API_KEY"))

  # Use DistilGPT-2 for text generation
  script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)
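
Note: the Groq and Deepgram imports are removed but the client setup is only commented out, so re-enabling those two lines as-is would raise a NameError. A minimal sketch of restoring them, assuming the groq and deepgram packages are installed again and both keys are set in the Space's environment:

    import os
    from groq import Groq
    from deepgram import Deepgram

    groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
    deepgram_client = Deepgram(api_key=os.environ.get("DEEPGRAM_API_KEY"))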
@@ -24,8 +23,9 @@ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False,
  # Use MusicGen for music generation
  music_generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")

- # Use Stable Diffusion for image generation
- image_generator = StableDiffusionPipeline.from_pretrained("stabilityai/stable-diffusion-2-1-base", torch_dtype=torch.float32).to("cpu")
+ # Use Fluently Anime (Stable Diffusion) for anime image generation
+ model_id = "fluently/Fluently-anime"
+ anime_image_generator = StableDiffusionPipeline.from_pretrained(model_id).to("cuda")

  # Generate Comedy Script using DistilGPT-2
  def generate_comedy_script(prompt):
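
Note: .to("cuda") assumes the Space has a GPU; on CPU-only hardware the load fails, and the previous revision ran everything on CPU. A hedged sketch that picks the device at runtime, using only standard torch/diffusers calls:

    import torch
    from diffusers import StableDiffusionPipeline

    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    anime_image_generator = StableDiffusionPipeline.from_pretrained(
        "fluently/Fluently-anime", torch_dtype=dtype
    ).to(device)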
@@ -38,13 +38,13 @@ def text_to_speech(script):
      tts.tts_to_file(text=script, file_path=output_audio)
      return output_audio

- # Create Images Using Stable Diffusion
+ # Create Anime Images Using Fluently Anime
  def create_images_from_script(script):
      lines = script.split('. ')
      image_paths = []
      for i, line in enumerate(lines):
-         img = image_generator(line).images[0]
-         img_path = f'/tmp/image_{i}.png'
+         img = anime_image_generator(line).images[0]
+         img_path = f'/tmp/anime_image_{i}.png'
          img.save(img_path)
          image_paths.append(img_path)
      return image_paths
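
Note: script.split('. ') can produce empty fragments (a trailing period yields one), and each fragment triggers a full diffusion run. A sketch of the same loop that skips blanks and caps num_inference_steps (a standard StableDiffusionPipeline argument) to keep generation time bounded:

    def create_images_from_script(script, steps=20):
        image_paths = []
        fragments = [s.strip() for s in script.split('. ') if s.strip()]
        for i, line in enumerate(fragments):
            img = anime_image_generator(line, num_inference_steps=steps).images[0]
            img_path = f'/tmp/anime_image_{i}.png'
            img.save(img_path)
            image_paths.append(img_path)
        return image_paths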
@@ -57,16 +57,18 @@ def generate_fun_music(prompt, output_music_file="fun_music.wav"):
      scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
      return output_music_file

- # Create Video from Generated Images Using PIA
+ # Create Video from Generated Anime Images Using MoviePy
  def generate_text_video(script):
      image_paths = create_images_from_script(script)
-     # PIA logic here
-     subprocess.run(["python", "-m", "pia", "generate", "--input", " ".join(image_paths), "--output", "/tmp/final_video.mp4"])
-     return "/tmp/final_video.mp4"
+     # Generate video using moviepy from the sequence of images
+     video_clip = ImageSequenceClip(image_paths, fps=24)
+     video_path = "/tmp/final_video.mp4"
+     video_clip.write_videofile(video_path, codec='libx264')
+     return video_path

  # Combine Audio and Video
  def combine_audio_video(video_path, audio_path):
-     video = VideoFileClip(video_path)
+     video = VideoFileClip(video_path)  # fixed: was AudioFileClip, which cannot supply video frames
      audio = AudioFileClip(audio_path)
      final_video = video.set_audio(audio)
      final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
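
Note: the narration from text_to_speech and the 24 fps slideshow will rarely have the same length, and moviepy keeps the video's duration when the two are combined. A sketch that trims the audio explicitly so nothing is cut off mid-write (moviepy 1.x API):

    from moviepy.editor import VideoFileClip, AudioFileClip

    def combine_audio_video(video_path, audio_path):
        video = VideoFileClip(video_path)
        audio = AudioFileClip(audio_path)
        if audio.duration > video.duration:
            audio = audio.subclip(0, video.duration)  # drop audio past the last frame
        final_video = video.set_audio(audio)
        final_video.write_videofile("/tmp/final_comedy_video.mp4", fps=24)
        return "/tmp/final_comedy_video.mp4"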
@@ -93,8 +95,7 @@ def generate_kids_content(theme):
          frame_path = f'/tmp/kids_temp_{i}.png'
          img.save(frame_path)
          clips.append(ImageClip(frame_path).set_duration(1).set_position(('center', 'center')))
-     final_video = CompositeVideoClip(clips, size=(800, 400))
-     final_video = final_video.set_audio(AudioFileClip(music_file))
+     final_video = concatenate_videoclips(clips, method="compose").set_audio(AudioFileClip(music_file))
      final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
      return music_file, "/tmp/kids_animation.mp4"
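
Note: concatenate_videoclips works here, but since every frame already gets a fixed one-second duration, ImageSequenceClip is a simpler fit. A sketch under the assumption that frame_paths (a hypothetical list) collects the saved PNGs and that all frames share one size:

    from moviepy.editor import ImageSequenceClip, AudioFileClip

    # One entry per frame, each shown for one second
    slideshow = ImageSequenceClip(frame_paths, durations=[1] * len(frame_paths))
    slideshow = slideshow.set_audio(AudioFileClip(music_file))
    slideshow.write_videofile("/tmp/kids_animation.mp4", fps=24)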
@@ -131,3 +132,4 @@ with gr.Blocks() as app:

  app.launch()

+