Manasa1 committed on
Commit
9bea5a2
·
verified ·
1 Parent(s): b706d95

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -34
app.py CHANGED
@@ -4,44 +4,57 @@ from diffusers import StableDiffusionPipeline
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
- from TTS.api import TTS # Coqui TTS (open source)
8
- from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips
9
  import os
10
- import subprocess
11
- from moviepy.editor import VideoFileClip, AudioFileClip
 
 
 
12
 
 
 
13
 
14
  # Initialize Clients
15
- # Replace with your actual API keys or methods of getting them
16
- # groq_client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
17
- # deepgram_client = Deepgram(api_key=os.environ.get("DEEGRAM_API_KEY"))
18
-
19
- # Use DistilGPT-2 for text generation
20
- script_generator = pipeline("text-generation", model="distilgpt2", truncation=True, max_length=100)
21
-
22
- # Use Coqui TTS for text-to-speech
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
24
 
25
- # Use MusicGen for music generation
26
  music_generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")
27
 
28
- # Use Fluently Anime (Stable Diffusion) for anime image generation
29
  model_id = "fluently/Fluently-anime"
30
  anime_image_generator = StableDiffusionPipeline.from_pretrained(model_id).to("cpu")
31
 
32
-
33
- # Generate Comedy Script using DistilGPT-2
34
- def generate_comedy_script(prompt):
35
- script = script_generator(prompt)[0]['generated_text']
36
- return script
37
-
38
- # Convert Text to Speech using Coqui TTS
39
  def text_to_speech(script):
40
  output_audio = 'output.wav'
41
  tts.tts_to_file(text=script, file_path=output_audio)
42
  return output_audio
43
 
44
- # Create Anime Images Using Fluently Anime
45
  def create_images_from_script(script):
46
  lines = script.split('. ')
47
  image_paths = []
@@ -52,7 +65,7 @@ def create_images_from_script(script):
52
  image_paths.append(img_path)
53
  return image_paths
54
 
55
- # Generate Fun Music Track using MusicGen
56
  def generate_fun_music(prompt, output_music_file="fun_music.wav"):
57
  response = music_generator(prompt)
58
  audio_data = response["audio"]
@@ -60,37 +73,31 @@ def generate_fun_music(prompt, output_music_file="fun_music.wav"):
60
  scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
61
  return output_music_file
62
 
63
- # Create Video from Generated Anime Images Using FFmpeg
64
  def generate_text_video(script):
65
  image_paths = create_images_from_script(script)
66
- # Generate video using moviepy from the sequence of images
67
  video_clip = ImageSequenceClip(image_paths, fps=24)
68
  video_path = "/tmp/final_video.mp4"
69
  video_clip.write_videofile(video_path, codec='libx264')
70
  return video_path
71
 
 
72
  def combine_audio_video(video_file, audio_file):
73
- # Load the video file
74
  video = VideoFileClip(video_file)
75
- # Load the audio file
76
  audio = AudioFileClip(audio_file)
77
-
78
- # Set the audio of the video clip
79
  final_video = video.set_audio(audio)
80
-
81
- # Return the final video clip
82
  return final_video
83
 
84
  # Main Function to Generate Comedy Animation
85
- def generate_comedy_and_animation(prompt):
86
- script = generate_comedy_script(prompt)
87
  audio_file = text_to_speech(script)
88
  video_file = generate_text_video(script)
89
  fun_music = generate_fun_music(prompt)
90
  final_video = combine_audio_video(video_file, fun_music)
91
  return script, audio_file, final_video
92
 
93
- # Generate Kids Content
94
  def generate_kids_content(theme):
95
  music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
96
  clips = []
@@ -106,6 +113,13 @@ def generate_kids_content(theme):
106
  final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
107
  return music_file, "/tmp/kids_animation.mp4"
108
 
 
 
 
 
 
 
 
109
  # Gradio Interface
110
  with gr.Blocks() as app:
111
  gr.Markdown("## AI Comedy and Kids Content Generator")
@@ -137,6 +151,18 @@ with gr.Blocks() as app:
137
  outputs=[kids_music_audio, kids_music_video]
138
  )
139
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  app.launch()
141
 
142
 
 
4
  import torch
5
  from PIL import Image, ImageDraw, ImageFont
6
  import scipy.io.wavfile
7
+ from TTS.api import TTS
8
+ from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips, VideoFileClip
9
  import os
10
+ from groq import Groq
11
+ from deepgram import Deepgram
12
+ import asyncio
13
+ import aiohttp
14
+ from dotenv import load_dotenv
15
 
16
+ # Load environment variables
17
+ load_dotenv()
18
 
19
  # Initialize Clients
20
# API clients; both keys are read from the environment (populated by load_dotenv()).
groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
# The v2 Deepgram SDK constructor takes the key (or an options dict) as its
# single positional argument; it does not accept an `api_key=` keyword.
deepgram_client = Deepgram(os.getenv("DEEPGRAM_API_KEY"))
22
+
23
+ # Use the Groq-hosted Mixtral model (mixtral-8x7b-32768) for text generation
24
+ async def generate_comedy_script(prompt):
25
+ chat_completion = await groq_client.chat.completions.create(
26
+ messages=[
27
+ {
28
+ "role": "system",
29
+ "content": "You are a comedy writer. Generate a short, funny script based on the given prompt."
30
+ },
31
+ {
32
+ "role": "user",
33
+ "content": prompt
34
+ }
35
+ ],
36
+ model="mixtral-8x7b-32768",
37
+ max_tokens=200
38
+ )
39
+ return chat_completion.choices[0].message.content
40
+
41
+ # Use Coqui TTS for text-to-speech (unchanged)
42
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
43
 
44
+ # Use MusicGen for music generation (unchanged)
45
  music_generator = pipeline("text-to-audio", model="facebook/musicgen-small", device="cpu")
46
 
47
+ # Use Fluently Anime (Stable Diffusion) for anime image generation (unchanged)
48
  model_id = "fluently/Fluently-anime"
49
  anime_image_generator = StableDiffusionPipeline.from_pretrained(model_id).to("cpu")
50
 
51
+ # Convert Text to Speech using Coqui TTS (unchanged)
 
 
 
 
 
 
52
def text_to_speech(script):
    """Synthesize ``script`` to a WAV file with the module-level TTS model.

    Args:
        script: Text to be spoken.

    Returns:
        Path of the written audio file (always ``'output.wav'``, so each
        call overwrites the previous result).
    """
    wav_path = 'output.wav'
    tts.tts_to_file(file_path=wav_path, text=script)
    return wav_path
56
 
57
+ # Create Anime Images Using Fluently Anime (unchanged)
58
  def create_images_from_script(script):
59
  lines = script.split('. ')
60
  image_paths = []
 
65
  image_paths.append(img_path)
66
  return image_paths
67
 
68
+ # Generate Fun Music Track using MusicGen (unchanged)
69
  def generate_fun_music(prompt, output_music_file="fun_music.wav"):
70
  response = music_generator(prompt)
71
  audio_data = response["audio"]
 
73
  scipy.io.wavfile.write(output_music_file, rate=sampling_rate, data=audio_data)
74
  return output_music_file
75
 
76
+ # Create Video from Generated Anime Images (unchanged)
77
def generate_text_video(script):
    """Render the images generated for ``script`` into an MP4 file.

    Args:
        script: Script text; passed to ``create_images_from_script`` to
            obtain the list of image paths.

    Returns:
        Path of the written video file (``/tmp/final_video.mp4``).
    """
    # ImageSequenceClip is not among the names imported from moviepy.editor
    # at the top of the file, so import it here to avoid a NameError.
    from moviepy.editor import ImageSequenceClip

    image_paths = create_images_from_script(script)
    video_path = "/tmp/final_video.mp4"
    # NOTE(review): at fps=24 each image is a single frame (1/24 s); confirm
    # this is the intended duration per image.
    video_clip = ImageSequenceClip(image_paths, fps=24)
    try:
        video_clip.write_videofile(video_path, codec='libx264')
    finally:
        # Release the clip's resources even if encoding fails.
        video_clip.close()
    return video_path
83
 
84
+ # Combine Audio and Video (unchanged)
85
def combine_audio_video(video_file, audio_file):
    """Attach the audio track from ``audio_file`` to the video in ``video_file``.

    Args:
        video_file: Path of the video to load.
        audio_file: Path of the audio to load.

    Returns:
        The clip produced by ``set_audio``; nothing is written to disk here.
    """
    # The video is opened first, then the audio, then the two are joined.
    return VideoFileClip(video_file).set_audio(AudioFileClip(audio_file))
90
 
91
  # Main Function to Generate Comedy Animation
92
async def generate_comedy_and_animation(prompt):
    """Run the comedy pipeline: script, narration, image video and music.

    Args:
        prompt: Topic used for both the script and the background music.

    Returns:
        A tuple ``(script, narration_audio_path, final_video)`` where
        ``final_video`` is whatever ``combine_audio_video`` returns.
    """
    script = await generate_comedy_script(prompt)
    narration_path = text_to_speech(script)
    silent_video_path = generate_text_video(script)
    music_path = generate_fun_music(prompt)
    # NOTE(review): the combined result is a clip object, not a file path,
    # and the narration audio is returned but not mixed into the video.
    # Confirm the consumer of this function expects that.
    return script, narration_path, combine_audio_video(silent_video_path, music_path)
99
 
100
+ # Generate Kids Content (unchanged)
101
  def generate_kids_content(theme):
102
  music_file = generate_fun_music(theme, output_music_file="kids_music.wav")
103
  clips = []
 
113
  final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
114
  return music_file, "/tmp/kids_animation.mp4"
115
 
116
+ # New function for speech-to-text
117
async def transcribe_audio(audio_file):
    """Transcribe an audio input with Deepgram and return the transcript text.

    The Gradio ``Audio`` component that feeds this handler is created
    without ``type="filepath"``, so the value may arrive as ``None`` (nothing
    uploaded) or as a ``(sample_rate, samples)`` pair instead of a path.
    All three shapes are handled here.

    Args:
        audio_file: ``None``, a path to an audio file, or a
            ``(sample_rate, samples)`` tuple.

    Returns:
        The transcript of the first alternative of the first channel, or an
        empty string when no audio was provided.
    """
    import io

    if audio_file is None:
        return ""

    options = {'smart_format': True, 'model': 'general'}

    if isinstance(audio_file, (tuple, list)):
        # In-memory audio: encode it as WAV into a buffer for upload.
        sample_rate, samples = audio_file
        wav_buffer = io.BytesIO()
        scipy.io.wavfile.write(wav_buffer, sample_rate, samples)
        wav_buffer.seek(0)
        source = {'buffer': wav_buffer, 'mimetype': 'audio/wav'}
        response = await deepgram_client.transcription.prerecorded(source, options)
    else:
        with open(audio_file, 'rb') as audio:
            source = {'buffer': audio, 'mimetype': 'audio/wav'}
            # Await inside the `with` so the file stays open during upload.
            response = await deepgram_client.transcription.prerecorded(source, options)

    return response['results']['channels'][0]['alternatives'][0]['transcript']
122
+
123
  # Gradio Interface
124
  with gr.Blocks() as app:
125
  gr.Markdown("## AI Comedy and Kids Content Generator")
 
151
  outputs=[kids_music_audio, kids_music_video]
152
  )
153
 
154
+ # New Speech-to-Text Tab
155
+ with gr.Tab("Speech-to-Text"):
156
+ audio_input = gr.Audio(label="Upload Audio")
157
+ transcribe_btn = gr.Button("Transcribe Audio")
158
+ transcription_output = gr.Textbox(label="Transcription")
159
+
160
+ transcribe_btn.click(
161
+ transcribe_audio,
162
+ inputs=audio_input,
163
+ outputs=transcription_output
164
+ )
165
+
166
  app.launch()
167
 
168