Manasa1 committed on
Commit
ec5dc91
·
verified ·
1 Parent(s): 77f4d51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -10
app.py CHANGED
@@ -8,17 +8,16 @@ from TTS.api import TTS
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips, VideoFileClip
9
  import os
10
  from groq import Groq
11
- from deepgram import Deepgram
12
  import asyncio
13
  import aiohttp
14
  from dotenv import load_dotenv
 
15
 
16
  # Load environment variables
17
  load_dotenv()
18
 
19
  # Initialize Clients
20
  groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
21
- deepgram_client = Deepgram(api_key=os.getenv("DEEPGRAM_API_KEY"))
22
 
23
  # Use GPT-3.5-turbo for text generation
24
  async def generate_comedy_script(prompt):
@@ -113,12 +112,17 @@ def generate_kids_content(theme):
113
  final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
114
  return music_file, "/tmp/kids_animation.mp4"
115
 
116
- # New function for speech-to-text
117
- async def transcribe_audio(audio_file):
118
- with open(audio_file, 'rb') as audio:
119
- source = {'buffer': audio, 'mimetype': 'audio/wav'}
120
- response = await deepgram_client.transcription.prerecorded(source, {'smart_format': True, 'model': 'general'})
121
- return response['results']['channels'][0]['alternatives'][0]['transcript']
 
 
 
 
 
122
 
123
  # Gradio Interface
124
  with gr.Blocks() as app:
@@ -151,7 +155,7 @@ with gr.Blocks() as app:
151
  outputs=[kids_music_audio, kids_music_video]
152
  )
153
 
154
- # New Speech-to-Text Tab
155
  with gr.Tab("Speech-to-Text"):
156
  audio_input = gr.Audio(label="Upload Audio")
157
  transcribe_btn = gr.Button("Transcribe Audio")
@@ -165,4 +169,3 @@ with gr.Blocks() as app:
165
 
166
  app.launch()
167
 
168
-
 
8
  from moviepy.editor import CompositeVideoClip, ImageClip, AudioFileClip, concatenate_videoclips, VideoFileClip
9
  import os
10
  from groq import Groq
 
11
  import asyncio
12
  import aiohttp
13
  from dotenv import load_dotenv
14
+ import speech_recognition as sr
15
 
16
  # Load environment variables
17
  load_dotenv()
18
 
19
  # Initialize Clients
20
  groq_client = Groq(api_key=os.getenv("GROQ_API_KEY"))
 
21
 
22
  # Use GPT-3.5-turbo for text generation
23
  async def generate_comedy_script(prompt):
 
112
  final_video.write_videofile("/tmp/kids_animation.mp4", fps=24)
113
  return music_file, "/tmp/kids_animation.mp4"
114
 
115
+ # New function for speech-to-text using SpeechRecognition
116
+ def transcribe_audio(audio_file):
117
+ recognizer = sr.Recognizer()
118
+ with sr.AudioFile(audio_file) as source:
119
+ audio = recognizer.record(source)
120
+ try:
121
+ return recognizer.recognize_google(audio)
122
+ except sr.UnknownValueError:
123
+ return "Speech recognition could not understand the audio"
124
+ except sr.RequestError as e:
125
+ return f"Could not request results from speech recognition service; {e}"
126
 
127
  # Gradio Interface
128
  with gr.Blocks() as app:
 
155
  outputs=[kids_music_audio, kids_music_video]
156
  )
157
 
158
+ # Speech-to-Text Tab
159
  with gr.Tab("Speech-to-Text"):
160
  audio_input = gr.Audio(label="Upload Audio")
161
  transcribe_btn = gr.Button("Transcribe Audio")
 
169
 
170
  app.launch()
171