sheikhed committed on
Commit
fe1dcaa
·
verified ·
1 Parent(s): d7e99cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -41
app.py CHANGED
@@ -5,57 +5,31 @@ import time
5
  import subprocess
6
  import gradio as gr
7
  import uuid
 
8
  from dotenv import load_dotenv
9
 
10
  # Load environment variables
11
  load_dotenv()
12
 
13
  # API Keys
14
- A_KEY = os.getenv("A_KEY")
15
  B_KEY = os.getenv("B_KEY")
16
 
17
  # URLs
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
- def get_voices():
22
- url = "https://api.elevenlabs.io/v1/voices"
23
- headers = {
24
- "Accept": "application/json",
25
- "xi-api-key": A_KEY
26
- }
27
-
28
- response = requests.get(url, headers=headers)
29
- if response.status_code != 200:
30
- return []
31
- return [(voice['name'], voice['voice_id']) for voice in response.json().get('voices', [])]
32
 
33
- def text_to_speech(voice_id, text, session_id):
34
- url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
35
-
36
- headers = {
37
- "Accept": "audio/mpeg",
38
- "Content-Type": "application/json",
39
- "xi-api-key": A_KEY
40
- }
41
-
42
- data = {
43
- "text": text,
44
- "model_id": "eleven_turbo_v2_5",
45
- "voice_settings": {
46
- "stability": 0.5,
47
- "similarity_boost": 0.5
48
- }
49
- }
50
 
51
- response = requests.post(url, json=data, headers=headers)
52
- if response.status_code != 200:
53
- return None
54
 
55
- # Save temporary audio file with session ID
56
- audio_file_path = f'temp_voice_{session_id}.mp3'
57
- with open(audio_file_path, 'wb') as audio_file:
58
- audio_file.write(response.content)
59
  return audio_file_path
60
 
61
  def upload_file(file_path):
@@ -136,10 +110,10 @@ def combine_audio_video(video_path, audio_path, output_path):
136
 
137
  subprocess.run(cmd, check=True)
138
 
139
- def process_video(voice, video_url, text, progress=gr.Progress()):
140
  session_id = str(uuid.uuid4()) # Generate a unique session ID
141
  progress(0, desc="Generating speech...")
142
- audio_path = text_to_speech(voice, text, session_id)
143
  if not audio_path:
144
  return None, "Failed to generate speech audio."
145
 
@@ -197,13 +171,19 @@ def process_video(voice, video_url, text, progress=gr.Progress()):
197
  os.remove(f"temp_video_{session_id}.mp4")
198
 
199
  def create_interface():
200
- voices = get_voices()
 
 
 
 
 
 
201
 
202
  with gr.Blocks() as app:
203
  gr.Markdown("# JSON Train")
204
  with gr.Row():
205
  with gr.Column():
206
- voice_dropdown = gr.Dropdown(choices=[v[0] for v in voices], label="Select Voice", value=voices[0][0] if voices else None)
207
  video_url_input = gr.Textbox(label="Enter Video URL")
208
  text_input = gr.Textbox(label="Enter text", lines=3)
209
  generate_btn = gr.Button("Generate Video")
@@ -227,4 +207,4 @@ def create_interface():
227
 
228
  if __name__ == "__main__":
229
  app = create_interface()
230
- app.launch()
 
5
  import subprocess
6
  import gradio as gr
7
  import uuid
8
+ import edge_tts
9
  from dotenv import load_dotenv
10
 
11
  # Load environment variables
12
  load_dotenv()
13
 
14
  # API Keys
 
15
  B_KEY = os.getenv("B_KEY")
16
 
17
  # URLs
18
  API_URL = os.getenv("API_URL")
19
  UPLOAD_URL = os.getenv("UPLOAD_URL")
20
 
21
+ # Fetch voices from edge_tts
22
+ async def get_edge_tts_voices():
23
+ voices = await edge_tts.list_voices()
24
+ return [(voice['Name'], voice['ShortName']) for voice in voices]
 
 
 
 
 
 
 
25
 
26
+ # Text-to-speech using edge_tts
27
+ async def text_to_speech(voice_id, text, session_id):
28
+ audio_file_path = f'temp_voice_{session_id}.mp3'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
 
30
+ communicate = edge_tts.Communicate(text, voice_id)
31
+ await communicate.save(audio_file_path)
 
32
 
 
 
 
 
33
  return audio_file_path
34
 
35
  def upload_file(file_path):
 
110
 
111
  subprocess.run(cmd, check=True)
112
 
113
+ async def process_video(voice, video_url, text, progress=gr.Progress()):
114
  session_id = str(uuid.uuid4()) # Generate a unique session ID
115
  progress(0, desc="Generating speech...")
116
+ audio_path = await text_to_speech(voice, text, session_id)
117
  if not audio_path:
118
  return None, "Failed to generate speech audio."
119
 
 
171
  os.remove(f"temp_video_{session_id}.mp4")
172
 
173
  def create_interface():
174
+ voices = []
175
+
176
+ # Asynchronous Gradio callback for fetching voices
177
+ async def async_get_voices():
178
+ nonlocal voices
179
+ voices = await get_edge_tts_voices()
180
+ return [v[0] for v in voices]
181
 
182
  with gr.Blocks() as app:
183
  gr.Markdown("# JSON Train")
184
  with gr.Row():
185
  with gr.Column():
186
+ voice_dropdown = gr.Dropdown(choices=await async_get_voices(), label="Select Voice")
187
  video_url_input = gr.Textbox(label="Enter Video URL")
188
  text_input = gr.Textbox(label="Enter text", lines=3)
189
  generate_btn = gr.Button("Generate Video")
 
207
 
208
  if __name__ == "__main__":
209
  app = create_interface()
210
+ app.launch()