CRIMJOBBERS committed on
Commit
a51d2c7
Β·
verified Β·
1 Parent(s): 767d3ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -21
app.py CHANGED
@@ -4,8 +4,9 @@ from datetime import datetime
4
  import langid
5
  import os
6
  import requests
7
- import base64
8
- import tempfile
 
9
  import warnings
10
 
11
  warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
@@ -25,6 +26,11 @@ TONE_MODIFIERS = {
25
  "Casual": "Make this sound casual: "
26
  }
27
 
 
 
 
 
 
28
  loaded_models = {}
29
 
30
  def load_model(model_name):
@@ -46,11 +52,11 @@ def translate(text, direction, tone):
46
  expected_src = direction.split(" β†’ ")[0].lower()
47
  warning = ""
48
  if expected_src.startswith("english") and detected_lang != "en":
49
- warning = f"⚠ Detected language is '{detected_lang}', but you selected English as source."
50
  elif expected_src.startswith("french") and detected_lang != "fr":
51
- warning = f"⚠ Detected language is '{detected_lang}', but you selected French as source."
52
  elif expected_src.startswith("swahili") and detected_lang != "sw":
53
- warning = f"⚠ Detected language is '{detected_lang}', but you selected Swahili as source."
54
 
55
  prompt = TONE_MODIFIERS[tone] + text
56
  model_info = MODEL_MAP[direction]
@@ -77,13 +83,9 @@ def translate(text, direction, tone):
77
 
78
  return f"{warning}\n{translation}" if warning else translation
79
 
80
- #new tts_via_api function with this:
81
- def tts_via_api(text):
82
- import base64
83
- from io import BytesIO
84
-
85
  api_key = os.getenv("ELEVENLABS_API_KEY")
86
- voice_id = "21m00Tcm4TlvDq8ikWAM" # Rachel
87
 
88
  if not api_key:
89
  return None
@@ -108,25 +110,27 @@ def tts_via_api(text):
108
  response = requests.post(url, headers=headers, json=payload)
109
 
110
  if response.status_code == 200:
111
- audio_bytes = BytesIO(response.content)
112
- return (audio_bytes, "audio/mp3")
 
 
 
 
113
  else:
114
  print("TTS API Error:", response.status_code, response.text)
115
  return None
116
 
117
-
118
  def transcribe_and_translate(audio_path, direction, tone):
119
- import speech_recognition as sr
120
  recognizer = sr.Recognizer()
121
  try:
122
  with sr.AudioFile(audio_path) as source:
123
  audio = recognizer.record(source)
124
  if len(audio.frame_data) < 10000:
125
- return "⚠ Audio too short or empty. Please try again."
126
  text = recognizer.recognize_google(audio)
127
  return translate(text, direction, tone)
128
  except Exception as e:
129
- return f"⚠ Could not transcribe audio: {e}"
130
 
131
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
132
  gr.Markdown("## 🌍 EAC Translator")
@@ -138,17 +142,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
138
  input_text = gr.Textbox(label="Text to Translate", lines=3)
139
  direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
140
  tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
 
141
  output_text = gr.Textbox(label="Translated Text", lines=3)
142
  with gr.Row():
143
  translate_btn = gr.Button("Translate", scale=1)
144
  speak_btn = gr.Button("πŸ”Š Speak Translation", scale=1)
145
  audio_output = gr.Audio(label="Playback", interactive=False)
146
 
147
- with gr.Tab("πŸŽ™ Voice Translation"):
148
  with gr.Column():
149
  audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
150
  direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
151
  tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
 
152
  voice_output = gr.Textbox(label="Translated Text")
153
  with gr.Row():
154
  voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
@@ -156,14 +162,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
156
  audio_output2 = gr.Audio(label="Playback", interactive=False)
157
 
158
  translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
159
- speak_btn.click(fn=tts_via_api, inputs=[output_text], outputs=audio_output)
160
  voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
161
- voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output], outputs=audio_output2)
162
 
163
  gr.Markdown(
164
  """<div style='text-align: center;'>
165
  <a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
166
- By Eng. Jobbers – Qtrinova Inc. NLP❀
167
  </a>
168
  </div>""",
169
  elem_id="footer"
 
4
  import langid
5
  import os
6
  import requests
7
+ from io import BytesIO
8
+ from pydub import AudioSegment
9
+ import speech_recognition as sr
10
  import warnings
11
 
12
  warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
 
26
  "Casual": "Make this sound casual: "
27
  }
28
 
29
+ VOICE_IDS = {
30
+ "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",
31
+ "Adam (Male)": "pNInz6obpgDQGcFmaJgB"
32
+ }
33
+
34
  loaded_models = {}
35
 
36
  def load_model(model_name):
 
52
  expected_src = direction.split(" β†’ ")[0].lower()
53
  warning = ""
54
  if expected_src.startswith("english") and detected_lang != "en":
55
+ warning = f"⚠️ Detected language is '{detected_lang}', but you selected English as source."
56
  elif expected_src.startswith("french") and detected_lang != "fr":
57
+ warning = f"⚠️ Detected language is '{detected_lang}', but you selected French as source."
58
  elif expected_src.startswith("swahili") and detected_lang != "sw":
59
+ warning = f"⚠️ Detected language is '{detected_lang}', but you selected Swahili as source."
60
 
61
  prompt = TONE_MODIFIERS[tone] + text
62
  model_info = MODEL_MAP[direction]
 
83
 
84
  return f"{warning}\n{translation}" if warning else translation
85
 
86
+ def tts_via_api(text, voice_choice):
 
 
 
 
87
  api_key = os.getenv("ELEVENLABS_API_KEY")
88
+ voice_id = VOICE_IDS.get(voice_choice, "21m00Tcm4TlvDq8ikWAM")
89
 
90
  if not api_key:
91
  return None
 
110
  response = requests.post(url, headers=headers, json=payload)
111
 
112
  if response.status_code == 200:
113
+ mp3_audio = BytesIO(response.content)
114
+ audio = AudioSegment.from_file(mp3_audio, format="mp3")
115
+ wav_io = BytesIO()
116
+ audio.export(wav_io, format="wav")
117
+ wav_io.seek(0)
118
+ return (wav_io, "audio/wav")
119
  else:
120
  print("TTS API Error:", response.status_code, response.text)
121
  return None
122
 
 
123
  def transcribe_and_translate(audio_path, direction, tone):
 
124
  recognizer = sr.Recognizer()
125
  try:
126
  with sr.AudioFile(audio_path) as source:
127
  audio = recognizer.record(source)
128
  if len(audio.frame_data) < 10000:
129
+ return "⚠️ Audio too short or empty. Please try again."
130
  text = recognizer.recognize_google(audio)
131
  return translate(text, direction, tone)
132
  except Exception as e:
133
+ return f"⚠️ Could not transcribe audio: {e}"
134
 
135
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
136
  gr.Markdown("## 🌍 EAC Translator")
 
142
  input_text = gr.Textbox(label="Text to Translate", lines=3)
143
  direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
144
  tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
145
+ voice_select = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
146
  output_text = gr.Textbox(label="Translated Text", lines=3)
147
  with gr.Row():
148
  translate_btn = gr.Button("Translate", scale=1)
149
  speak_btn = gr.Button("πŸ”Š Speak Translation", scale=1)
150
  audio_output = gr.Audio(label="Playback", interactive=False)
151
 
152
+ with gr.Tab("πŸŽ™οΈ Voice Translation"):
153
  with gr.Column():
154
  audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
155
  direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
156
  tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
157
+ voice_select2 = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
158
  voice_output = gr.Textbox(label="Translated Text")
159
  with gr.Row():
160
  voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
 
162
  audio_output2 = gr.Audio(label="Playback", interactive=False)
163
 
164
  translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
165
+ speak_btn.click(fn=tts_via_api, inputs=[output_text, voice_select], outputs=audio_output)
166
  voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
167
+ voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output, voice_select2], outputs=audio_output2)
168
 
169
  gr.Markdown(
170
  """<div style='text-align: center;'>
171
  <a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
172
+ Built with ❀️ by Eng. Jobbers – Qtrinova Inc
173
  </a>
174
  </div>""",
175
  elem_id="footer"