Spaces:
Building
Building
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,9 @@ from datetime import datetime
|
|
4 |
import langid
|
5 |
import os
|
6 |
import requests
|
7 |
-
import
|
8 |
-
import
|
|
|
9 |
import warnings
|
10 |
|
11 |
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
|
@@ -25,6 +26,11 @@ TONE_MODIFIERS = {
|
|
25 |
"Casual": "Make this sound casual: "
|
26 |
}
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
loaded_models = {}
|
29 |
|
30 |
def load_model(model_name):
|
@@ -46,11 +52,11 @@ def translate(text, direction, tone):
|
|
46 |
expected_src = direction.split(" β ")[0].lower()
|
47 |
warning = ""
|
48 |
if expected_src.startswith("english") and detected_lang != "en":
|
49 |
-
warning = f"
|
50 |
elif expected_src.startswith("french") and detected_lang != "fr":
|
51 |
-
warning = f"
|
52 |
elif expected_src.startswith("swahili") and detected_lang != "sw":
|
53 |
-
warning = f"
|
54 |
|
55 |
prompt = TONE_MODIFIERS[tone] + text
|
56 |
model_info = MODEL_MAP[direction]
|
@@ -77,13 +83,9 @@ def translate(text, direction, tone):
|
|
77 |
|
78 |
return f"{warning}\n{translation}" if warning else translation
|
79 |
|
80 |
-
|
81 |
-
def tts_via_api(text):
|
82 |
-
import base64
|
83 |
-
from io import BytesIO
|
84 |
-
|
85 |
api_key = os.getenv("ELEVENLABS_API_KEY")
|
86 |
-
voice_id = "21m00Tcm4TlvDq8ikWAM"
|
87 |
|
88 |
if not api_key:
|
89 |
return None
|
@@ -108,25 +110,27 @@ def tts_via_api(text):
|
|
108 |
response = requests.post(url, headers=headers, json=payload)
|
109 |
|
110 |
if response.status_code == 200:
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
113 |
else:
|
114 |
print("TTS API Error:", response.status_code, response.text)
|
115 |
return None
|
116 |
|
117 |
-
|
118 |
def transcribe_and_translate(audio_path, direction, tone):
|
119 |
-
import speech_recognition as sr
|
120 |
recognizer = sr.Recognizer()
|
121 |
try:
|
122 |
with sr.AudioFile(audio_path) as source:
|
123 |
audio = recognizer.record(source)
|
124 |
if len(audio.frame_data) < 10000:
|
125 |
-
return "
|
126 |
text = recognizer.recognize_google(audio)
|
127 |
return translate(text, direction, tone)
|
128 |
except Exception as e:
|
129 |
-
return f"
|
130 |
|
131 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
132 |
gr.Markdown("## π EAC Translator")
|
@@ -138,17 +142,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
138 |
input_text = gr.Textbox(label="Text to Translate", lines=3)
|
139 |
direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
140 |
tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
|
|
141 |
output_text = gr.Textbox(label="Translated Text", lines=3)
|
142 |
with gr.Row():
|
143 |
translate_btn = gr.Button("Translate", scale=1)
|
144 |
speak_btn = gr.Button("π Speak Translation", scale=1)
|
145 |
audio_output = gr.Audio(label="Playback", interactive=False)
|
146 |
|
147 |
-
with gr.Tab("
|
148 |
with gr.Column():
|
149 |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
|
150 |
direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
151 |
tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
|
|
152 |
voice_output = gr.Textbox(label="Translated Text")
|
153 |
with gr.Row():
|
154 |
voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
|
@@ -156,14 +162,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
156 |
audio_output2 = gr.Audio(label="Playback", interactive=False)
|
157 |
|
158 |
translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
|
159 |
-
speak_btn.click(fn=tts_via_api, inputs=[output_text], outputs=audio_output)
|
160 |
voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
|
161 |
-
voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output], outputs=audio_output2)
|
162 |
|
163 |
gr.Markdown(
|
164 |
"""<div style='text-align: center;'>
|
165 |
<a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
|
166 |
-
|
167 |
</a>
|
168 |
</div>""",
|
169 |
elem_id="footer"
|
|
|
4 |
import langid
|
5 |
import os
|
6 |
import requests
|
7 |
+
from io import BytesIO
|
8 |
+
from pydub import AudioSegment
|
9 |
+
import speech_recognition as sr
|
10 |
import warnings
|
11 |
|
12 |
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
|
|
|
26 |
"Casual": "Make this sound casual: "
|
27 |
}
|
28 |
|
29 |
# ElevenLabs voice IDs selectable in the UI, keyed by display label.
VOICE_IDS = {
    "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",
    "Adam (Male)": "pNInz6obpgDQGcFmaJgB",
}
|
33 |
+
|
34 |
loaded_models = {}
|
35 |
|
36 |
def load_model(model_name):
|
|
|
52 |
expected_src = direction.split(" β ")[0].lower()
|
53 |
warning = ""
|
54 |
if expected_src.startswith("english") and detected_lang != "en":
|
55 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected English as source."
|
56 |
elif expected_src.startswith("french") and detected_lang != "fr":
|
57 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected French as source."
|
58 |
elif expected_src.startswith("swahili") and detected_lang != "sw":
|
59 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected Swahili as source."
|
60 |
|
61 |
prompt = TONE_MODIFIERS[tone] + text
|
62 |
model_info = MODEL_MAP[direction]
|
|
|
83 |
|
84 |
return f"{warning}\n{translation}" if warning else translation
|
85 |
|
86 |
+
def tts_via_api(text, voice_choice):
|
|
|
|
|
|
|
|
|
87 |
api_key = os.getenv("ELEVENLABS_API_KEY")
|
88 |
+
voice_id = VOICE_IDS.get(voice_choice, "21m00Tcm4TlvDq8ikWAM")
|
89 |
|
90 |
if not api_key:
|
91 |
return None
|
|
|
110 |
response = requests.post(url, headers=headers, json=payload)
|
111 |
|
112 |
if response.status_code == 200:
|
113 |
+
mp3_audio = BytesIO(response.content)
|
114 |
+
audio = AudioSegment.from_file(mp3_audio, format="mp3")
|
115 |
+
wav_io = BytesIO()
|
116 |
+
audio.export(wav_io, format="wav")
|
117 |
+
wav_io.seek(0)
|
118 |
+
return (wav_io, "audio/wav")
|
119 |
else:
|
120 |
print("TTS API Error:", response.status_code, response.text)
|
121 |
return None
|
122 |
|
|
|
def transcribe_and_translate(audio_path, direction, tone):
    """Transcribe a recorded audio clip to text, then translate it.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the recorded audio (Gradio ``type="filepath"``).
    direction : str
        Translation-direction key into MODEL_MAP.
    tone : str
        Tone key into TONE_MODIFIERS, forwarded to ``translate``.

    Returns
    -------
    str
        The translation from ``translate``, or a user-facing warning
        string when the audio is too short or transcription fails.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        # Heuristic guard against empty/near-empty recordings.
        # NOTE(review): the 10000-frame threshold is a magic number — confirm.
        if len(audio.frame_data) < 10000:
            return "⚠️ Audio too short or empty. Please try again."
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        # Deliberate best-effort: surface the error in the textbox
        # instead of crashing the Gradio callback.
        return f"⚠️ Could not transcribe audio: {e}"
|
134 |
|
135 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
136 |
gr.Markdown("## π EAC Translator")
|
|
|
142 |
input_text = gr.Textbox(label="Text to Translate", lines=3)
|
143 |
direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
144 |
tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
145 |
+
voice_select = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
|
146 |
output_text = gr.Textbox(label="Translated Text", lines=3)
|
147 |
with gr.Row():
|
148 |
translate_btn = gr.Button("Translate", scale=1)
|
149 |
speak_btn = gr.Button("π Speak Translation", scale=1)
|
150 |
audio_output = gr.Audio(label="Playback", interactive=False)
|
151 |
|
152 |
+
with gr.Tab("ποΈ Voice Translation"):
|
153 |
with gr.Column():
|
154 |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
|
155 |
direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
156 |
tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
157 |
+
voice_select2 = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
|
158 |
voice_output = gr.Textbox(label="Translated Text")
|
159 |
with gr.Row():
|
160 |
voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
|
|
|
162 |
audio_output2 = gr.Audio(label="Playback", interactive=False)
|
163 |
|
164 |
translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
|
165 |
+
speak_btn.click(fn=tts_via_api, inputs=[output_text, voice_select], outputs=audio_output)
|
166 |
voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
|
167 |
+
voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output, voice_select2], outputs=audio_output2)
|
168 |
|
169 |
gr.Markdown(
|
170 |
"""<div style='text-align: center;'>
|
171 |
<a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
|
172 |
+
Built with ❤️ by Eng. Jobbers — Qtrinova Inc
|
173 |
</a>
|
174 |
</div>""",
|
175 |
elem_id="footer"
|