Spaces:
Building
Building
Update app.py
Browse files
app.py
CHANGED
@@ -4,8 +4,9 @@ from datetime import datetime
|
|
4 |
import langid
|
5 |
import os
|
6 |
import requests
|
7 |
-
import
|
8 |
-
import
|
|
|
9 |
import warnings
|
10 |
|
11 |
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
|
@@ -25,6 +26,11 @@ TONE_MODIFIERS = {
|
|
25 |
"Casual": "Make this sound casual: "
|
26 |
}
|
27 |
|
|
|
|
|
|
|
|
|
|
|
28 |
loaded_models = {}
|
29 |
|
30 |
def load_model(model_name):
|
@@ -46,11 +52,11 @@ def translate(text, direction, tone):
|
|
46 |
expected_src = direction.split(" β ")[0].lower()
|
47 |
warning = ""
|
48 |
if expected_src.startswith("english") and detected_lang != "en":
|
49 |
-
warning = f"
|
50 |
elif expected_src.startswith("french") and detected_lang != "fr":
|
51 |
-
warning = f"
|
52 |
elif expected_src.startswith("swahili") and detected_lang != "sw":
|
53 |
-
warning = f"
|
54 |
|
55 |
prompt = TONE_MODIFIERS[tone] + text
|
56 |
model_info = MODEL_MAP[direction]
|
@@ -77,13 +83,9 @@ def translate(text, direction, tone):
|
|
77 |
|
78 |
return f"{warning}\n{translation}" if warning else translation
|
79 |
|
80 |
-
|
81 |
-
def tts_via_api(text):
|
82 |
-
import base64
|
83 |
-
from io import BytesIO
|
84 |
-
|
85 |
api_key = os.getenv("ELEVENLABS_API_KEY")
|
86 |
-
voice_id = "21m00Tcm4TlvDq8ikWAM"
|
87 |
|
88 |
if not api_key:
|
89 |
return None
|
@@ -108,25 +110,27 @@ def tts_via_api(text):
|
|
108 |
response = requests.post(url, headers=headers, json=payload)
|
109 |
|
110 |
if response.status_code == 200:
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
113 |
else:
|
114 |
print("TTS API Error:", response.status_code, response.text)
|
115 |
return None
|
116 |
|
117 |
-
|
118 |
def transcribe_and_translate(audio_path, direction, tone):
|
119 |
-
import speech_recognition as sr
|
120 |
recognizer = sr.Recognizer()
|
121 |
try:
|
122 |
with sr.AudioFile(audio_path) as source:
|
123 |
audio = recognizer.record(source)
|
124 |
if len(audio.frame_data) < 10000:
|
125 |
-
return "
|
126 |
text = recognizer.recognize_google(audio)
|
127 |
return translate(text, direction, tone)
|
128 |
except Exception as e:
|
129 |
-
return f"
|
130 |
|
131 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
132 |
gr.Markdown("## π EAC Translator")
|
@@ -138,17 +142,19 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
138 |
input_text = gr.Textbox(label="Text to Translate", lines=3)
|
139 |
direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
140 |
tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
|
|
141 |
output_text = gr.Textbox(label="Translated Text", lines=3)
|
142 |
with gr.Row():
|
143 |
translate_btn = gr.Button("Translate", scale=1)
|
144 |
speak_btn = gr.Button("π Speak Translation", scale=1)
|
145 |
audio_output = gr.Audio(label="Playback", interactive=False)
|
146 |
|
147 |
-
with gr.Tab("
|
148 |
with gr.Column():
|
149 |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
|
150 |
direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
151 |
tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
|
|
152 |
voice_output = gr.Textbox(label="Translated Text")
|
153 |
with gr.Row():
|
154 |
voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
|
@@ -156,14 +162,14 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
156 |
audio_output2 = gr.Audio(label="Playback", interactive=False)
|
157 |
|
158 |
translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
|
159 |
-
speak_btn.click(fn=tts_via_api, inputs=[output_text], outputs=audio_output)
|
160 |
voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
|
161 |
-
voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output], outputs=audio_output2)
|
162 |
|
163 |
gr.Markdown(
|
164 |
"""<div style='text-align: center;'>
|
165 |
<a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
|
166 |
-
|
167 |
</a>
|
168 |
</div>""",
|
169 |
elem_id="footer"
|
|
|
4 |
import langid
|
5 |
import os
|
6 |
import requests
|
7 |
+
from io import BytesIO
|
8 |
+
from pydub import AudioSegment
|
9 |
+
import speech_recognition as sr
|
10 |
import warnings
|
11 |
|
12 |
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
|
|
|
26 |
"Casual": "Make this sound casual: "
|
27 |
}
|
28 |
|
29 |
# ElevenLabs voice IDs selectable in the UI, keyed by display label.
VOICE_IDS = {
    "Rachel (Female)": "21m00Tcm4TlvDq8ikWAM",
    "Adam (Male)": "pNInz6obpgDQGcFmaJgB",
}
|
33 |
+
|
34 |
loaded_models = {}
|
35 |
|
36 |
def load_model(model_name):
|
|
|
52 |
expected_src = direction.split(" β ")[0].lower()
|
53 |
warning = ""
|
54 |
if expected_src.startswith("english") and detected_lang != "en":
|
55 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected English as source."
|
56 |
elif expected_src.startswith("french") and detected_lang != "fr":
|
57 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected French as source."
|
58 |
elif expected_src.startswith("swahili") and detected_lang != "sw":
|
59 |
+
warning = f"β οΈ Detected language is '{detected_lang}', but you selected Swahili as source."
|
60 |
|
61 |
prompt = TONE_MODIFIERS[tone] + text
|
62 |
model_info = MODEL_MAP[direction]
|
|
|
83 |
|
84 |
return f"{warning}\n{translation}" if warning else translation
|
85 |
|
86 |
+
def tts_via_api(text, voice_choice):
|
|
|
|
|
|
|
|
|
87 |
api_key = os.getenv("ELEVENLABS_API_KEY")
|
88 |
+
voice_id = VOICE_IDS.get(voice_choice, "21m00Tcm4TlvDq8ikWAM")
|
89 |
|
90 |
if not api_key:
|
91 |
return None
|
|
|
110 |
response = requests.post(url, headers=headers, json=payload)
|
111 |
|
112 |
if response.status_code == 200:
|
113 |
+
mp3_audio = BytesIO(response.content)
|
114 |
+
audio = AudioSegment.from_file(mp3_audio, format="mp3")
|
115 |
+
wav_io = BytesIO()
|
116 |
+
audio.export(wav_io, format="wav")
|
117 |
+
wav_io.seek(0)
|
118 |
+
return (wav_io, "audio/wav")
|
119 |
else:
|
120 |
print("TTS API Error:", response.status_code, response.text)
|
121 |
return None
|
122 |
|
|
|
def transcribe_and_translate(audio_path, direction, tone):
    """Transcribe a recorded audio clip to text, then translate it.

    Parameters
    ----------
    audio_path : str
        Filesystem path to the recorded audio (Gradio ``type="filepath"``).
    direction : str
        Translation-direction key into MODEL_MAP.
    tone : str
        Tone key into TONE_MODIFIERS, forwarded to ``translate``.

    Returns
    -------
    str
        The translation from ``translate``, or a user-facing warning
        string when the audio is too short or transcription fails.
    """
    recognizer = sr.Recognizer()
    try:
        with sr.AudioFile(audio_path) as source:
            audio = recognizer.record(source)
        # Heuristic guard against empty/near-empty recordings.
        # NOTE(review): the 10000-frame threshold is a magic number — confirm.
        if len(audio.frame_data) < 10000:
            return "⚠️ Audio too short or empty. Please try again."
        text = recognizer.recognize_google(audio)
        return translate(text, direction, tone)
    except Exception as e:
        # Deliberate best-effort: surface the error in the textbox
        # instead of crashing the Gradio callback.
        return f"⚠️ Could not transcribe audio: {e}"
|
134 |
|
135 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
136 |
gr.Markdown("## π EAC Translator")
|
|
|
142 |
input_text = gr.Textbox(label="Text to Translate", lines=3)
|
143 |
direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
144 |
tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
145 |
+
voice_select = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
|
146 |
output_text = gr.Textbox(label="Translated Text", lines=3)
|
147 |
with gr.Row():
|
148 |
translate_btn = gr.Button("Translate", scale=1)
|
149 |
speak_btn = gr.Button("π Speak Translation", scale=1)
|
150 |
audio_output = gr.Audio(label="Playback", interactive=False)
|
151 |
|
152 |
+
with gr.Tab("ποΈ Voice Translation"):
|
153 |
with gr.Column():
|
154 |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
|
155 |
direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili")
|
156 |
tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
|
157 |
+
voice_select2 = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)")
|
158 |
voice_output = gr.Textbox(label="Translated Text")
|
159 |
with gr.Row():
|
160 |
voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
|
|
|
162 |
audio_output2 = gr.Audio(label="Playback", interactive=False)
|
163 |
|
164 |
translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
|
165 |
+
speak_btn.click(fn=tts_via_api, inputs=[output_text, voice_select], outputs=audio_output)
|
166 |
voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
|
167 |
+
voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output, voice_select2], outputs=audio_output2)
|
168 |
|
169 |
gr.Markdown(
|
170 |
"""<div style='text-align: center;'>
|
171 |
<a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
|
172 |
+
Built with ❤️ by Eng. Jobbers — Qtrinova Inc
|
173 |
</a>
|
174 |
</div>""",
|
175 |
elem_id="footer"
|