# EAC Translator — Hugging Face Space (Gradio app)
import gradio as gr | |
from transformers import MarianMTModel, MarianTokenizer | |
from datetime import datetime | |
import langid | |
import os | |
import requests | |
from io import BytesIO | |
from pydub import AudioSegment | |
import speech_recognition as sr | |
import warnings | |
warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.") | |
langid.set_languages(['en', 'fr', 'sw']) | |
MODEL_MAP = { | |
"English β Swahili": "Helsinki-NLP/opus-mt-en-sw", | |
"English β French": "Helsinki-NLP/opus-mt-en-fr", | |
"French β English": "Helsinki-NLP/opus-mt-fr-en", | |
"French β Swahili (via English)": ["Helsinki-NLP/opus-mt-fr-en", "Helsinki-NLP/opus-mt-en-sw"] | |
} | |
TONE_MODIFIERS = { | |
"Neutral": "", | |
"Romantic": "Express this romantically: ", | |
"Formal": "Translate this in a formal tone: ", | |
"Casual": "Make this sound casual: " | |
} | |
VOICE_IDS = { | |
"Rachel (Female)": "21m00Tcm4TlvDq8ikWAM", | |
"Adam (Male)": "pNInz6obpgDQGcFmaJgB" | |
} | |
loaded_models = {} | |
def load_model(model_name): | |
if model_name not in loaded_models: | |
tokenizer = MarianTokenizer.from_pretrained(model_name) | |
model = MarianMTModel.from_pretrained(model_name) | |
loaded_models[model_name] = (tokenizer, model) | |
return loaded_models[model_name] | |
def detect_language(text): | |
try: | |
lang, score = langid.classify(text) | |
return lang | |
except: | |
return "unknown" | |
def translate(text, direction, tone): | |
detected_lang = detect_language(text) | |
expected_src = direction.split(" β ")[0].lower() | |
warning = "" | |
if expected_src.startswith("english") and detected_lang != "en": | |
warning = f"β οΈ Detected language is '{detected_lang}', but you selected English as source." | |
elif expected_src.startswith("french") and detected_lang != "fr": | |
warning = f"β οΈ Detected language is '{detected_lang}', but you selected French as source." | |
elif expected_src.startswith("swahili") and detected_lang != "sw": | |
warning = f"β οΈ Detected language is '{detected_lang}', but you selected Swahili as source." | |
prompt = TONE_MODIFIERS[tone] + text | |
model_info = MODEL_MAP[direction] | |
if isinstance(model_info, list): | |
tokenizer1, model1 = load_model(model_info[0]) | |
encoded1 = tokenizer1(prompt, return_tensors="pt", padding=True, truncation=True) | |
intermediate = model1.generate(**encoded1) | |
intermediate_text = tokenizer1.decode(intermediate[0], skip_special_tokens=True) | |
tokenizer2, model2 = load_model(model_info[1]) | |
encoded2 = tokenizer2(intermediate_text, return_tensors="pt", padding=True, truncation=True) | |
final = model2.generate(**encoded2) | |
translation = tokenizer2.decode(final[0], skip_special_tokens=True) | |
else: | |
tokenizer, model = load_model(model_info) | |
encoded = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True) | |
generated = model.generate(**encoded) | |
translation = tokenizer.decode(generated[0], skip_special_tokens=True) | |
with open("translation_log.txt", "a", encoding="utf-8") as f: | |
f.write(f"[{datetime.now()}] {direction} | Tone: {tone}\n") | |
f.write(f"Input: {text}\nOutput: {translation}\n\n") | |
return f"{warning}\n{translation}" if warning else translation | |
def tts_via_api(text, voice_choice): | |
api_key = os.getenv("ELEVENLABS_API_KEY") | |
voice_id = VOICE_IDS.get(voice_choice, "21m00Tcm4TlvDq8ikWAM") | |
if not api_key: | |
return None | |
url = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}" | |
headers = { | |
"xi-api-key": api_key, | |
"Content-Type": "application/json", | |
"accept": "audio/mpeg" | |
} | |
payload = { | |
"text": text, | |
"model_id": "eleven_monolingual_v1", | |
"voice_settings": { | |
"stability": 0.5, | |
"similarity_boost": 0.75 | |
} | |
} | |
response = requests.post(url, headers=headers, json=payload) | |
if response.status_code == 200: | |
mp3_audio = BytesIO(response.content) | |
audio = AudioSegment.from_file(mp3_audio, format="mp3") | |
wav_io = BytesIO() | |
audio.export(wav_io, format="wav") | |
wav_io.seek(0) | |
return (wav_io.read(), "audio/wav") | |
else: | |
print("TTS API Error:", response.status_code, response.text) | |
return None | |
def transcribe_and_translate(audio_path, direction, tone): | |
recognizer = sr.Recognizer() | |
try: | |
with sr.AudioFile(audio_path) as source: | |
audio = recognizer.record(source) | |
if len(audio.frame_data) < 10000: | |
return "β οΈ Audio too short or empty. Please try again." | |
text = recognizer.recognize_google(audio) | |
return translate(text, direction, tone) | |
except Exception as e: | |
return f"β οΈ Could not transcribe audio: {e}" | |
with gr.Blocks(theme=gr.themes.Soft()) as demo: | |
gr.Markdown("## π EAC Translator") | |
gr.Markdown("Supports English, French, and Swahili. Includes tone control, language detection, voice input, and speech playback.") | |
with gr.Tabs(): | |
with gr.Tab("π Text Translation"): | |
input_text = gr.Textbox(label="Text to Translate", lines=3) | |
direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili") | |
tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral") | |
output_text = gr.Textbox(label="Translated Text", lines=3) | |
voice_select = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)") | |
translate_btn = gr.Button("Translate") | |
speak_btn = gr.Button("π Speak Translation") | |
audio_output = gr.Audio(label="Playback", interactive=False) | |
with gr.Tab("ποΈ Voice Translation"): | |
audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now") | |
direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β Swahili") | |
tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral") | |
voice_output = gr.Textbox(label="Translated Text") | |
voice_select2 = gr.Dropdown(choices=list(VOICE_IDS.keys()), label="Voice", value="Rachel (Female)") | |
voice_translate_btn = gr.Button("Transcribe & Translate") | |
voice_speak_btn = gr.Button("π Speak Translation") | |
audio_output2 = gr.Audio(label="Playback", interactive=False) | |
translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text) | |
speak_btn.click(fn=tts_via_api, inputs=[output_text, voice_select], outputs=audio_output) | |
voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output) | |
voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output, voice_select2], outputs=audio_output2) | |
gr.Markdown( | |
"""<div style='text-align: center;'> | |
<a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'> | |
Built with β€οΈ by Eng. Jobbers β Qtrinova Inc | |
</a> | |
</div>""", | |
elem_id="footer" | |
) | |
demo.launch() |