CRIMJOBBERS committed on
Commit
b4191f7
·
verified ·
1 Parent(s): b7e3328

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -32
app.py CHANGED
@@ -3,12 +3,12 @@ from transformers import MarianMTModel, MarianTokenizer
3
  from datetime import datetime
4
  import langid
5
  import os
6
- import pyttsx3
7
- import time
 
8
  import warnings
9
 
10
  warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
11
- os.environ["PATH"] += os.pathsep + r"C:\ffmpeg\bin"
12
  langid.set_languages(['en', 'fr', 'sw'])
13
 
14
  MODEL_MAP = {
@@ -46,11 +46,11 @@ def translate(text, direction, tone):
46
  expected_src = direction.split(" β†’ ")[0].lower()
47
  warning = ""
48
  if expected_src.startswith("english") and detected_lang != "en":
49
- warning = f"⚠️ Detected language is '{detected_lang}', but you selected English as source."
50
  elif expected_src.startswith("french") and detected_lang != "fr":
51
- warning = f"⚠️ Detected language is '{detected_lang}', but you selected French as source."
52
  elif expected_src.startswith("swahili") and detected_lang != "sw":
53
- warning = f"⚠️ Detected language is '{detected_lang}', but you selected Swahili as source."
54
 
55
  prompt = TONE_MODIFIERS[tone] + text
56
  model_info = MODEL_MAP[direction]
@@ -77,23 +77,31 @@ def translate(text, direction, tone):
77
 
78
  return f"{warning}\n{translation}" if warning else translation
79
 
80
- engine = pyttsx3.init()
81
- voices = engine.getProperty('voices')
82
- voice_names = [voice.name for voice in voices]
 
 
83
 
84
- def speak_text_to_file(text, voice_name):
85
- try:
86
- engine = pyttsx3.init()
87
- engine.setProperty('rate', 150)
88
- for voice in voices:
89
- if voice.name == voice_name:
90
- engine.setProperty('voice', voice.id)
91
- break
92
- output_path = "tts_output.wav"
93
- engine.save_to_file(text, output_path)
94
- engine.runAndWait()
95
- return output_path
96
- except Exception:
 
 
 
 
 
 
97
  return None
98
 
99
  def transcribe_and_translate(audio_path, direction, tone):
@@ -103,11 +111,11 @@ def transcribe_and_translate(audio_path, direction, tone):
103
  with sr.AudioFile(audio_path) as source:
104
  audio = recognizer.record(source)
105
  if len(audio.frame_data) < 10000:
106
- return "⚠️ Audio too short or empty. Please try again."
107
  text = recognizer.recognize_google(audio)
108
  return translate(text, direction, tone)
109
  except Exception as e:
110
- return f"⚠️ Could not transcribe audio: {e}"
111
 
112
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
113
  gr.Markdown("## 🌍 EAC Translator")
@@ -120,33 +128,31 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
120
  direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
121
  tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
122
  output_text = gr.Textbox(label="Translated Text", lines=3)
123
- voice_choice = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
124
  with gr.Row():
125
  translate_btn = gr.Button("Translate", scale=1)
126
- speak_btn = gr.Button("πŸ”Š Listen to Translation", scale=1)
127
  audio_output = gr.Audio(label="Playback", interactive=False)
128
 
129
- with gr.Tab("πŸŽ™οΈ Voice Translation"):
130
  with gr.Column():
131
  audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
132
  direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
133
  tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
134
  voice_output = gr.Textbox(label="Translated Text")
135
- voice_choice2 = gr.Dropdown(choices=voice_names, label="Voice for Playback", value=voice_names[0])
136
  with gr.Row():
137
  voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
138
- voice_speak_btn = gr.Button("πŸ”Š Listen to Translation", scale=1)
139
  audio_output2 = gr.Audio(label="Playback", interactive=False)
140
 
141
  translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
142
- speak_btn.click(fn=speak_text_to_file, inputs=[output_text, voice_choice], outputs=audio_output)
143
  voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
144
- voice_speak_btn.click(fn=speak_text_to_file, inputs=[voice_output, voice_choice2], outputs=audio_output2)
145
 
146
  gr.Markdown(
147
  """<div style='text-align: center;'>
148
  <a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
149
- By Eng. Jobbers – Qtrinova Inc. NLP❀️
150
  </a>
151
  </div>""",
152
  elem_id="footer"
 
3
  from datetime import datetime
4
  import langid
5
  import os
6
+ import requests
7
+ import base64
8
+ import tempfile
9
  import warnings
10
 
11
  warnings.filterwarnings("ignore", message="Recommended: pip install sacremoses.")
 
12
  langid.set_languages(['en', 'fr', 'sw'])
13
 
14
  MODEL_MAP = {
 
46
  expected_src = direction.split(" β†’ ")[0].lower()
47
  warning = ""
48
  if expected_src.startswith("english") and detected_lang != "en":
49
+ warning = f"⚠ Detected language is '{detected_lang}', but you selected English as source."
50
  elif expected_src.startswith("french") and detected_lang != "fr":
51
+ warning = f"⚠ Detected language is '{detected_lang}', but you selected French as source."
52
  elif expected_src.startswith("swahili") and detected_lang != "sw":
53
+ warning = f"⚠ Detected language is '{detected_lang}', but you selected Swahili as source."
54
 
55
  prompt = TONE_MODIFIERS[tone] + text
56
  model_info = MODEL_MAP[direction]
 
77
 
78
  return f"{warning}\n{translation}" if warning else translation
79
 
80
# TTS using the Hugging Face Inference API (microsoft/speecht5_tts).
def tts_via_api(text):
    """Synthesize *text* to speech via the Hugging Face Inference API.

    Returns the path of a temporary ``.wav`` file containing the audio,
    or ``None`` when no API token is configured, the input text is empty,
    the request fails (network error / timeout), or the API responds with
    a non-200 status.
    """
    # Guard against empty/None input: no point making an API call.
    if not text:
        return None

    api_token = os.getenv("HUGGINGFACEHUB_API_TOKEN")
    if not api_token:
        return None

    headers = {
        "Authorization": f"Bearer {api_token}"
    }

    payload = {
        "inputs": text
    }

    try:
        # Fix: the original call had no timeout, so a stalled Inference API
        # request would hang the Gradio worker indefinitely, and any network
        # failure (DNS, TLS, connection reset) raised an unhandled exception
        # instead of degrading to "no audio" like the other failure paths.
        response = requests.post(
            "https://api-inference.huggingface.co/models/microsoft/speecht5_tts",
            headers=headers,
            json=payload,
            timeout=60,
        )
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    # Persist the returned audio bytes; delete=False so Gradio can read
    # the file after this handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        tmp.write(response.content)
        return tmp.name
106
 
107
  def transcribe_and_translate(audio_path, direction, tone):
 
111
  with sr.AudioFile(audio_path) as source:
112
  audio = recognizer.record(source)
113
  if len(audio.frame_data) < 10000:
114
+ return "⚠ Audio too short or empty. Please try again."
115
  text = recognizer.recognize_google(audio)
116
  return translate(text, direction, tone)
117
  except Exception as e:
118
+ return f"⚠ Could not transcribe audio: {e}"
119
 
120
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
121
  gr.Markdown("## 🌍 EAC Translator")
 
128
  direction = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
129
  tone = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
130
  output_text = gr.Textbox(label="Translated Text", lines=3)
 
131
  with gr.Row():
132
  translate_btn = gr.Button("Translate", scale=1)
133
+ speak_btn = gr.Button("πŸ”Š Speak Translation", scale=1)
134
  audio_output = gr.Audio(label="Playback", interactive=False)
135
 
136
+ with gr.Tab("πŸŽ™ Voice Translation"):
137
  with gr.Column():
138
  audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Speak Now")
139
  direction_voice = gr.Dropdown(choices=list(MODEL_MAP.keys()), label="Translation Direction", value="English β†’ Swahili")
140
  tone_voice = gr.Radio(choices=list(TONE_MODIFIERS.keys()), label="Tone", value="Neutral")
141
  voice_output = gr.Textbox(label="Translated Text")
 
142
  with gr.Row():
143
  voice_translate_btn = gr.Button("Transcribe & Translate", scale=1)
144
+ voice_speak_btn = gr.Button("πŸ”Š Speak Translation", scale=1)
145
  audio_output2 = gr.Audio(label="Playback", interactive=False)
146
 
147
  translate_btn.click(fn=translate, inputs=[input_text, direction, tone], outputs=output_text)
148
+ speak_btn.click(fn=tts_via_api, inputs=[output_text], outputs=audio_output)
149
  voice_translate_btn.click(fn=transcribe_and_translate, inputs=[audio_input, direction_voice, tone_voice], outputs=voice_output)
150
+ voice_speak_btn.click(fn=tts_via_api, inputs=[voice_output], outputs=audio_output2)
151
 
152
  gr.Markdown(
153
  """<div style='text-align: center;'>
154
  <a href='https://eng-jobbers.vercel.app/' target='_blank' style='text-decoration: none; font-weight: bold;'>
155
+ By Eng. Jobbers – Qtrinova Inc. NLP❀
156
  </a>
157
  </div>""",
158
  elem_id="footer"