Athspi committed on
Commit
413a70d
·
verified ·
1 Parent(s): 241214a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -16
app.py CHANGED
@@ -5,13 +5,19 @@ import google.generativeai as genai
5
  from gtts import gTTS, lang
6
  import tempfile
7
 
8
- # Configure Gemini API (replace with your API key or use environment variable)
9
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_GEMINI_API_KEY_HERE")
 
 
10
  genai.configure(api_key=GEMINI_API_KEY)
11
 
12
- # Initialize the faster-whisper model
13
  model_size = "Systran/faster-whisper-large-v3"
14
- whisper_model = WhisperModel(model_size, device="auto", compute_type="float16")
 
 
 
 
15
 
16
  # Function to transcribe audio using faster-whisper
17
  def transcribe_audio(audio_file):
@@ -27,7 +33,6 @@ def transcribe_audio(audio_file):
27
  def translate_text(text, target_language):
28
  try:
29
  model = genai.GenerativeModel("gemini-1.5-flash")
30
- # Magic prompt to ensure only translated text is returned
31
  prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{text}"
32
  response = model.generate_content(prompt)
33
  translated_text = response.text.strip()
@@ -38,9 +43,7 @@ def translate_text(text, target_language):
38
  # Function to convert text to speech using gTTS with full language support
39
  def text_to_speech(text, language):
40
  try:
41
- # Get all supported languages from gTTS
42
  lang_map = lang.tts_langs()
43
- # Use the language code directly if supported, otherwise default to 'en'
44
  tts_lang = language.lower() if language.lower() in lang_map else "en"
45
  tts = gTTS(text=text, lang=tts_lang, slow=False)
46
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
@@ -51,20 +54,18 @@ def text_to_speech(text, language):
51
 
52
  # Main function to process audio input and return outputs
53
  def process_audio(audio_file, target_language):
54
- # Step 1: Transcribe audio
 
 
55
  transcription, detected_language, error = transcribe_audio(audio_file)
56
  if error:
57
  return error, None, None, None
58
 
59
- # Step 2: Translate transcription
60
  translated_text, error = translate_text(transcription, target_language)
61
  if error:
62
  return error, transcription, None, None
63
 
64
- # Step 3: Convert translated text to speech
65
- # Map target language name to gTTS language code
66
  lang_map = lang.tts_langs()
67
- # Convert target_language to lowercase keys as in lang_map
68
  lang_key = next((k for k, v in lang_map.items() if v.lower() == target_language.lower()), "en")
69
  audio_output, error = text_to_speech(translated_text, lang_key)
70
  if error:
@@ -75,11 +76,10 @@ def process_audio(audio_file, target_language):
75
  # Gradio interface
76
  with gr.Blocks(title="AI Audio Translator") as demo:
77
  gr.Markdown("# AI Audio Translator")
78
- gr.Markdown("Upload an audio file, select a target language, and get the transcription, translation, and translated audio!")
79
 
80
- # Get all supported languages from gTTS
81
- supported_langs = {v: k for k, v in lang.tts_langs().items()} # {name: code}
82
- language_choices = list(supported_langs.keys()) # List of language names
83
 
84
  with gr.Row():
85
  audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio")
 
5
  from gtts import gTTS, lang
6
  import tempfile
7
 
8
+ # Configure Gemini API (use environment variable for Hugging Face Spaces)
9
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
10
+ if not GEMINI_API_KEY:
11
+ raise ValueError("GEMINI_API_KEY environment variable not set. Please set it in the Hugging Face Spaces Secrets.")
12
  genai.configure(api_key=GEMINI_API_KEY)
13
 
14
+ # Initialize the faster-whisper model with fallback compute type
15
  model_size = "Systran/faster-whisper-large-v3"
16
+ try:
17
+ whisper_model = WhisperModel(model_size, device="auto", compute_type="float16")
18
+ except ValueError:
19
+ print("Float16 not supported, falling back to int8 on CPU")
20
+ whisper_model = WhisperModel(model_size, device="cpu", compute_type="int8")
21
 
22
  # Function to transcribe audio using faster-whisper
23
  def transcribe_audio(audio_file):
 
33
  def translate_text(text, target_language):
34
  try:
35
  model = genai.GenerativeModel("gemini-1.5-flash")
 
36
  prompt = f"Translate the following text to {target_language} and return only the translated text with no additional explanation or commentary:\n\n{text}"
37
  response = model.generate_content(prompt)
38
  translated_text = response.text.strip()
 
43
  # Function to convert text to speech using gTTS with full language support
44
  def text_to_speech(text, language):
45
  try:
 
46
  lang_map = lang.tts_langs()
 
47
  tts_lang = language.lower() if language.lower() in lang_map else "en"
48
  tts = gTTS(text=text, lang=tts_lang, slow=False)
49
  with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as fp:
 
54
 
55
  # Main function to process audio input and return outputs
56
  def process_audio(audio_file, target_language):
57
+ if audio_file is None:
58
+ return "Please upload an audio file or record audio.", None, None, None
59
+
60
  transcription, detected_language, error = transcribe_audio(audio_file)
61
  if error:
62
  return error, None, None, None
63
 
 
64
  translated_text, error = translate_text(transcription, target_language)
65
  if error:
66
  return error, transcription, None, None
67
 
 
 
68
  lang_map = lang.tts_langs()
 
69
  lang_key = next((k for k, v in lang_map.items() if v.lower() == target_language.lower()), "en")
70
  audio_output, error = text_to_speech(translated_text, lang_key)
71
  if error:
 
76
  # Gradio interface
77
  with gr.Blocks(title="AI Audio Translator") as demo:
78
  gr.Markdown("# AI Audio Translator")
79
+ gr.Markdown("Upload an audio file or record via microphone, select a target language, and get the transcription, translation, and translated audio!")
80
 
81
+ supported_langs = {v: k for k, v in lang.tts_langs().items()}
82
+ language_choices = list(supported_langs.keys())
 
83
 
84
  with gr.Row():
85
  audio_input = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Input Audio")