Spaces:

Athspi
/

Ai-audio

Sleeping

App Files Community

Athspi committed on Jan 11

Commit

bda7faf

verified ·

1 Parent(s): ae667ab

Update app.py

Browse files

Files changed (1) hide show

app.py +125 -15

app.py CHANGED Viewed

@@ -17,7 +17,7 @@ def split_audio(filepath, chunk_length_ms=30000):
         chunks.append(chunk_path)
     return chunks
-def transcribe_audio(audio_file):
     # Split the audio into chunks
     chunks = split_audio(audio_file)
@@ -26,33 +26,143 @@ def transcribe_audio(audio_file):
     detected_language = None
     for chunk in chunks:
-        # Transcribe the chunk and detect the language
-        result = model.transcribe(chunk, fp16=False)  # Set fp16=False if not using GPU
-        transcriptions.append(result["text"])
-        # Extract detected language from the result
-        if detected_language is None and "language" in result:
-            detected_language = result["language"]
         os.remove(chunk)  # Clean up chunk files
     # Combine all transcriptions into one
     full_transcription = " ".join(transcriptions)
-    # If no language was detected, set a default message
-    if detected_language is None:
-        detected_language = "unknown (language not detected)"
     # Return transcription and detected language
     return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
 # Define the Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
-    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
     outputs=gr.Textbox(label="Transcription and Detected Language"),
-    title="Audio Transcription with Automatic Language Detection",
-    description="Upload an audio file, and the system will automatically detect the language and transcribe it."
 )
 # Launch the Gradio interface

         chunks.append(chunk_path)
     return chunks
+def transcribe_audio(audio_file, language="Auto Detect"):
     # Split the audio into chunks
     chunks = split_audio(audio_file)
     detected_language = None
     for chunk in chunks:
+        # If language is "Auto Detect", let Whisper detect the language
+        if language == "Auto Detect":
+            result = model.transcribe(chunk, fp16=False)  # Set fp16=False if not using GPU
+            detected_language = result.get("language", "unknown")
+        else:
+            # Use the user-selected language for transcription
+            language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
+            result = model.transcribe(chunk, language=language_code, fp16=False)
+            detected_language = language_code
+        transcriptions.append(result["text"])
         os.remove(chunk)  # Clean up chunk files
     # Combine all transcriptions into one
     full_transcription = " ".join(transcriptions)
     # Return transcription and detected language
     return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
+# Mapping of full language names to language codes
+LANGUAGE_NAME_TO_CODE = {
+    "Auto Detect": "Auto Detect",
+    "English": "en",
+    "Chinese": "zh",
+    "German": "de",
+    "Spanish": "es",
+    "Russian": "ru",
+    "Korean": "ko",
+    "French": "fr",
+    "Japanese": "ja",
+    "Portuguese": "pt",
+    "Turkish": "tr",
+    "Polish": "pl",
+    "Catalan": "ca",
+    "Dutch": "nl",
+    "Arabic": "ar",
+    "Swedish": "sv",
+    "Italian": "it",
+    "Indonesian": "id",
+    "Hindi": "hi",
+    "Finnish": "fi",
+    "Vietnamese": "vi",
+    "Hebrew": "he",
+    "Ukrainian": "uk",
+    "Greek": "el",
+    "Malay": "ms",
+    "Czech": "cs",
+    "Romanian": "ro",
+    "Danish": "da",
+    "Hungarian": "hu",
+    "Tamil": "ta",
+    "Norwegian": "no",
+    "Thai": "th",
+    "Urdu": "ur",
+    "Croatian": "hr",
+    "Bulgarian": "bg",
+    "Lithuanian": "lt",
+    "Latin": "la",
+    "Maori": "mi",
+    "Malayalam": "ml",
+    "Welsh": "cy",
+    "Slovak": "sk",
+    "Telugu": "te",
+    "Persian": "fa",
+    "Latvian": "lv",
+    "Bengali": "bn",
+    "Serbian": "sr",
+    "Azerbaijani": "az",
+    "Slovenian": "sl",
+    "Kannada": "kn",
+    "Estonian": "et",
+    "Macedonian": "mk",
+    "Breton": "br",
+    "Basque": "eu",
+    "Icelandic": "is",
+    "Armenian": "hy",
+    "Nepali": "ne",
+    "Mongolian": "mn",
+    "Bosnian": "bs",
+    "Kazakh": "kk",
+    "Albanian": "sq",
+    "Swahili": "sw",
+    "Galician": "gl",
+    "Marathi": "mr",
+    "Punjabi": "pa",
+    "Sinhala": "si",  # Sinhala support
+    "Khmer": "km",
+    "Shona": "sn",
+    "Yoruba": "yo",
+    "Somali": "so",
+    "Afrikaans": "af",
+    "Occitan": "oc",
+    "Georgian": "ka",
+    "Belarusian": "be",
+    "Tajik": "tg",
+    "Sindhi": "sd",
+    "Gujarati": "gu",
+    "Amharic": "am",
+    "Yiddish": "yi",
+    "Lao": "lo",
+    "Uzbek": "uz",
+    "Faroese": "fo",
+    "Haitian Creole": "ht",
+    "Pashto": "ps",
+    "Turkmen": "tk",
+    "Nynorsk": "nn",
+    "Maltese": "mt",
+    "Sanskrit": "sa",
+    "Luxembourgish": "lb",
+    "Burmese": "my",
+    "Tibetan": "bo",
+    "Tagalog": "tl",
+    "Malagasy": "mg",
+    "Assamese": "as",
+    "Tatar": "tt",
+    "Hawaiian": "haw",
+    "Lingala": "ln",
+    "Hausa": "ha",
+    "Bashkir": "ba",
+    "Javanese": "jw",
+    "Sundanese": "su",
+}
 # Define the Gradio interface
 iface = gr.Interface(
     fn=transcribe_audio,
+    inputs=[
+        gr.Audio(type="filepath", label="Upload Audio File"),
+        gr.Dropdown(
+            choices=list(LANGUAGE_NAME_TO_CODE.keys()),  # Full language names
+            label="Select Language",
+            value="Auto Detect"
+        )
+    ],
     outputs=gr.Textbox(label="Transcription and Detected Language"),
+    title="Audio Transcription with Language Selection",
+    description="Upload an audio file and select a language (or choose 'Auto Detect')."
 )
 # Launch the Gradio interface