Athspi committed on
Commit
bda7faf
·
verified ·
1 Parent(s): ae667ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +125 -15
app.py CHANGED
@@ -17,7 +17,7 @@ def split_audio(filepath, chunk_length_ms=30000):
17
  chunks.append(chunk_path)
18
  return chunks
19
 
20
- def transcribe_audio(audio_file):
21
  # Split the audio into chunks
22
  chunks = split_audio(audio_file)
23
 
@@ -26,33 +26,143 @@ def transcribe_audio(audio_file):
26
  detected_language = None
27
 
28
  for chunk in chunks:
29
- # Transcribe the chunk and detect the language
30
- result = model.transcribe(chunk, fp16=False) # Set fp16=False if not using GPU
31
- transcriptions.append(result["text"])
32
-
33
- # Extract detected language from the result
34
- if detected_language is None and "language" in result:
35
- detected_language = result["language"]
 
 
36
 
 
37
  os.remove(chunk) # Clean up chunk files
38
 
39
  # Combine all transcriptions into one
40
  full_transcription = " ".join(transcriptions)
41
 
42
- # If no language was detected, set a default message
43
- if detected_language is None:
44
- detected_language = "unknown (language not detected)"
45
-
46
  # Return transcription and detected language
47
  return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
48
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  # Define the Gradio interface
50
  iface = gr.Interface(
51
  fn=transcribe_audio,
52
- inputs=gr.Audio(type="filepath", label="Upload Audio File"),
 
 
 
 
 
 
 
53
  outputs=gr.Textbox(label="Transcription and Detected Language"),
54
- title="Audio Transcription with Automatic Language Detection",
55
- description="Upload an audio file, and the system will automatically detect the language and transcribe it."
56
  )
57
 
58
  # Launch the Gradio interface
 
17
  chunks.append(chunk_path)
18
  return chunks
19
 
20
def transcribe_audio(audio_file, language="Auto Detect"):
    """Transcribe an audio file with Whisper, processing it in chunks.

    Args:
        audio_file: Filesystem path to the uploaded audio file.
        language: Full language name (a key of ``LANGUAGE_NAME_TO_CODE``),
            or ``"Auto Detect"`` to let Whisper infer the language.

    Returns:
        A string containing the detected/selected language followed by the
        combined transcription of all chunks.
    """
    # Split the audio into chunks
    chunks = split_audio(audio_file)

    transcriptions = []  # one transcript per chunk, joined at the end
    detected_language = None

    for chunk in chunks:
        try:
            if language == "Auto Detect":
                # Let Whisper detect the language.
                # fp16=False: safe default when no GPU is available.
                result = model.transcribe(chunk, fp16=False)
                # Keep the first detection instead of letting every later
                # chunk overwrite it.
                if detected_language is None:
                    detected_language = result.get("language", "unknown")
            else:
                # Use the user-selected language for transcription;
                # default to English if the name is not in the mapping.
                language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")
                result = model.transcribe(chunk, language=language_code, fp16=False)
                detected_language = language_code
            transcriptions.append(result["text"])
        finally:
            # Clean up the chunk file even if transcription raised,
            # so temp files never leak.
            os.remove(chunk)

    # Combine all transcriptions into one
    full_transcription = " ".join(transcriptions)

    # If no language was detected (e.g. no chunks), report it explicitly
    # instead of printing "None".
    if detected_language is None:
        detected_language = "unknown (language not detected)"

    # Return transcription and detected language
    return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
47
 
48
# Mapping of full language names (as shown in the UI dropdown) to Whisper
# language codes. "Auto Detect" maps to itself: it is never looked up as a
# code — transcribe_audio() checks for it before consulting this table —
# but it must appear here so it shows up as a dropdown choice.
LANGUAGE_NAME_TO_CODE = {
    "Auto Detect": "Auto Detect",
    "English": "en",
    "Chinese": "zh",
    "German": "de",
    "Spanish": "es",
    "Russian": "ru",
    "Korean": "ko",
    "French": "fr",
    "Japanese": "ja",
    "Portuguese": "pt",
    "Turkish": "tr",
    "Polish": "pl",
    "Catalan": "ca",
    "Dutch": "nl",
    "Arabic": "ar",
    "Swedish": "sv",
    "Italian": "it",
    "Indonesian": "id",
    "Hindi": "hi",
    "Finnish": "fi",
    "Vietnamese": "vi",
    "Hebrew": "he",
    "Ukrainian": "uk",
    "Greek": "el",
    "Malay": "ms",
    "Czech": "cs",
    "Romanian": "ro",
    "Danish": "da",
    "Hungarian": "hu",
    "Tamil": "ta",
    "Norwegian": "no",
    "Thai": "th",
    "Urdu": "ur",
    "Croatian": "hr",
    "Bulgarian": "bg",
    "Lithuanian": "lt",
    "Latin": "la",
    "Maori": "mi",
    "Malayalam": "ml",
    "Welsh": "cy",
    "Slovak": "sk",
    "Telugu": "te",
    "Persian": "fa",
    "Latvian": "lv",
    "Bengali": "bn",
    "Serbian": "sr",
    "Azerbaijani": "az",
    "Slovenian": "sl",
    "Kannada": "kn",
    "Estonian": "et",
    "Macedonian": "mk",
    "Breton": "br",
    "Basque": "eu",
    "Icelandic": "is",
    "Armenian": "hy",
    "Nepali": "ne",
    "Mongolian": "mn",
    "Bosnian": "bs",
    "Kazakh": "kk",
    "Albanian": "sq",
    "Swahili": "sw",
    "Galician": "gl",
    "Marathi": "mr",
    "Punjabi": "pa",
    "Sinhala": "si",  # Sinhala support
    "Khmer": "km",
    "Shona": "sn",
    "Yoruba": "yo",
    "Somali": "so",
    "Afrikaans": "af",
    "Occitan": "oc",
    "Georgian": "ka",
    "Belarusian": "be",
    "Tajik": "tg",
    "Sindhi": "sd",
    "Gujarati": "gu",
    "Amharic": "am",
    "Yiddish": "yi",
    "Lao": "lo",
    "Uzbek": "uz",
    "Faroese": "fo",
    "Haitian Creole": "ht",
    "Pashto": "ps",
    "Turkmen": "tk",
    "Nynorsk": "nn",
    "Maltese": "mt",
    "Sanskrit": "sa",
    "Luxembourgish": "lb",
    "Burmese": "my",
    "Tibetan": "bo",
    "Tagalog": "tl",
    "Malagasy": "mg",
    "Assamese": "as",
    "Tatar": "tt",
    "Hawaiian": "haw",
    "Lingala": "ln",
    "Hausa": "ha",
    "Bashkir": "ba",
    "Javanese": "jw",  # Whisper uses "jw" (not ISO "jv") for Javanese
    "Sundanese": "su",
}
151
+
152
# Define the Gradio interface: an audio upload plus a language dropdown
# feeding transcribe_audio, with a single textbox for the result.
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=[
        gr.Audio(type="filepath", label="Upload Audio File"),
        gr.Dropdown(
            # Full language names; dict order puts "Auto Detect" first,
            # matching the default value below.
            choices=list(LANGUAGE_NAME_TO_CODE),
            label="Select Language",
            value="Auto Detect",
        ),
    ],
    outputs=gr.Textbox(label="Transcription and Detected Language"),
    title="Audio Transcription with Language Selection",
    description="Upload an audio file and select a language (or choose 'Auto Detect').",
)
167
 
168
  # Launch the Gradio interface