Athspi committed on
Commit b995bb6 · verified · 1 Parent(s): bda7faf

Update app.py

Files changed (1)
  1. app.py +19 -36
app.py CHANGED
@@ -3,47 +3,30 @@ import whisper
 import os
 from pydub import AudioSegment
 
-# Load the Whisper model
-model = whisper.load_model("base")  # Use "base" for faster processing
-
-def split_audio(filepath, chunk_length_ms=30000):
-    """Split audio into chunks of `chunk_length_ms` milliseconds."""
-    audio = AudioSegment.from_file(filepath)
-    chunks = []
-    for i in range(0, len(audio), chunk_length_ms):
-        chunk = audio[i:i + chunk_length_ms]
-        chunk_path = f"chunk_{i}.wav"
-        chunk.export(chunk_path, format="wav")
-        chunks.append(chunk_path)
-    return chunks
+# Load a larger Whisper model for better accuracy
+model = whisper.load_model("medium")  # Use "medium" or "large" for better results
 
 def transcribe_audio(audio_file, language="Auto Detect"):
-    # Split the audio into chunks
-    chunks = split_audio(audio_file)
-
-    # Transcribe each chunk and collect results
-    transcriptions = []
-    detected_language = None
+    # Convert audio to 16kHz mono for better compatibility with Whisper
+    audio = AudioSegment.from_file(audio_file)
+    audio = audio.set_frame_rate(16000).set_channels(1)
+    processed_audio_path = "processed_audio.wav"
+    audio.export(processed_audio_path, format="wav")
 
-    for chunk in chunks:
-        # If language is "Auto Detect", let Whisper detect the language
-        if language == "Auto Detect":
-            result = model.transcribe(chunk, fp16=False)  # Set fp16=False if not using GPU
-            detected_language = result.get("language", "unknown")
-        else:
-            # Use the user-selected language for transcription
-            language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
-            result = model.transcribe(chunk, language=language_code, fp16=False)
-            detected_language = language_code
-
-        transcriptions.append(result["text"])
-        os.remove(chunk)  # Clean up chunk files
+    # Transcribe the audio
+    if language == "Auto Detect":
+        result = model.transcribe(processed_audio_path, fp16=False)  # Auto-detect language
+        detected_language = result.get("language", "unknown")
+    else:
+        language_code = LANGUAGE_NAME_TO_CODE.get(language, "en")  # Default to English if not found
+        result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
+        detected_language = language_code
 
-    # Combine all transcriptions into one
-    full_transcription = " ".join(transcriptions)
+    # Clean up processed audio file
+    os.remove(processed_audio_path)
 
     # Return transcription and detected language
-    return f"Detected Language: {detected_language}\n\nTranscription:\n{full_transcription}"
+    return f"Detected Language: {detected_language}\n\nTranscription:\n{result['text']}"
 
 # Mapping of full language names to language codes
 LANGUAGE_NAME_TO_CODE = {
@@ -111,7 +94,7 @@ LANGUAGE_NAME_TO_CODE = {
     "Galician": "gl",
     "Marathi": "mr",
     "Punjabi": "pa",
-    "Sinhala": "si",  # Sinhala support
+    "Sinhala": "si",
     "Khmer": "km",
     "Shona": "sn",
     "Yoruba": "yo",