Athspi committed on
Commit
1a0ef3f
·
verified ·
1 Parent(s): 19bb2e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -8
app.py CHANGED
@@ -1,22 +1,45 @@
1
  import gradio as gr
2
  from transformers import pipeline
 
 
3
 
4
- # Load the Whisper model for speech recognition
5
- model = pipeline("automatic-speech-recognition", model="openai/whisper-medium")
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  def transcribe_audio(audio_file):
8
- # Transcribe the audio file and automatically detect the language
9
- transcription = model(audio_file)
10
- return transcription["text"]
 
 
 
 
 
 
 
 
 
 
11
 
12
  # Define the Gradio interface
13
  iface = gr.Interface(
14
  fn=transcribe_audio,
15
  inputs=gr.Audio(type="filepath", label="Upload Audio File"),
16
  outputs=gr.Textbox(label="Transcription"),
17
- title="Automatic Language Detection & Audio Transcription",
18
- description="Upload an audio file, and the system will automatically detect the language and transcribe it."
19
  )
20
 
21
  # Launch the Gradio interface
22
- iface.launch()
 
import os
import tempfile

import gradio as gr
from pydub import AudioSegment
from transformers import pipeline
5
 
# Speech-recognition pipeline backed by the smaller "base" Whisper
# checkpoint, chosen for faster transcription.
model = pipeline(
    task="automatic-speech-recognition",
    model="openai/whisper-base",
)
8
+
def split_audio(filepath, chunk_length_ms=30000):
    """Split an audio file into consecutive WAV chunk files.

    Args:
        filepath: Path to the audio file to load with
            ``AudioSegment.from_file``.
        chunk_length_ms: Maximum length of each chunk in milliseconds.
            The final chunk may be shorter.

    Returns:
        A list of paths to the exported chunk files, in playback order.
        The caller is responsible for deleting them.

    Raises:
        ValueError: If ``chunk_length_ms`` is not a positive number.
    """
    # Validate first: a zero step makes range() fail with an unrelated
    # message, and a negative step would silently produce no chunks.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(filepath)
    chunks = []
    try:
        for start in range(0, len(audio), chunk_length_ms):
            # Give every chunk a unique temp-file name so that concurrent
            # requests cannot overwrite each other's chunks, and so nothing
            # is written into the working directory.
            fd, chunk_path = tempfile.mkstemp(
                prefix=f"chunk_{start}_", suffix=".wav"
            )
            os.close(fd)
            chunks.append(chunk_path)
            audio[start:start + chunk_length_ms].export(chunk_path, format="wav")
    except Exception:
        # Do not leave partial output behind when an export fails.
        for chunk_path in chunks:
            try:
                os.remove(chunk_path)
            except FileNotFoundError:
                pass
        raise
    return chunks
19
 
def transcribe_audio(audio_file):
    """Transcribe an audio file chunk by chunk and return the combined text.

    Args:
        audio_file: Path to the audio file, or a falsy value (``None`` or
            ``""``) when no audio was provided.

    Returns:
        The chunk transcriptions joined with single spaces, or an empty
        string when no audio was provided.
    """
    # Nothing to transcribe: return early instead of failing while loading.
    if not audio_file:
        return ""

    # Split the audio into chunks
    chunks = split_audio(audio_file)

    transcriptions = []
    try:
        # Transcribe each chunk
        for chunk in chunks:
            result = model(chunk)
            # Strip per-chunk whitespace so the join below does not produce
            # doubled spaces; skip chunks that yield no text at all.
            text = result["text"].strip()
            if text:
                transcriptions.append(text)
    finally:
        # Clean up every chunk file, even if transcription failed part-way.
        for chunk in chunks:
            try:
                os.remove(chunk)
            except FileNotFoundError:
                # Already gone; cleanup is best-effort.
                pass

    # Combine all transcriptions into one
    return " ".join(transcriptions)
34
 
# Define the Gradio interface
iface = gr.Interface(
    fn=transcribe_audio,
    inputs=gr.Audio(type="filepath", label="Upload Audio File"),
    outputs=gr.Textbox(label="Transcription"),
    title="Fast Audio Transcription with Automatic Language Detection",
    description="Upload an audio file, and the system will automatically detect the language and transcribe it quickly.",
)

# Launch the Gradio interface.
# Request queueing is enabled with `queue()`; `launch()` does not accept a
# `queue` keyword, so passing one raises TypeError before the app starts.
iface.queue()
iface.launch(share=True)