MusIre committed on
Commit
7634d42
·
1 Parent(s): 81137f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -12,15 +12,19 @@ import torch
12
  processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-italian")
13
  model = Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-italian")
14
 
 
 
 
 
15
  # Function to perform ASR on audio data
16
- def transcribe_audio(audio_data):
17
- print("Received audio data:", audio_data) # Debug print
18
 
19
  # Check if audio_data is None or not a tuple of length 2
20
- if audio_data is None or not isinstance(audio_data, tuple) or len(audio_data) != 2:
21
  return "Invalid audio data format."
22
 
23
- sample_rate, waveform = audio_data
24
 
25
  # Check if waveform is None or not a torch.Tensor
26
  if waveform is None or not isinstance(waveform, torch.Tensor):
@@ -29,10 +33,10 @@ def transcribe_audio(audio_data):
29
  try:
30
  # Convert audio data to mono and normalize
31
  audio_data = torchaudio.transforms.Resample(sample_rate, 100000)(waveform)
32
- audio_data = torchaudio.functional.gain(audio_data, gain_db=5.0)
33
 
34
  # Apply custom preprocessing to the audio data if needed
35
- input_values = processor(audio_data[0], return_tensors="pt").input_values
36
 
37
  # Perform ASR
38
  with torch.no_grad():
@@ -48,5 +52,5 @@ def transcribe_audio(audio_data):
48
  return f"An error occurred: {str(e)}"
49
 
50
  # Create Gradio interface
51
- audio_input = gr.Audio()
52
  gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()
 
12
  processor = Wav2Vec2Processor.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-italian")
13
  model = Wav2Vec2ForCTC.from_pretrained("jonatasgrosman/wav2vec2-large-xlsr-53-italian")
14
 
15
+ def preprocess_audio(audio_data):
16
+ # Apply any custom preprocessing to the audio data here if needed
17
+ return processor(audio_data, return_tensors="pt").input_features
18
+
19
  # Function to perform ASR on audio data
20
+ def transcribe_audio(input_features):
21
+ print("Received audio data:", input_features) # Debug print
22
 
23
  # Check if audio_data is None or not a tuple of length 2
24
+ if audio_data is None or not isinstance(input_features, tuple) or len(input_features) != 2:
25
  return "Invalid audio data format."
26
 
27
+ sample_rate, waveform = input_features
28
 
29
  # Check if waveform is None or not a torch.Tensor
30
  if waveform is None or not isinstance(waveform, torch.Tensor):
 
33
  try:
34
  # Convert audio data to mono and normalize
35
  audio_data = torchaudio.transforms.Resample(sample_rate, 100000)(waveform)
36
+ audio_data = torchaudio.functional.gain(input_features, gain_db=5.0)
37
 
38
  # Apply custom preprocessing to the audio data if needed
39
+ input_values = processor(input_features[0], return_tensors="pt").input_values
40
 
41
  # Perform ASR
42
  with torch.no_grad():
 
52
  return f"An error occurred: {str(e)}"
53
 
54
  # Create Gradio interface
55
+ audio_input = gr.Audio(sources=["microphone"])
56
  gr.Interface(fn=transcribe_audio, inputs=audio_input, outputs="text").launch()