MusIre committed on
Commit
638cf1e
·
1 Parent(s): d2a0f58

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -27
app.py CHANGED
@@ -16,38 +16,15 @@ forced_decoder_ids = processor.get_decoder_prompt_ids(language="italian", task="
16
 
17
  # Custom preprocessing function
18
  def preprocess_audio(audio_data, sampling_rate=16_000):
19
- print(type(audio_data))
20
- print(audio_data)
21
- raw_speech = np.asarray(audio_data, dtype=np.float32)
22
-
23
- # Pad or truncate the audio data to the required length
24
- if len(raw_speech) > processor.feature_extractor.max_len:
25
- raw_speech = raw_speech[:processor.feature_extractor.max_len]
26
- else:
27
- raw_speech = np.pad(raw_speech, (0, processor.feature_extractor.max_len - len(raw_speech)))
28
-
29
- # Process the audio data using the Whisper processor
30
- processed_data = processor(
31
- raw_speech,
32
- sampling_rate=sampling_rate,
33
- return_tensors="pt",
34
- padding=True,
35
- truncation=True
36
- )
37
-
38
- return processed_data.input_features
39
 
40
  # Function to perform ASR on audio data
41
  def transcribe_audio(audio_data):
42
- # Preprocess the audio data
43
  input_features = preprocess_audio(audio_data)
44
-
45
- # Generate token ids
46
- predicted_ids = model.generate(input_features)
47
-
48
- # Decode token ids to text
49
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
50
-
51
  return transcription[0]
52
 
53
  # Create Gradio interface
 
16
 
17
  # Custom preprocessing function
18
  def preprocess_audio(audio_data, sampling_rate=16_000):
19
+ sample_rate, raw_audio = audio_data
20
+ raw_speech = np.asarray(raw_audio, dtype=np.float32)
21
+ return {"input_values": raw_speech, "sampling_rate": sample_rate}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  # Function to perform ASR on audio data
24
  def transcribe_audio(audio_data):
 
25
  input_features = preprocess_audio(audio_data)
26
+ predicted_ids = model.generate(input_features["input_values"])
 
 
 
 
27
  transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
 
28
  return transcription[0]
29
 
30
  # Create Gradio interface