bluenevus committed on
Commit
3bed379
·
verified ·
1 Parent(s): 71090a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -9,6 +9,7 @@ import io
9
  import os
10
  from bs4 import BeautifulSoup
11
  import re
 
12
 
13
  # Load the transcription model
14
  transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
@@ -36,8 +37,11 @@ def transcribe_audio(audio_bytes):
36
  waveform, sample_rate = torchaudio.load("temp_audio.wav")
37
  os.remove("temp_audio.wav")
38
 
 
 
 
39
  # Transcribe the audio
40
- result = transcription_pipeline(waveform, chunk_length_s=30)
41
  transcript = result['text']
42
 
43
  # Split transcript into paragraphs based on silence
@@ -47,10 +51,13 @@ def transcribe_audio(audio_bytes):
47
 
48
  for chunk in chunks:
49
  chunk.export("temp_chunk.wav", format="wav")
50
- waveform, sample_rate = torchaudio.load("temp_chunk.wav")
51
  os.remove("temp_chunk.wav")
52
 
53
- chunk_result = transcription_pipeline(waveform, chunk_length_s=30)
 
 
 
54
  chunk_transcript = chunk_result['text']
55
 
56
  if chunk_transcript:
 
9
  import os
10
  from bs4 import BeautifulSoup
11
  import re
12
+ import numpy as np
13
 
14
  # Load the transcription model
15
  transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
 
37
  waveform, sample_rate = torchaudio.load("temp_audio.wav")
38
  os.remove("temp_audio.wav")
39
 
40
+ # Convert torch.Tensor to numpy.ndarray
41
+ waveform_np = waveform.numpy().squeeze()
42
+
43
  # Transcribe the audio
44
+ result = transcription_pipeline(waveform_np, chunk_length_s=30)
45
  transcript = result['text']
46
 
47
  # Split transcript into paragraphs based on silence
 
51
 
52
  for chunk in chunks:
53
  chunk.export("temp_chunk.wav", format="wav")
54
+ waveform_chunk, sample_rate_chunk = torchaudio.load("temp_chunk.wav")
55
  os.remove("temp_chunk.wav")
56
 
57
+ # Convert torch.Tensor to numpy.ndarray
58
+ waveform_chunk_np = waveform_chunk.numpy().squeeze()
59
+
60
+ chunk_result = transcription_pipeline(waveform_chunk_np, chunk_length_s=30)
61
  chunk_transcript = chunk_result['text']
62
 
63
  if chunk_transcript: