Update app.py
app.py
CHANGED
@@ -9,6 +9,7 @@ import io
 import os
 from bs4 import BeautifulSoup
 import re
+import numpy as np
 
 # Load the transcription model
 transcription_pipeline = pipeline("automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
@@ -36,8 +37,11 @@ def transcribe_audio(audio_bytes):
     waveform, sample_rate = torchaudio.load("temp_audio.wav")
     os.remove("temp_audio.wav")
 
+    # Convert torch.Tensor to numpy.ndarray
+    waveform_np = waveform.numpy().squeeze()
+
     # Transcribe the audio
-    result = transcription_pipeline(
+    result = transcription_pipeline(waveform_np, chunk_length_s=30)
     transcript = result['text']
 
     # Split transcript into paragraphs based on silence
@@ -47,10 +51,13 @@ def transcribe_audio(audio_bytes):
 
     for chunk in chunks:
         chunk.export("temp_chunk.wav", format="wav")
-
+        waveform_chunk, sample_rate_chunk = torchaudio.load("temp_chunk.wav")
         os.remove("temp_chunk.wav")
 
-
+        # Convert torch.Tensor to numpy.ndarray
+        waveform_chunk_np = waveform_chunk.numpy().squeeze()
+
+        chunk_result = transcription_pipeline(waveform_chunk_np, chunk_length_s=30)
         chunk_transcript = chunk_result['text']
 
         if chunk_transcript: