Spaces:
Runtime error
Runtime error
time tracking
Browse files
app.py
CHANGED
@@ -5,6 +5,7 @@ import torch
|
|
5 |
from pyctcdecode import BeamSearchDecoderCTC
|
6 |
import torch
|
7 |
import librosa
|
|
|
8 |
|
9 |
|
10 |
lmID = "aware-ai/german-lowercase-wiki-4gram"
|
@@ -36,12 +37,21 @@ def translate(src, tgt, text):
|
|
36 |
|
37 |
def transcribe(audio):
    """Transcribe an audio input and return the raw and post-processed text.

    The input is loaded at 16 kHz with ``librosa.load``, cut into the
    segments reported by ``get_speech_timestamps``, and those segments are
    passed to the module-level pipeline ``p``. The joined result is then
    passed through the module-level pipeline ``ttp``.

    Args:
        audio: Whatever ``librosa.load`` accepts as its first argument
            (presumably a file path -- confirm against the caller).

    Returns:
        A ``(transcribed, punctuated)`` tuple: the space-joined ``"text"``
        fields produced by ``p``, and the ``"generated_text"`` of the first
        result produced by ``ttp`` for that string.
    """
    sampling_rate = 16000

    # Load the audio at the fixed sampling rate; the returned rate is unused.
    waveform, _rate = librosa.load(audio, sr=sampling_rate)

    # Slice the waveform into the segments flagged by the VAD helper.
    segments = get_speech_timestamps(
        waveform, vadmodel, sampling_rate=sampling_rate
    )
    chunks = []
    for segment in segments:
        chunks.append(waveform[segment["start"]:segment["end"]])

    # Run the recognition pipeline on every chunk and join the pieces.
    results = p(chunks, chunk_length_s=20, stride_length_s=(0, 0))
    pieces = [result["text"] for result in results]
    transcribed = " ".join(pieces)

    # Post-process the raw transcript with the text-to-text pipeline.
    generated = ttp(transcribed, max_length=512)
    punctuated = generated[0]["generated_text"]

    return transcribed, punctuated
|
47 |
|
|
|
5 |
from pyctcdecode import BeamSearchDecoderCTC
|
6 |
import torch
|
7 |
import librosa
|
8 |
+
import time
|
9 |
|
10 |
|
11 |
lmID = "aware-ai/german-lowercase-wiki-4gram"
|
|
|
37 |
|
38 |
def transcribe(audio):
    """Transcribe an audio input, printing how long each stage takes.

    The input is loaded at 16 kHz with ``librosa.load``, cut into the
    segments reported by ``get_speech_timestamps``, and those segments are
    passed to the module-level pipeline ``p``. The joined result is then
    passed through the module-level pipeline ``ttp``. After every stage the
    elapsed time in seconds is printed to stdout.

    Args:
        audio: Whatever ``librosa.load`` accepts as its first argument
            (presumably a file path -- confirm against the caller).

    Returns:
        A ``(transcribed, punctuated)`` tuple: the space-joined ``"text"``
        fields produced by ``p``, and the ``"generated_text"`` of the first
        result produced by ``ttp`` for that string.
    """
    sampling_rate = 16000

    # time.perf_counter() is used instead of time.time(): it is monotonic
    # and high-resolution, so a wall-clock adjustment (NTP, DST) during a
    # stage cannot produce a negative or wildly wrong duration.
    stage_start = time.perf_counter()
    audio, _ = librosa.load(audio, sr=sampling_rate)
    print("--- %s seconds audio loading ---" % (time.perf_counter() - stage_start))

    stage_start = time.perf_counter()
    speech_timestamps = get_speech_timestamps(audio, vadmodel, sampling_rate=sampling_rate)
    print("--- %s seconds audio timestamps---" % (time.perf_counter() - stage_start))

    stage_start = time.perf_counter()
    chunks = [audio[i["start"]:i["end"]] for i in speech_timestamps]
    print("--- %s seconds audio chunking---" % (time.perf_counter() - stage_start))

    stage_start = time.perf_counter()
    transcribed = " ".join(
        text["text"] for text in p(chunks, chunk_length_s=20, stride_length_s=(0, 0))
    )
    print("--- %s seconds audio transcription ---" % (time.perf_counter() - stage_start))

    stage_start = time.perf_counter()
    punctuated = ttp(transcribed, max_length=512)[0]["generated_text"]
    print("--- %s seconds audio formatting ---" % (time.perf_counter() - stage_start))

    return transcribed, punctuated
|
57 |
|