Update app.py
app.py CHANGED
@@ -32,8 +32,8 @@ from gpuinfo import GPUInfo
 #import csv
 import numpy as np
 import torch
-
-
+import torchaudio
+import torchaudio.transforms as transforms
 
 from transformers import pipeline, AutoModel
 
@@ -68,19 +68,13 @@ pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large"
 
 @spaces.GPU()
 def transcribe_audio(audio_file, batch_size=16):
-
-
-
-    audio_file = audio_file[0]
-
-    # --place audio file in numpy array
-    audio = AudioSegment.from_wav(audio_file)
-    samples = np.array(audio.get_array_of_samples())
-    sample_rate = audio.frame_rate
+    # --audio file to tensor
+    waveform, sample_rate = torchaudio.load(audio_file)
+    samples = waveform.numpy()
 
     start_time = time.time()
 
-    # --
+    # --pipe it
     outputs = pipe(samples, sampling_rate=sample_rate, batch_size=batch_size, return_timestamps=False)
     text = outputs["text"]
 
@@ -89,13 +83,13 @@ def transcribe_audio(audio_file, batch_size=16):
     output_time = end_time - start_time
     word_count = len(text.split())
 
-    # --GPU
+    # --GPU metrics
     memory = psutil.virtual_memory()
     gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
     gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
     gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0
 
-    # --CPU
+    # --CPU metric
     cpu_usage = psutil.cpu_percent(interval=1)
 
     # --system info string
@@ -110,6 +104,7 @@ def transcribe_audio(audio_file, batch_size=16):
     return text.strip(), system_info
 
 
+
 # ------------summary section------------
 
 
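
For context, a minimal, self-contained sketch of the new torchaudio loading path feeding the same pipeline that app.py builds. The mono downmix, the 16 kHz resample, the dict-style pipeline input, and the example.wav filename are illustrative assumptions, not part of this commit: torchaudio.load() returns a (channels, frames) tensor at the file's native rate, while Whisper feature extractors expect a one-dimensional signal at 16 kHz.

# Sketch only: assumes the same model as app.py; helper names are illustrative.
import torchaudio
import torchaudio.transforms as transforms
from transformers import pipeline

pipe = pipeline("automatic-speech-recognition", model="NbAiLab/nb-whisper-large")

def load_audio_for_pipe(audio_file, target_rate=16000):
    # torchaudio.load returns a (channels, frames) tensor and the file's sample rate
    waveform, sample_rate = torchaudio.load(audio_file)
    waveform = waveform.mean(dim=0)  # downmix to mono (assumption)
    if sample_rate != target_rate:
        waveform = transforms.Resample(sample_rate, target_rate)(waveform)
        sample_rate = target_rate
    return waveform.numpy(), sample_rate

samples, sample_rate = load_audio_for_pipe("example.wav")  # hypothetical file
# Passing a dict makes the sampling rate explicit to the pipeline.
outputs = pipe({"raw": samples, "sampling_rate": sample_rate},
               batch_size=16, return_timestamps=False)
print(outputs["text"])

The dict input ({"raw": ..., "sampling_rate": ...}) is the form the transformers ASR pipeline documents for pre-loaded arrays, which avoids any ambiguity about how the bare samples array in the committed code is interpreted.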
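
Similarly, a hedged sketch of the metrics step around the "# --system info string" comment. Everything except the final f-string mirrors lines already visible in the diff; the exact layout of system_info is outside the shown hunks, so the returned string below is a hypothetical reconstruction.

# Sketch of the metrics step; the system_info format is a guess, not the commit's.
import psutil
from gpuinfo import GPUInfo

def collect_system_info(text, start_time, end_time):
    output_time = end_time - start_time
    word_count = len(text.split())

    # --GPU metrics: gpu_usage() returns per-GPU utilisation and memory lists
    memory = psutil.virtual_memory()
    gpu_utilization, gpu_memory = GPUInfo.gpu_usage()
    gpu_utilization = gpu_utilization[0] if len(gpu_utilization) > 0 else 0
    gpu_memory = gpu_memory[0] if len(gpu_memory) > 0 else 0

    # --CPU metric: one-second sampling window, as in the committed code
    cpu_usage = psutil.cpu_percent(interval=1)

    # --system info string (hypothetical layout)
    return (f"Time: {output_time:.1f} s, {word_count} words | "
            f"GPU: {gpu_utilization}% util, {gpu_memory} MiB | "
            f"CPU: {cpu_usage}% | RAM: {memory.percent}% used")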