Spaces:
Runtime error
Update app.py
app.py
CHANGED
@@ -2,22 +2,10 @@ from funasr import AutoModel
 from funasr.utils.postprocess_utils import rich_transcription_postprocess
 from modelscope import snapshot_download
 
-import datetime
-import math
-import io
-import os
-import tempfile
 import json
-from typing import Optional
-
 import torch
 import gradio as gr
 
-from config import model_config
-
-
-
-
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 model_dir = snapshot_download(model_config['model_dir'])
 
@@ -26,7 +14,7 @@ model = AutoModel(
     trust_remote_code=False,
     remote_code="./model.py",
     vad_model="fsmn-vad",
-    punc_model="ct-punc",
+    punc_model="ct-punc",
     spk_model="cam++",
     vad_kwargs={"max_single_segment_time": 15000},
     ncpu=torch.get_num_threads(),
@@ -35,13 +23,12 @@ model = AutoModel(
     device=device,
 )
 
-def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
-                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
-                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
+def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
+                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
+                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                      hotword=" ", ban_emo_unk=True):
     try:
         vad_kwargs = json.loads(vad_kwargs)
-
         temp_file_path = file_path
 
         res = model.generate(
@@ -57,9 +44,18 @@ def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_
             ban_emo_unk=ban_emo_unk
         )
 
-
-
-
+        segments = res[0]["segments"]
+        transcription = ""
+
+        for segment in segments:
+            start_time = segment["start"]
+            end_time = segment["end"]
+            speaker = segment.get("speaker", "unknown")
+            text = segment["text"]
+
+            transcription += f"[{start_time:.2f}s - {end_time:.2f}s] Speaker {speaker}: {text}\n"
+
+        return transcription
 
     except Exception as e:
         return str(e)
@@ -84,8 +80,8 @@ inputs = [
 outputs = gr.Textbox(label="Transcription")
 
 gr.Interface(
-    fn=transcribe_audio,
-    inputs=inputs,
-    outputs=outputs,
-    title="ASR Transcription with
-).launch()
+    fn=transcribe_audio,
+    inputs=inputs,
+    outputs=outputs,
+    title="ASR Transcription with Speaker Diarization and Timestamps"
+).launch()
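The block added inside transcribe_audio walks res[0]["segments"] and joins each segment into a "[start - end] Speaker N: text" line. A minimal, self-contained sketch of just that formatting step, assuming FunASR returns segments as dicts with "start", "end", "speaker", and "text" keys and that the times are in seconds (the :.2f formatting in the diff suggests as much):

# Sketch of the segment-formatting step; the segment keys are taken from the
# diff above, but the exact shape of FunASR's output depends on the model used.
def format_segments(segments):
    transcription = ""
    for segment in segments:
        start_time = segment["start"]
        end_time = segment["end"]
        speaker = segment.get("speaker", "unknown")  # speaker id may be missing
        text = segment["text"]
        transcription += f"[{start_time:.2f}s - {end_time:.2f}s] Speaker {speaker}: {text}\n"
    return transcription

# Example with dummy segments:
print(format_segments([
    {"start": 0.0, "end": 2.5, "speaker": 0, "text": "Hello there."},
    {"start": 2.5, "end": 5.0, "speaker": 1, "text": "Hi, how are you?"},
]))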
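The last hunk changes only the Interface arguments and the title; the full inputs list is elided from the diff. A minimal wiring sketch, assuming a single audio-file input alongside the gr.Textbox output that does appear above (the real app.py passes a longer inputs list matching transcribe_audio's parameters), with transcribe_stub standing in for transcribe_audio:

# Sketch of the Gradio wiring; gr.Audio(type="filepath") hands the function a
# path on disk, matching transcribe_audio's file_path argument.
import gradio as gr

def transcribe_stub(file_path):
    return f"Would transcribe {file_path}"

gr.Interface(
    fn=transcribe_stub,
    inputs=[gr.Audio(type="filepath", label="Audio")],
    outputs=gr.Textbox(label="Transcription"),
    title="ASR Transcription with Speaker Diarization and Timestamps",
).launch()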
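Note that model_dir = snapshot_download(model_config['model_dir']) still references a model_config mapping even though the first hunk removes the from config import model_config line; if model_config is not defined elsewhere in the file, a config.py along these lines would be needed. The model id below is a placeholder, not a value taken from the diff:

# Hypothetical config.py; the actual ModelScope model id used by this Space
# is not visible in the diff, so the value below is a placeholder.
model_config = {
    "model_dir": "your-org/your-asr-model",  # passed to modelscope.snapshot_download
}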