TaiYouWeb committed on
Commit
cfa3d27
·
verified ·
1 Parent(s): 81f707b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -25
app.py CHANGED
@@ -2,22 +2,10 @@ from funasr import AutoModel
2
  from funasr.utils.postprocess_utils import rich_transcription_postprocess
3
  from modelscope import snapshot_download
4
 
5
- import datetime
6
- import math
7
- import io
8
- import os
9
- import tempfile
10
  import json
11
- from typing import Optional
12
-
13
  import torch
14
  import gradio as gr
15
 
16
- from config import model_config
17
-
18
-
19
-
20
-
21
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
22
  model_dir = snapshot_download(model_config['model_dir'])
23
 
@@ -26,7 +14,7 @@ model = AutoModel(
26
  trust_remote_code=False,
27
  remote_code="./model.py",
28
  vad_model="fsmn-vad",
29
- punc_model="ct-punc",
30
  spk_model="cam++",
31
  vad_kwargs={"max_single_segment_time": 15000},
32
  ncpu=torch.get_num_threads(),
@@ -35,13 +23,12 @@ model = AutoModel(
35
  device=device,
36
  )
37
 
38
- def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
39
- batch_size=1, language="auto", use_itn=True, batch_size_s=60,
40
- merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
41
  hotword=" ", ban_emo_unk=True):
42
  try:
43
  vad_kwargs = json.loads(vad_kwargs)
44
-
45
  temp_file_path = file_path
46
 
47
  res = model.generate(
@@ -57,9 +44,18 @@ def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_
57
  ban_emo_unk=ban_emo_unk
58
  )
59
 
60
- text = rich_transcription_postprocess(res[0]["text"])
61
-
62
- return text
 
 
 
 
 
 
 
 
 
63
 
64
  except Exception as e:
65
  return str(e)
@@ -84,8 +80,8 @@ inputs = [
84
  outputs = gr.Textbox(label="Transcription")
85
 
86
  gr.Interface(
87
- fn=transcribe_audio,
88
- inputs=inputs,
89
- outputs=outputs,
90
- title="ASR Transcription with FunASR"
91
- ).launch()
 
2
  from funasr.utils.postprocess_utils import rich_transcription_postprocess
3
  from modelscope import snapshot_download
4
 
 
 
 
 
 
5
  import json
 
 
6
  import torch
7
  import gradio as gr
8
 
 
 
 
 
 
9
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
10
  model_dir = snapshot_download(model_config['model_dir'])
11
 
 
14
  trust_remote_code=False,
15
  remote_code="./model.py",
16
  vad_model="fsmn-vad",
17
+ punc_model="ct-punc",
18
  spk_model="cam++",
19
  vad_kwargs={"max_single_segment_time": 15000},
20
  ncpu=torch.get_num_threads(),
 
23
  device=device,
24
  )
25
 
26
+ def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
27
+ batch_size=1, language="auto", use_itn=True, batch_size_s=60,
28
+ merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
29
  hotword=" ", ban_emo_unk=True):
30
  try:
31
  vad_kwargs = json.loads(vad_kwargs)
 
32
  temp_file_path = file_path
33
 
34
  res = model.generate(
 
44
  ban_emo_unk=ban_emo_unk
45
  )
46
 
47
+ segments = res[0]["segments"]
48
+ transcription = ""
49
+
50
+ for segment in segments:
51
+ start_time = segment["start"]
52
+ end_time = segment["end"]
53
+ speaker = segment.get("speaker", "unknown")
54
+ text = segment["text"]
55
+
56
+ transcription += f"[{start_time:.2f}s - {end_time:.2f}s] Speaker {speaker}: {text}\n"
57
+
58
+ return transcription
59
 
60
  except Exception as e:
61
  return str(e)
 
80
  outputs = gr.Textbox(label="Transcription")
81
 
82
  gr.Interface(
83
+ fn=transcribe_audio,
84
+ inputs=inputs,
85
+ outputs=outputs,
86
+ title="ASR Transcription with Speaker Diarization and Timestamps"
87
+ ).launch()