Spaces:

TaiYouWeb
/

funasr-svsmall

Runtime error

App Files Community

TaiYouWeb committed on Oct 6, 2024

Commit

cfa3d27

verified ·

1 Parent(s): 81f707b

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -25

app.py CHANGED Viewed

@@ -2,22 +2,10 @@ from funasr import AutoModel
 from funasr.utils.postprocess_utils import rich_transcription_postprocess
 from modelscope import snapshot_download
-import datetime
-import math
-import io
-import os
-import tempfile
 import json
-from typing import Optional
 import torch
 import gradio as gr
-from config import model_config
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 model_dir = snapshot_download(model_config['model_dir'])
@@ -26,7 +14,7 @@ model = AutoModel(
     trust_remote_code=False,
     remote_code="./model.py",
     vad_model="fsmn-vad",
-    punc_model="ct-punc",
     spk_model="cam++",
     vad_kwargs={"max_single_segment_time": 15000},
     ncpu=torch.get_num_threads(),
@@ -35,13 +23,12 @@ model = AutoModel(
     device=device,
 )
-def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
-                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
-                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                      hotword=" ", ban_emo_unk=True):
     try:
         vad_kwargs = json.loads(vad_kwargs)
         temp_file_path = file_path
         res = model.generate(
@@ -57,9 +44,18 @@ def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_
             ban_emo_unk=ban_emo_unk
         )
-        text = rich_transcription_postprocess(res[0]["text"])
-        return text
     except Exception as e:
         return str(e)
@@ -84,8 +80,8 @@ inputs = [
 outputs = gr.Textbox(label="Transcription")
 gr.Interface(
-    fn=transcribe_audio,
-    inputs=inputs,
-    outputs=outputs,
-    title="ASR Transcription with FunASR"
-).launch()

 from funasr.utils.postprocess_utils import rich_transcription_postprocess
 from modelscope import snapshot_download
 import json
 import torch
 import gradio as gr
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 model_dir = snapshot_download(model_config['model_dir'])
     trust_remote_code=False,
     remote_code="./model.py",
     vad_model="fsmn-vad",
+    punc_model="ct-punc",
     spk_model="cam++",
     vad_kwargs={"max_single_segment_time": 15000},
     ncpu=torch.get_num_threads(),
     device=device,
 )
+def transcribe_audio(file_path, vad_model="fsmn-vad", punc_model="ct-punc", spk_model="cam++", vad_kwargs='{"max_single_segment_time": 15000}',
+                     batch_size=1, language="auto", use_itn=True, batch_size_s=60,
+                     merge_vad=True, merge_length_s=15, batch_size_threshold_s=50,
                      hotword=" ", ban_emo_unk=True):
     try:
         vad_kwargs = json.loads(vad_kwargs)
         temp_file_path = file_path
         res = model.generate(
             ban_emo_unk=ban_emo_unk
         )
+        segments = res[0]["segments"]
+        transcription = ""
+        for segment in segments:
+            start_time = segment["start"]
+            end_time = segment["end"]
+            speaker = segment.get("speaker", "unknown")
+            text = segment["text"]
+            transcription += f"[{start_time:.2f}s - {end_time:.2f}s] Speaker {speaker}: {text}\n"
+        return transcription
     except Exception as e:
         return str(e)
 outputs = gr.Textbox(label="Transcription")
 gr.Interface(
+    fn=transcribe_audio,
+    inputs=inputs,
+    outputs=outputs,
+    title="ASR Transcription with Speaker Diarization and Timestamps"
+).launch()