Luigi committed on
Commit
264de1a
·
1 Parent(s): 22699db

pretty print diarized transcript

Browse files
Files changed (1) hide show
  1. app.py +23 -6
app.py CHANGED
@@ -57,6 +57,23 @@ sense_models = {}
57
  dar_pipe = None
58
 
59
  converter = opencc.OpenCC('s2t')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  # —————— Helpers ——————
62
  def get_whisper_pipe(model_id: str, device: int):
@@ -124,7 +141,7 @@ def _transcribe_whisper_cpu(model_id, language, audio_path, enable_diar):
124
  os.unlink(tmp.name)
125
  text = converter.convert(out.get("text", "").strip())
126
  snippets.append(f"[{speaker}] {text}")
127
- return "", "\n".join(snippets)
128
  # Raw-only branch
129
  result = pipe(audio_path) if language == "auto" else pipe(audio_path, generate_kwargs={"language": language})
130
  transcript = converter.convert(result.get("text", "").strip())
@@ -149,7 +166,7 @@ def _transcribe_whisper_gpu(model_id, language, audio_path, enable_diar):
149
  os.unlink(tmp.name)
150
  text = converter.convert(out.get("text", "").strip())
151
  snippets.append(f"[{speaker}] {text}")
152
- return "", "\n".join(snippets)
153
  # Raw-only branch
154
  result = pipe(audio_path) if language == "auto" else pipe(audio_path, generate_kwargs={"language": language})
155
  transcript = converter.convert(result.get("text", "").strip())
@@ -195,7 +212,7 @@ def _transcribe_sense_cpu(model_id: str,
195
  txt = re.sub(r"[^\w\s]", "", txt)
196
  txt = converter.convert(txt)
197
  snippets.append(f"[{speaker}] {txt}")
198
- return "", "\n".join(snippets)
199
  # Raw-only branch
200
  segs = model.generate(
201
  input=audio_path,
@@ -246,7 +263,7 @@ def _transcribe_sense_gpu(model_id: str,
246
  txt = re.sub(r"[^\w\s]", "", txt)
247
  txt = converter.convert(txt)
248
  snippets.append(f"[{speaker}] {txt}")
249
- return "", "\n".join(snippets)
250
  # Raw-only branch
251
  segs = model.generate(
252
  input=audio_path,
@@ -300,7 +317,7 @@ with Demo:
300
  device_radio = gr.Radio(choices=["GPU", "CPU"], value="GPU", label="Device")
301
  diar_check = gr.Checkbox(label="Enable Diarization", value=True)
302
  out_w = gr.Textbox(label="Transcript", visible=False)
303
- out_w_d = gr.Textbox(label="Diarized Transcript", visible=True)
304
  # Toggle visibility based on checkbox
305
  diar_check.change(lambda e: gr.update(visible=not e), inputs=diar_check, outputs=out_w)
306
  diar_check.change(lambda e: gr.update(visible=e), inputs=diar_check, outputs=out_w_d)
@@ -317,7 +334,7 @@ with Demo:
317
  punct_chk = gr.Checkbox(label="Enable Punctuation", value=True)
318
  diar_s_chk = gr.Checkbox(label="Enable Diarization", value=True)
319
  out_s = gr.Textbox(label="Transcript", visible=False)
320
- out_s_d = gr.Textbox(label="Diarized Transcript", visible=True)
321
  # Toggle visibility
322
  diar_s_chk.change(lambda e: gr.update(visible=not e), inputs=diar_s_chk, outputs=out_s)
323
  diar_s_chk.change(lambda e: gr.update(visible=e), inputs=diar_s_chk, outputs=out_s_d)
 
57
  dar_pipe = None
58
 
59
  converter = opencc.OpenCC('s2t')
60
+ # —————— Helpers ——————
61
def format_diarization_html(snippets):
    """Render diarized transcript snippets as color-coded HTML.

    Each snippet is expected in the form "[SPEAKER] text"; snippets
    without that prefix are rendered with an empty speaker label.
    Speakers are assigned colors from a fixed palette in order of first
    appearance, cycling when there are more speakers than colors.

    Args:
        snippets: iterable of transcript line strings.

    Returns:
        A single HTML string (a <div> of <p> lines) suitable for a
        gr.HTML output component.
    """
    import html  # stdlib; local import keeps this helper self-contained

    palette = ["#e74c3c", "#3498db", "#27ae60", "#e67e22", "#9b59b6", "#16a085", "#f1c40f"]
    speaker_colors = {}
    parts = ["<div style='font-family:monospace; line-height:1.5em;'>"]
    for s in snippets:
        # Split "[SPEAKER] text" into label and utterance; anything that
        # doesn't match the pattern is treated as unattributed text.
        if s.startswith("[") and "]" in s:
            spk, txt = s[1:].split("]", 1)
            spk, txt = spk.strip(), txt.strip()
        else:
            spk, txt = "", s
        # First-seen speakers get the next palette color (cycled).
        if spk not in speaker_colors:
            speaker_colors[spk] = palette[len(speaker_colors) % len(palette)]
        color = speaker_colors[spk]
        # Escape transcript-derived text so characters like "<" or "&"
        # in ASR output cannot inject markup into the rendered HTML.
        parts.append(
            f"<p style='margin:4px 0; color:{color};'>"
            f"<strong>{html.escape(spk)}:</strong> {html.escape(txt)}</p>"
        )
    parts.append("</div>")
    return "".join(parts)
77
 
78
  # —————— Helpers ——————
79
  def get_whisper_pipe(model_id: str, device: int):
 
141
  os.unlink(tmp.name)
142
  text = converter.convert(out.get("text", "").strip())
143
  snippets.append(f"[{speaker}] {text}")
144
+ return "", format_diarization_html(snippets)
145
  # Raw-only branch
146
  result = pipe(audio_path) if language == "auto" else pipe(audio_path, generate_kwargs={"language": language})
147
  transcript = converter.convert(result.get("text", "").strip())
 
166
  os.unlink(tmp.name)
167
  text = converter.convert(out.get("text", "").strip())
168
  snippets.append(f"[{speaker}] {text}")
169
+ return "", format_diarization_html(snippets)
170
  # Raw-only branch
171
  result = pipe(audio_path) if language == "auto" else pipe(audio_path, generate_kwargs={"language": language})
172
  transcript = converter.convert(result.get("text", "").strip())
 
212
  txt = re.sub(r"[^\w\s]", "", txt)
213
  txt = converter.convert(txt)
214
  snippets.append(f"[{speaker}] {txt}")
215
+ return "", format_diarization_html(snippets)
216
  # Raw-only branch
217
  segs = model.generate(
218
  input=audio_path,
 
263
  txt = re.sub(r"[^\w\s]", "", txt)
264
  txt = converter.convert(txt)
265
  snippets.append(f"[{speaker}] {txt}")
266
+ return "", format_diarization_html(snippets)
267
  # Raw-only branch
268
  segs = model.generate(
269
  input=audio_path,
 
317
  device_radio = gr.Radio(choices=["GPU", "CPU"], value="GPU", label="Device")
318
  diar_check = gr.Checkbox(label="Enable Diarization", value=True)
319
  out_w = gr.Textbox(label="Transcript", visible=False)
320
+ out_w_d = gr.HTML(label="Diarized Transcript", visible=True)
321
  # Toggle visibility based on checkbox
322
  diar_check.change(lambda e: gr.update(visible=not e), inputs=diar_check, outputs=out_w)
323
  diar_check.change(lambda e: gr.update(visible=e), inputs=diar_check, outputs=out_w_d)
 
334
  punct_chk = gr.Checkbox(label="Enable Punctuation", value=True)
335
  diar_s_chk = gr.Checkbox(label="Enable Diarization", value=True)
336
  out_s = gr.Textbox(label="Transcript", visible=False)
337
+ out_s_d = gr.HTML(label="Diarized Transcript", visible=True)
338
  # Toggle visibility
339
  diar_s_chk.change(lambda e: gr.update(visible=not e), inputs=diar_s_chk, outputs=out_s)
340
  diar_s_chk.change(lambda e: gr.update(visible=e), inputs=diar_s_chk, outputs=out_s_d)