Spaces:

dwarkesh
/

whisper-speaker-recognition

Build error

dwarkesh committed on Dec 9, 2022

Commit

dcc9ea1

1 Parent(s): 626e00c

duration check

Files changed (1) hide show

app.py CHANGED Viewed

@@ -25,15 +25,18 @@ embedding_model = PretrainedSpeakerEmbedding(
 def transcribe(audio, num_speakers):
   path = convert_to_wav(audio)
   result = model.transcribe(path)
   segments = result["segments"]
-  num_speakers = max(round(num_speakers), 1)
-  if len(segments) < num_speakers:
-    num_speakers = len(segments)
   if len(segments) == 1:
     segments[0]['speaker'] = 'SPEAKER 1'
   else:
-    duration = get_duration(path)
     embeddings = make_embeddings(path, segments, duration)
     add_speaker_labels(segments, embeddings, num_speakers)
   output = get_output(segments)
@@ -94,5 +97,5 @@ gr.Interface(
     ],
     outputs=[
         gr.outputs.Textbox(label='Transcript')
-    ],
-    debug=True).launch()

 def transcribe(audio, num_speakers):
   path = convert_to_wav(audio)
+  duration = get_duration(path)
+  if duration > 4 * 60 * 60:
+    return "Audio duration too long"
   result = model.transcribe(path)
   segments = result["segments"]
+  num_speakers = min(max(round(num_speakers), 1), len(segments))
   if len(segments) == 1:
     segments[0]['speaker'] = 'SPEAKER 1'
   else:
     embeddings = make_embeddings(path, segments, duration)
     add_speaker_labels(segments, embeddings, num_speakers)
   output = get_output(segments)
     ],
     outputs=[
         gr.outputs.Textbox(label='Transcript')
+    ]
+  ).launch()