Update app.py
app.py CHANGED
@@ -101,13 +101,14 @@ def transcribe_audio(audio_file):
         raise
 
 def separate_speakers(transcription):
-
+    logger.info("Starting speaker separation...")
     prompt = f"""Analyze the following transcribed text and separate it into different speakers. Identify potential speaker changes based on context, content shifts, or dialogue patterns. Format the output as follows:
 
 1. Label speakers as "Speaker 1", "Speaker 2", etc.
 2. Start each speaker's text on a new line beginning with their label.
 3. Separate different speakers' contributions with a blank line.
 4. If the same speaker continues, do not insert a blank line or repeat the speaker label.
+5. Do not include any additional explanations or metadata.
 
 Now, please process the following transcribed text:
 
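This hunk only adds the opening log line and a fifth formatting rule; it does not show how prompt becomes the inputs consumed by qwen_model.generate in the next hunk. A minimal sketch of that missing step, assuming a standard Hugging Face transformers tokenizer/model pair already loaded as qwen_tokenizer and qwen_model (the plain tokenizer call and the helper name are assumptions; the app may equally use a chat template):

import torch

def run_speaker_prompt(prompt, qwen_tokenizer, qwen_model):
    # Hypothetical helper: tokenize the instruction prompt and move the
    # tensors onto the model's device, the usual transformers pattern.
    inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
    with torch.no_grad():
        # max_new_tokens mirrors the value used in the next hunk.
        outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
    return qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)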
@@ -119,10 +120,10 @@ Now, please process the following transcribed text:
     outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
     result = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Extract the processed text (remove the instruction part)
+    # Extract only the processed text (remove the instruction part)
     processed_text = result.split("Now, please process the following transcribed text:")[-1].strip()
 
-
+    logger.info("Speaker separation complete.")
     return processed_text
 
 def transcribe_video(url):
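The extraction line keys off the literal instruction sentence: the decoded output normally contains the prompt followed by the model's continuation, so splitting on that sentence and keeping the last piece drops the instruction block. A small self-contained illustration of the idiom (the sample text is invented):

marker = "Now, please process the following transcribed text:"
result = (
    "Analyze the following transcribed text and separate it into different speakers...\n"
    + marker + "\n"
    + "Speaker 1: Hello, welcome to the show.\n\nSpeaker 2: Thanks for having me."
)

# split()[-1] keeps everything after the last occurrence of the marker.
# If the marker is absent, split returns the whole string as one element,
# so [-1] degrades gracefully to the full model output.
processed_text = result.split(marker)[-1].strip()
print(processed_text)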
@@ -142,12 +143,12 @@ def transcribe_video(url):
 
     logger.info("Separating speakers...")
     try:
-
-        logger.info(f"Speaker separation complete. Result length: {len(
-        if len(
+        diarized_transcript = separate_speakers(transcript)
+        logger.info(f"Speaker separation complete. Result length: {len(diarized_transcript)} characters")
+        if len(diarized_transcript) < 10:
             logger.warning("Speaker separation result too short, using original transcript")
             return transcript
-        return
+        return diarized_transcript
     except Exception as e:
         logger.error(f"Error during speaker separation: {str(e)}")
         logger.info("Returning original transcript without speaker separation")
@@ -202,22 +203,13 @@ def update_transcription(n_clicks, url):
     else:
         return transcript, {'display': 'none'}
 
-    if transcript and not transcript.startswith("An error occurred"):
-        return dbc.Card([
-            dbc.CardBody([
-                html.H5("Transcription Result with Speaker Separation"),
-                html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
-            ])
-        ]), {'display': 'block'}
-    else:
-        return transcript, {'display': 'none'}
-
 @app.callback(
     Output("download-transcript", "data"),
     Input("download-button", "n_clicks"),
     State("transcription-output", "children"),
     prevent_initial_call=True
 )
+
 def download_transcript(n_clicks, transcription_output):
     if not transcription_output:
         raise PreventUpdate
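This hunk only inserts a blank line ahead of download_transcript and shows its guard clause; the rest of the callback body sits outside the diff. A hedged sketch of how such a Dash download callback is typically completed with dcc.send_string, shown without the decorator already given in the hunk above (the filename and the coercion of the displayed children to plain text are assumptions, not taken from app.py):

from dash import dcc
from dash.exceptions import PreventUpdate

def download_transcript(n_clicks, transcription_output):
    if not transcription_output:
        raise PreventUpdate
    # The "transcription-output" children may be a plain string or a
    # component tree (e.g. the dbc.Card markup); coerce to text first.
    text = transcription_output if isinstance(transcription_output, str) else str(transcription_output)
    # dcc.send_string hands the text to the dcc.Download component wired
    # to the "download-transcript" Output.
    return dcc.send_string(text, "transcript.txt")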