Update app.py
app.py CHANGED
@@ -101,13 +101,14 @@ def transcribe_audio(audio_file):
         raise
 
 def separate_speakers(transcription):
-
+    logger.info("Starting speaker separation...")
     prompt = f"""Analyze the following transcribed text and separate it into different speakers. Identify potential speaker changes based on context, content shifts, or dialogue patterns. Format the output as follows:
 
 1. Label speakers as "Speaker 1", "Speaker 2", etc.
 2. Start each speaker's text on a new line beginning with their label.
 3. Separate different speakers' contributions with a blank line.
 4. If the same speaker continues, do not insert a blank line or repeat the speaker label.
+5. Do not include any additional explanations or metadata.
 
 Now, please process the following transcribed text:
 
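This hunk only adds the opening log line and a fifth formatting rule; it does not show how prompt becomes the inputs consumed by qwen_model.generate in the next hunk. A minimal sketch of that missing step, assuming a standard Hugging Face transformers tokenizer/model pair already loaded as qwen_tokenizer and qwen_model (the plain tokenizer call and the helper name are assumptions; the app may equally use a chat template):

import torch

def run_speaker_prompt(prompt, qwen_tokenizer, qwen_model):
    # Hypothetical helper: tokenize the instruction prompt and move the
    # tensors onto the model's device, the usual transformers pattern.
    inputs = qwen_tokenizer(prompt, return_tensors="pt").to(qwen_model.device)
    with torch.no_grad():
        # max_new_tokens mirrors the value used in the next hunk.
        outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
    return qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)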
@@ -119,10 +120,10 @@ Now, please process the following transcribed text:
     outputs = qwen_model.generate(**inputs, max_new_tokens=4000)
     result = qwen_tokenizer.decode(outputs[0], skip_special_tokens=True)
 
-    # Extract the processed text (remove the instruction part)
+    # Extract only the processed text (remove the instruction part)
     processed_text = result.split("Now, please process the following transcribed text:")[-1].strip()
 
-
+    logger.info("Speaker separation complete.")
     return processed_text
 
 def transcribe_video(url):
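The extraction line keys off the literal instruction sentence: the decoded output normally contains the prompt followed by the model's continuation, so splitting on that sentence and keeping the last piece drops the instruction block. A small self-contained illustration of the idiom (the sample text is invented):

marker = "Now, please process the following transcribed text:"
result = (
    "Analyze the following transcribed text and separate it into different speakers...\n"
    + marker + "\n"
    + "Speaker 1: Hello, welcome to the show.\n\nSpeaker 2: Thanks for having me."
)

# split()[-1] keeps everything after the last occurrence of the marker.
# If the marker is absent, split returns the whole string as one element,
# so [-1] degrades gracefully to the full model output.
processed_text = result.split(marker)[-1].strip()
print(processed_text)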
@@ -142,12 +143,12 @@ def transcribe_video(url):
 
     logger.info("Separating speakers...")
     try:
-
-        logger.info(f"Speaker separation complete. Result length: {len(
-        if len(
+        diarized_transcript = separate_speakers(transcript)
+        logger.info(f"Speaker separation complete. Result length: {len(diarized_transcript)} characters")
+        if len(diarized_transcript) < 10:
             logger.warning("Speaker separation result too short, using original transcript")
             return transcript
-        return
+        return diarized_transcript
     except Exception as e:
         logger.error(f"Error during speaker separation: {str(e)}")
         logger.info("Returning original transcript without speaker separation")
@@ -202,22 +203,13 @@ def update_transcription(n_clicks, url):
     else:
         return transcript, {'display': 'none'}
 
-    if transcript and not transcript.startswith("An error occurred"):
-        return dbc.Card([
-            dbc.CardBody([
-                html.H5("Transcription Result with Speaker Separation"),
-                html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
-            ])
-        ]), {'display': 'block'}
-    else:
-        return transcript, {'display': 'none'}
-
 @app.callback(
     Output("download-transcript", "data"),
     Input("download-button", "n_clicks"),
     State("transcription-output", "children"),
     prevent_initial_call=True
 )
+
 def download_transcript(n_clicks, transcription_output):
     if not transcription_output:
         raise PreventUpdate
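This hunk only inserts a blank line ahead of download_transcript and shows its guard clause; the rest of the callback body sits outside the diff. A hedged sketch of how such a Dash download callback is typically completed with dcc.send_string, shown without the decorator already given in the hunk above (the filename and the coercion of the displayed children to plain text are assumptions, not taken from app.py):

from dash import dcc
from dash.exceptions import PreventUpdate

def download_transcript(n_clicks, transcription_output):
    if not transcription_output:
        raise PreventUpdate
    # The "transcription-output" children may be a plain string or a
    # component tree (e.g. the dbc.Card markup); coerce to text first.
    text = transcription_output if isinstance(transcription_output, str) else str(transcription_output)
    # dcc.send_string hands the text to the dcc.Download component wired
    # to the "download-transcript" Output.
    return dcc.send_string(text, "transcript.txt")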