Upload simple_casl_app.py

simple_casl_app.py  CHANGED  (+36 −75)
@@ -450,6 +450,40 @@ def perform_speaker_diarization(audio_path):
         logger.error(f"Error in diarization: {e}")
         return None, f"Diarization error: {str(e)}"
 
+def transcribe_audio(audio_file):
+    """Simple transcription function for basic audio/video files"""
+    if not audio_file:
+        return None, "No audio file provided"
+
+    if not SPEECHBRAIN_AVAILABLE:
+        return None, "SpeechBrain not available for transcription"
+
+    try:
+        # Check if it's a video file
+        file_extension = os.path.splitext(audio_file)[1].lower()
+        if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
+            processed_audio, status = extract_audio_from_video(audio_file)
+            if not processed_audio:
+                return None, status
+        else:
+            processed_audio = audio_file
+
+        # Get transcription
+        transcript = asr_model.transcribe_file(processed_audio)
+
+        # Clean up temporary file if created
+        if processed_audio != audio_file and os.path.exists(processed_audio):
+            try:
+                os.unlink(processed_audio)
+            except:
+                pass
+
+        return transcript, "Transcription completed successfully"
+
+    except Exception as e:
+        logger.error(f"Error in transcription: {e}")
+        return None, f"Transcription error: {str(e)}"
+
 def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
     """Transcribe audio with timestamps, sentiment, and metadata"""
     if not audio_file:
@@ -1223,60 +1257,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
             )
 
         quick_progress = gr.Markdown("")
-
-    # Tab 4: Advanced Transcription
-    with gr.Tab("🎤 Advanced Transcription"):
-        with gr.Row():
-            with gr.Column(scale=1):
-                gr.Markdown("### Audio/Video Upload")
-                gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
-
-                transcription_file_input = gr.File(
-                    label="Upload Audio or Video File",
-                    file_types=["audio", "video"]
-                )
-
-                enable_diarization = gr.Checkbox(
-                    label="Enable Speaker Diarization",
-                    value=True,
-                    info="Identify different speakers in the audio"
-                )
-
-                transcribe_advanced_btn = gr.Button(
-                    "🎤 Transcribe with Metadata",
-                    variant="primary",
-                    size="lg"
-                )
-
-                transcription_status = gr.Markdown("")
-
-            with gr.Column(scale=2):
-                gr.Markdown("### Rich Transcript with Metadata")
-
-                rich_transcript_display = gr.Textbox(
-                    label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
-                    lines=15,
-                    max_lines=20
-                )
-
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### Speech Metrics")
-
-                transcription_metrics_display = gr.Textbox(
-                    label="SLP Metrics",
-                    lines=10,
-                    max_lines=15
-                )
-
-            with gr.Column():
-                gr.Markdown("### Word Frequency")
-
-                transcription_word_freq_display = gr.Dataframe(
-                    headers=["Word", "Frequency"],
-                    label="Most Frequent Words",
-                    interactive=False
-                )
 
     # Event handlers
     def on_analyze_file(file, age_val, gender_val, notes):
@@ -1305,19 +1285,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
         else:
             return f"Transcription failed: {status}", "", status
 
-    def on_transcribe_advanced(audio_file, enable_diarization):
-        """Handle advanced transcription"""
-        if not audio_file:
-            return "Please upload an audio/video file first.", "", "No file provided"
-
-        transcript, status = transcribe_audio_with_metadata(audio_file.name, enable_diarization)
-        if transcript:
-            metrics = calculate_slp_metrics(transcript)
-            word_freq_data = metrics.get('word_frequency', {})
-            return transcript, status, metrics, word_freq_data
-        else:
-            return f"Transcription failed: {status}", "", {}, {}
-
     def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
         """Handle targeted analysis"""
         result = targeted_analysis(transcript, question, age_val, gender_val, notes)
@@ -1518,12 +1485,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
         outputs=[output, transcript_input, transcription_status]
     )
 
-    transcribe_advanced_btn.click(
-        on_transcribe_advanced,
-        inputs=[transcription_file_input, enable_diarization],
-        outputs=[rich_transcript_display, transcription_status, transcription_metrics_display, transcription_word_freq_display]
-    )
-
     targeted_analyze_btn.click(
         on_targeted_analyze,
         inputs=[transcript_input, custom_question, age, gender, slp_notes],
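
The removed wiring above leaves the new `transcribe_audio` helper without a UI entry point. If it is wired in later, a minimal hookup following the file's existing `.click()` pattern could look like the sketch below; `transcribe_btn` and `audio_input` are hypothetical component names, while `transcript_input` and `transcription_status` already exist in the file.

    def on_transcribe_simple(audio_file):
        """Hypothetical handler: basic transcription, no diarization metadata."""
        if not audio_file:
            return "", "No file provided"
        transcript, status = transcribe_audio(audio_file.name)
        return transcript or "", status

    transcribe_btn.click(       # hypothetical button
        on_transcribe_simple,
        inputs=[audio_input],   # hypothetical gr.File component
        outputs=[transcript_input, transcription_status],
    )
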
@@ -1544,8 +1505,8 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
 
 if __name__ == "__main__":
     print("🚀 Starting Enhanced Speech Sample Analysis Tool...")
-    print("📋 Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis
-    print("🎤 Transcription: Audio/Video support with
+    print("📋 Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis")
+    print("🎤 Transcription: Audio/Video support with basic transcription")
     print("🔍 Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")
 
     if not ANTHROPIC_API_KEY: