SreekarB commited on
Commit
7766eb3
·
verified ·
1 Parent(s): f0267f4

Upload simple_casl_app.py

Browse files
Files changed (1) hide show
  1. simple_casl_app.py +36 -75
simple_casl_app.py CHANGED
@@ -450,6 +450,40 @@ def perform_speaker_diarization(audio_path):
450
  logger.error(f"Error in diarization: {e}")
451
  return None, f"Diarization error: {str(e)}"
452
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
453
  def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
454
  """Transcribe audio with timestamps, sentiment, and metadata"""
455
  if not audio_file:
@@ -1223,60 +1257,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
1223
  )
1224
 
1225
  quick_progress = gr.Markdown("")
1226
-
1227
- # Tab 4: Advanced Transcription
1228
- with gr.Tab("🎀 Advanced Transcription"):
1229
- with gr.Row():
1230
- with gr.Column(scale=1):
1231
- gr.Markdown("### Audio/Video Upload")
1232
- gr.Markdown("**Supported formats:** MP4, AVI, MOV, MKV, WMV, FLV, WAV, MP3, M4A, FLAC, OGG")
1233
-
1234
- transcription_file_input = gr.File(
1235
- label="Upload Audio or Video File",
1236
- file_types=["audio", "video"]
1237
- )
1238
-
1239
- enable_diarization = gr.Checkbox(
1240
- label="Enable Speaker Diarization",
1241
- value=True,
1242
- info="Identify different speakers in the audio"
1243
- )
1244
-
1245
- transcribe_advanced_btn = gr.Button(
1246
- "🎀 Transcribe with Metadata",
1247
- variant="primary",
1248
- size="lg"
1249
- )
1250
-
1251
- transcription_status = gr.Markdown("")
1252
-
1253
- with gr.Column(scale=2):
1254
- gr.Markdown("### Rich Transcript with Metadata")
1255
-
1256
- rich_transcript_display = gr.Textbox(
1257
- label="Transcription with Speakers, Timestamps, Sentiment & Emotion",
1258
- lines=15,
1259
- max_lines=20
1260
- )
1261
-
1262
- with gr.Row():
1263
- with gr.Column():
1264
- gr.Markdown("### Speech Metrics")
1265
-
1266
- transcription_metrics_display = gr.Textbox(
1267
- label="SLP Metrics",
1268
- lines=10,
1269
- max_lines=15
1270
- )
1271
-
1272
- with gr.Column():
1273
- gr.Markdown("### Word Frequency")
1274
-
1275
- transcription_word_freq_display = gr.Dataframe(
1276
- headers=["Word", "Frequency"],
1277
- label="Most Frequent Words",
1278
- interactive=False
1279
- )
1280
 
1281
  # Event handlers
1282
  def on_analyze_file(file, age_val, gender_val, notes):
@@ -1305,19 +1285,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
1305
  else:
1306
  return f"Transcription failed: {status}", "", status
1307
 
1308
- def on_transcribe_advanced(audio_file, enable_diarization):
1309
- """Handle advanced transcription"""
1310
- if not audio_file:
1311
- return "Please upload an audio/video file first.", "", "No file provided"
1312
-
1313
- transcript, status = transcribe_audio_with_metadata(audio_file.name, enable_diarization)
1314
- if transcript:
1315
- metrics = calculate_slp_metrics(transcript)
1316
- word_freq_data = metrics.get('word_frequency', {})
1317
- return transcript, status, metrics, word_freq_data
1318
- else:
1319
- return f"Transcription failed: {status}", "", {}, {}
1320
-
1321
  def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
1322
  """Handle targeted analysis"""
1323
  result = targeted_analysis(transcript, question, age_val, gender_val, notes)
@@ -1518,12 +1485,6 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
1518
  outputs=[output, transcript_input, transcription_status]
1519
  )
1520
 
1521
- transcribe_advanced_btn.click(
1522
- on_transcribe_advanced,
1523
- inputs=[transcription_file_input, enable_diarization],
1524
- outputs=[rich_transcript_display, transcription_status, transcription_metrics_display, transcription_word_freq_display]
1525
- )
1526
-
1527
  targeted_analyze_btn.click(
1528
  on_targeted_analyze,
1529
  inputs=[transcript_input, custom_question, age, gender, slp_notes],
@@ -1544,8 +1505,8 @@ with gr.Blocks(title="Enhanced Speech Sample Analysis", theme=gr.themes.Soft())
1544
 
1545
  if __name__ == "__main__":
1546
  print("πŸš€ Starting Enhanced Speech Sample Analysis Tool...")
1547
- print("πŸ“Š Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis, Advanced Transcription")
1548
- print("🎀 Transcription: Audio/Video support with speaker diarization, sentiment, and emotion analysis")
1549
  print("πŸ“ˆ Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")
1550
 
1551
  if not ANTHROPIC_API_KEY:
 
450
  logger.error(f"Error in diarization: {e}")
451
  return None, f"Diarization error: {str(e)}"
452
 
453
def transcribe_audio(audio_file):
    """Simple transcription for basic audio/video files.

    Args:
        audio_file: Path to an audio or video file (str). Video containers
            (.mp4/.avi/.mov/.mkv/.wmv/.flv) have their audio track extracted
            first via extract_audio_from_video.

    Returns:
        tuple: (transcript, status_message). transcript is None on failure,
        with the failure reason in status_message.
    """
    if not audio_file:
        return None, "No audio file provided"

    if not SPEECHBRAIN_AVAILABLE:
        return None, "SpeechBrain not available for transcription"

    try:
        # Video files need their audio track extracted before ASR.
        file_extension = os.path.splitext(audio_file)[1].lower()
        if file_extension in ['.mp4', '.avi', '.mov', '.mkv', '.wmv', '.flv']:
            processed_audio, status = extract_audio_from_video(audio_file)
            if not processed_audio:
                return None, status
        else:
            processed_audio = audio_file

        # Get transcription
        transcript = asr_model.transcribe_file(processed_audio)

        # Best-effort cleanup of the temporary extracted-audio file.
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # are not silently swallowed; only filesystem errors are ignored.
        if processed_audio != audio_file and os.path.exists(processed_audio):
            try:
                os.unlink(processed_audio)
            except OSError:
                pass

        return transcript, "Transcription completed successfully"

    except Exception as e:
        # Top-level error boundary for this helper: log and report, never raise.
        logger.error(f"Error in transcription: {e}")
        return None, f"Transcription error: {str(e)}"
486
+
487
  def transcribe_audio_with_metadata(audio_file, enable_diarization=True):
488
  """Transcribe audio with timestamps, sentiment, and metadata"""
489
  if not audio_file:
 
1257
  )
1258
 
1259
  quick_progress = gr.Markdown("")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1260
 
1261
  # Event handlers
1262
  def on_analyze_file(file, age_val, gender_val, notes):
 
1285
  else:
1286
  return f"Transcription failed: {status}", "", status
1287
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1288
  def on_targeted_analyze(transcript, question, age_val, gender_val, notes):
1289
  """Handle targeted analysis"""
1290
  result = targeted_analysis(transcript, question, age_val, gender_val, notes)
 
1485
  outputs=[output, transcript_input, transcription_status]
1486
  )
1487
 
 
 
 
 
 
 
1488
  targeted_analyze_btn.click(
1489
  on_targeted_analyze,
1490
  inputs=[transcript_input, custom_question, age, gender, slp_notes],
 
1505
 
1506
  if __name__ == "__main__":
1507
  print("πŸš€ Starting Enhanced Speech Sample Analysis Tool...")
1508
+ print("πŸ“Š Features: Basic Analysis, Targeted Questions, Quick Multi-Analysis")
1509
+ print("🎀 Transcription: Audio/Video support with basic transcription")
1510
  print("πŸ“ˆ Analysis: Complex sentences, figurative language, pragmatic skills, cognitive-linguistic factors")
1511
 
1512
  if not ANTHROPIC_API_KEY: