sudhanm committed on
Commit
751fdfd
·
verified ·
1 Parent(s): 5954007

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -49
app.py CHANGED
@@ -313,55 +313,99 @@ def transcribe_audio(audio_path, language_choice):
313
 
314
  # ---------------- FEEDBACK SYSTEM ---------------- #
315
 
 
 
 
 
 
 
 
 
316
  def create_feedback(intended, actual, lang_choice):
317
- """Create simple feedback comparison"""
318
  # Get transliterations
319
  intended_roman = transliterate_with_qwen(intended, lang_choice)
320
  actual_roman = transliterate_with_qwen(actual, lang_choice)
321
 
 
 
 
 
322
  # Calculate accuracy
323
- intended_words = intended.strip().split()
324
- actual_words = actual.strip().split()
325
 
326
  # Simple word-level accuracy
327
  sm = difflib.SequenceMatcher(None, intended_words, actual_words)
328
  accuracy = sm.ratio() * 100
329
 
330
- # Create feedback HTML
331
- feedback_html = f"""
332
- <div style='font-family: Arial, sans-serif; padding: 20px;'>
333
- <h3 style='color: #2c3e50; text-align: center;'>📊 Pronunciation Analysis</h3>
334
-
335
- <table style='width: 100%; border-collapse: collapse; margin: 20px 0;'>
336
- <tr style='background: #f8f9fa;'>
337
- <td style='padding: 15px; font-weight: bold; border: 1px solid #ddd;'>Target</td>
338
- <td style='padding: 15px; border: 1px solid #ddd; font-family: monospace;'>{intended}</td>
339
- </tr>
340
- <tr style='background: #f8f9fa;'>
341
- <td style='padding: 15px; font-weight: bold; border: 1px solid #ddd;'>Romanized</td>
342
- <td style='padding: 15px; border: 1px solid #ddd; font-family: monospace; color: #666;'>{intended_roman}</td>
343
- </tr>
344
- <tr>
345
- <td style='padding: 15px; font-weight: bold; border: 1px solid #ddd;'>You Said</td>
346
- <td style='padding: 15px; border: 1px solid #ddd; font-family: monospace;'>{actual}</td>
347
- </tr>
348
- <tr>
349
- <td style='padding: 15px; font-weight: bold; border: 1px solid #ddd;'>Your Romanized</td>
350
- <td style='padding: 15px; border: 1px solid #ddd; font-family: monospace; color: #666;'>{actual_roman}</td>
351
- </tr>
352
- </table>
353
-
354
- <div style='text-align: center; padding: 20px; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; border-radius: 10px;'>
355
- <h4 style='margin: 0 0 10px 0;'>Accuracy Score</h4>
356
- <div style='font-size: 36px; font-weight: bold;'>{accuracy:.0f}%</div>
357
- <div style='margin-top: 10px;'>
358
- {'🎉 Excellent!' if accuracy >= 90 else '👍 Good job!' if accuracy >= 70 else '📚 Keep practicing!'}
359
- </div>
360
- </div>
361
- </div>
362
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
 
364
- return feedback_html, accuracy
 
 
 
 
 
 
 
 
 
 
 
 
365
 
366
  # ---------------- MAIN FUNCTION ---------------- #
367
 
@@ -369,7 +413,7 @@ def create_feedback(intended, actual, lang_choice):
369
  def analyze_pronunciation(audio, lang_choice, intended_text):
370
  """Main function to analyze pronunciation"""
371
  if audio is None or not intended_text.strip():
372
- return "⚠️ Please record audio and generate a sentence first.", "", "", ""
373
 
374
  try:
375
  # Extract original sentence (remove romanization if present)
@@ -382,7 +426,7 @@ def analyze_pronunciation(audio, lang_choice, intended_text):
382
  actual_text = transcribe_audio(audio, lang_choice)
383
 
384
  if not actual_text.strip():
385
- return "⚠️ No speech detected. Please try recording again.", "", "", ""
386
 
387
  # Calculate metrics
388
  wer_val = jiwer.wer(intended_sentence, actual_text)
@@ -391,13 +435,13 @@ def analyze_pronunciation(audio, lang_choice, intended_text):
391
  # Get romanizations
392
  actual_roman = transliterate_with_qwen(actual_text, lang_choice)
393
 
394
- # Create feedback
395
- feedback_html, accuracy = create_feedback(intended_sentence, actual_text, lang_choice)
396
 
397
- return actual_text, actual_roman, f"{wer_val:.1%}", feedback_html
398
 
399
  except Exception as e:
400
- return f"❌ Error: {str(e)}", "", "", ""
401
 
402
  # ---------------- HELPERS ---------------- #
403
 
@@ -415,18 +459,18 @@ def get_random_sentence_with_transliteration(language_choice):
415
  with gr.Blocks(title="AI Pronunciation Coach", theme=gr.themes.Soft()) as demo:
416
  gr.Markdown("""
417
  # 🎙️ AI Pronunciation Coach
418
- ### Practice English, Tamil & Malayalam with AI feedback powered by Gemma-3-4B
419
 
420
  **Features:**
421
- - ✨ **Smart Transliteration**: Natural Thanglish/Manglish using Gemma-3-4B-IT
422
  - 🎯 **Accurate Recognition**: Language-specific Whisper models
423
- - 📊 **Instant Feedback**: Real-time pronunciation analysis
424
 
425
  **How to use:**
426
  1. Select your language
427
  2. Generate a practice sentence
428
  3. Record yourself reading it aloud
429
- 4. Get instant feedback!
430
  """)
431
 
432
  with gr.Row():
@@ -457,7 +501,24 @@ with gr.Blocks(title="AI Pronunciation Coach", theme=gr.themes.Soft()) as demo:
457
  actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Romanized)", interactive=False)
458
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
459
 
460
- feedback_display = gr.HTML()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
 
462
  # Event handlers
463
  gen_btn.click(
@@ -469,7 +530,7 @@ with gr.Blocks(title="AI Pronunciation Coach", theme=gr.themes.Soft()) as demo:
469
  analyze_btn.click(
470
  fn=analyze_pronunciation,
471
  inputs=[audio_input, lang_choice, intended_display],
472
- outputs=[actual_out, actual_roman_out, wer_out, feedback_display]
473
  )
474
 
475
  if __name__ == "__main__":
 
313
 
314
  # ---------------- FEEDBACK SYSTEM ---------------- #
315
 
316
+ def normalize_text_for_comparison(text):
317
+ """Remove punctuation and normalize text for fair comparison"""
318
+ import string
319
+ # Remove punctuation and extra spaces
320
+ text = text.translate(str.maketrans('', '', string.punctuation))
321
+ text = ' '.join(text.split()) # Normalize spaces
322
+ return text.lower()
323
+
324
  def create_feedback(intended, actual, lang_choice):
325
+ """Create simple feedback comparison with tables"""
326
  # Get transliterations
327
  intended_roman = transliterate_with_qwen(intended, lang_choice)
328
  actual_roman = transliterate_with_qwen(actual, lang_choice)
329
 
330
+ # Normalize for comparison (remove punctuation)
331
+ intended_normalized = normalize_text_for_comparison(intended)
332
+ actual_normalized = normalize_text_for_comparison(actual)
333
+
334
  # Calculate accuracy
335
+ intended_words = intended_normalized.split()
336
+ actual_words = actual_normalized.split()
337
 
338
  # Simple word-level accuracy
339
  sm = difflib.SequenceMatcher(None, intended_words, actual_words)
340
  accuracy = sm.ratio() * 100
341
 
342
+ # Create comparison data for table
343
+ comparison_data = [
344
+ ["Target Text", intended],
345
+ ["Target (Romanized)", intended_roman],
346
+ ["Your Speech", actual],
347
+ ["Your Speech (Romanized)", actual_roman],
348
+ ["Accuracy Score", f"{accuracy:.1f}%"]
349
+ ]
350
+
351
+ # Find incorrect words for pronunciation table
352
+ wrong_pronunciations = []
353
+
354
+ # Get word-level differences
355
+ for tag, i1, i2, j1, j2 in sm.get_opcodes():
356
+ if tag == 'replace':
357
+ # Words that were pronounced differently
358
+ for idx in range(max(i2-i1, j2-j1)):
359
+ expected_word = intended_words[i1 + idx] if (i1 + idx) < i2 else ""
360
+ actual_word = actual_words[j1 + idx] if (j1 + idx) < j2 else ""
361
+
362
+ if expected_word and actual_word and expected_word != actual_word:
363
+ # Get romanized versions
364
+ expected_roman = transliterate_with_qwen(expected_word, lang_choice)
365
+ actual_roman = transliterate_with_qwen(actual_word, lang_choice)
366
+
367
+ wrong_pronunciations.append([
368
+ expected_word,
369
+ expected_roman,
370
+ actual_word,
371
+ actual_roman
372
+ ])
373
+ elif tag == 'delete':
374
+ # Missing words
375
+ for idx in range(i2-i1):
376
+ expected_word = intended_words[i1 + idx]
377
+ expected_roman = transliterate_with_qwen(expected_word, lang_choice)
378
+ wrong_pronunciations.append([
379
+ expected_word,
380
+ expected_roman,
381
+ "(Not spoken)",
382
+ ""
383
+ ])
384
+ elif tag == 'insert':
385
+ # Extra words
386
+ for idx in range(j2-j1):
387
+ actual_word = actual_words[j1 + idx]
388
+ actual_roman = transliterate_with_qwen(actual_word, lang_choice)
389
+ wrong_pronunciations.append([
390
+ "(Not expected)",
391
+ "",
392
+ actual_word,
393
+ actual_roman
394
+ ])
395
 
396
+ # Create motivational message
397
+ if accuracy >= 95:
398
+ message = "🎉 Outstanding! Perfect pronunciation!"
399
+ elif accuracy >= 85:
400
+ message = "🌟 Excellent! Very natural sounding!"
401
+ elif accuracy >= 70:
402
+ message = "👍 Good job! Your pronunciation is improving!"
403
+ elif accuracy >= 50:
404
+ message = "📚 Getting there! Focus on the highlighted sounds!"
405
+ else:
406
+ message = "💪 Keep practicing! Every attempt makes you better!"
407
+
408
+ return comparison_data, wrong_pronunciations, message, accuracy
409
 
410
  # ---------------- MAIN FUNCTION ---------------- #
411
 
 
413
  def analyze_pronunciation(audio, lang_choice, intended_text):
414
  """Main function to analyze pronunciation"""
415
  if audio is None or not intended_text.strip():
416
+ return "⚠️ Please record audio and generate a sentence first.", "", "", [], [], ""
417
 
418
  try:
419
  # Extract original sentence (remove romanization if present)
 
426
  actual_text = transcribe_audio(audio, lang_choice)
427
 
428
  if not actual_text.strip():
429
+ return "⚠️ No speech detected. Please try recording again.", "", "", [], [], ""
430
 
431
  # Calculate metrics
432
  wer_val = jiwer.wer(intended_sentence, actual_text)
 
435
  # Get romanizations
436
  actual_roman = transliterate_with_qwen(actual_text, lang_choice)
437
 
438
+ # Create feedback tables
439
+ comparison_data, wrong_pronunciations, message, accuracy = create_feedback(intended_sentence, actual_text, lang_choice)
440
 
441
+ return actual_text, actual_roman, f"{wer_val:.1%}", comparison_data, wrong_pronunciations, message
442
 
443
  except Exception as e:
444
+ return f"❌ Error: {str(e)}", "", "", [], [], ""
445
 
446
  # ---------------- HELPERS ---------------- #
447
 
 
459
  with gr.Blocks(title="AI Pronunciation Coach", theme=gr.themes.Soft()) as demo:
460
  gr.Markdown("""
461
  # 🎙️ AI Pronunciation Coach
462
+ ### Practice English, Tamil & Malayalam with AI feedback powered by top open-source LLMs
463
 
464
  **Features:**
465
+ - ✨ **Advanced Transliteration**: Natural Thanglish/Manglish using Qwen2.5-7B/Llama3.1-8B
466
  - 🎯 **Accurate Recognition**: Language-specific Whisper models
467
+ - 📊 **Smart Analysis**: Punctuation-aware comparison with correction tables
468
 
469
  **How to use:**
470
  1. Select your language
471
  2. Generate a practice sentence
472
  3. Record yourself reading it aloud
473
+ 4. Get instant feedback with detailed analysis!
474
  """)
475
 
476
  with gr.Row():
 
501
  actual_roman_out = gr.Textbox(label="🔤 Your Pronunciation (Romanized)", interactive=False)
502
  wer_out = gr.Textbox(label="📊 Word Error Rate", interactive=False)
503
 
504
+ # Analysis tables
505
+ gr.Markdown("### 📊 Analysis Results")
506
+
507
+ with gr.Row():
508
+ with gr.Column():
509
+ comparison_table = gr.Dataframe(
510
+ headers=["Metric", "Value"],
511
+ label="📋 Overall Comparison",
512
+ interactive=False
513
+ )
514
+ with gr.Column():
515
+ pronunciation_table = gr.Dataframe(
516
+ headers=["Expected Word", "Expected (Romanized)", "You Said", "You Said (Romanized)"],
517
+ label="❌ Pronunciation Corrections Needed",
518
+ interactive=False
519
+ )
520
+
521
+ feedback_message = gr.Textbox(label="💬 Feedback", interactive=False)
522
 
523
  # Event handlers
524
  gen_btn.click(
 
530
  analyze_btn.click(
531
  fn=analyze_pronunciation,
532
  inputs=[audio_input, lang_choice, intended_display],
533
+ outputs=[actual_out, actual_roman_out, wer_out, comparison_table, pronunciation_table, feedback_message]
534
  )
535
 
536
  if __name__ == "__main__":