SreekarB committed on
Commit
2e9c5e8
·
verified ·
1 Parent(s): 309ccf7

Update annotated_casl_app.py

Browse files
Files changed (1) hide show
  1. annotated_casl_app.py +111 -153
annotated_casl_app.py CHANGED
@@ -137,7 +137,10 @@ def combine_sections_smartly(sections_dict):
137
 
138
 
139
  def call_claude_api_quick_analysis(prompt):
140
- """Call Claude API for quick focused analysis - single response only"""
 
 
 
141
  if not ANTHROPIC_API_KEY:
142
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
143
 
@@ -168,7 +171,16 @@ def call_claude_api_quick_analysis(prompt):
168
 
169
  if response.status_code == 200:
170
  response_json = response.json()
171
- return response_json['content'][0]['text']
 
 
 
 
 
 
 
 
 
172
  else:
173
  logger.error(f"Claude API error: {response.status_code} - {response.text}")
174
  return f"❌ Claude API Error: {response.status_code}"
@@ -1545,140 +1557,88 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1545
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1546
  - Count [REVISION] markers: Analyze self-correction patterns
1547
  - Count [PAUSE] markers: Assess hesitation frequency
1548
- - Calculate total disfluency rate
 
 
1549
 
1550
  B. Word Retrieval Issues:
1551
- - Count [CIRCUMLOCUTION] markers: List each roundabout description
1552
- - Count [INCOMPLETE] markers: Analyze abandoned thought patterns
1553
- - Count [GENERIC] markers: Calculate specificity ratio
1554
- - Count [WORD_SEARCH] markers: Identify retrieval difficulty areas
1555
-
1556
- C. Grammatical Errors:
1557
- - Count [GRAM_ERROR] markers by subcategory (verb tense, subject-verb agreement, etc.)
1558
- - Count [SYNTAX_ERROR] markers: Analyze word order problems
1559
- - Count [MORPH_ERROR] markers: Categorize morphological mistakes
1560
- - Count [RUN_ON] markers: Assess sentence boundary awareness
1561
-
1562
- 2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
1563
 
1564
- A. Lexical/Semantic Skills:
1565
- - Use calculated Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1566
- - Count [SIMPLE_VOCAB] vs [COMPLEX_VOCAB] markers
1567
- - Assess vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1568
- - Count [SEMANTIC_ERROR] markers and analyze patterns
1569
 
1570
- B. Syntactic Skills:
1571
- - Count [SIMPLE_SENT], [COMPLEX_SENT], [COMPOUND_SENT] markers
1572
- - Calculate sentence complexity ratios
1573
- - Assess clause complexity and embedding
1574
-
1575
- C. Supralinguistic Skills:
1576
- - Identify cause-effect relationships, inferences, non-literal language
1577
- - Assess problem-solving language and metalinguistic awareness
1578
-
1579
- 3. COMPLEX SENTENCE ANALYSIS (with exact counts):
1580
-
1581
- A. Coordinating Conjunctions:
1582
- - Count and cite EVERY use of: and, but, or, so, yet, for, nor
1583
- - Analyze patterns and age-appropriateness
1584
 
1585
- B. Subordinating Conjunctions:
1586
- - Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who
1587
- - Analyze clause complexity and embedding depth
 
 
 
1588
 
1589
- C. Sentence Structure Analysis:
1590
- - Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
1591
- - Calculate complexity ratios
 
1592
 
1593
- 4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
1594
 
1595
- A. Similes and Metaphors:
1596
- - Count [FIGURATIVE] markers for similes (using "like" or "as")
1597
- - Count [FIGURATIVE] markers for metaphors (direct comparisons)
 
 
1598
 
1599
- B. Idioms and Non-literal Language:
1600
- - Count and analyze idiomatic expressions
1601
- - Assess comprehension and appropriate use
 
1602
 
1603
- 5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
 
 
 
 
1604
 
1605
- A. Discourse Management:
1606
- - Count [TOPIC_SHIFT] markers: Assess transition appropriateness
1607
- - Count [TANGENT] markers: Analyze tangential speech patterns
1608
- - Count [COHERENCE_BREAK] markers: Assess logical flow
 
 
1609
 
1610
- B. Referential Communication:
1611
- - Count [PRONOUN_REF] markers: Analyze referential clarity
1612
- - Assess communicative effectiveness
 
 
1613
 
1614
- 6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
 
 
 
 
1615
 
1616
- A. Vocabulary Diversity:
1617
- - Total words: {linguistic_metrics.get('total_words', 0)}
 
1618
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
1619
- - Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1620
- - Vocabulary sophistication: {linguistic_metrics.get('vocabulary_sophistication', 0)}
1621
-
1622
- B. Semantic Relationships:
1623
- - Analyze word frequency patterns
1624
- - Assess semantic precision and relationships
1625
-
1626
- 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
1627
 
1628
- A. Morphological Markers:
1629
- - Count [MORPH_ERROR] markers and categorize
1630
- - Analyze morpheme use patterns
1631
- - Assess morphological complexity
1632
 
1633
- B. Phonological Patterns:
1634
- - Identify speech sound patterns from transcript
1635
- - Assess syllable structure complexity
1636
 
1637
- 8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
1638
-
1639
- A. Working Memory:
1640
- - Assess sentence length complexity using average: {linguistic_metrics.get('avg_sentence_length', 0)} words
1641
- - Analyze information retention patterns
1642
-
1643
- B. Processing Efficiency:
1644
- - Analyze linguistic complexity and word-finding patterns
1645
- - Assess cognitive demands of language structures
1646
-
1647
- C. Executive Function:
1648
- - Count self-correction patterns ([REVISION] markers)
1649
- - Assess planning and organization in discourse
1650
-
1651
- 9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
1652
-
1653
- A. Disfluency Patterns:
1654
- - Total fluency issues: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
1655
- - Calculate disfluency rate per 100 words
1656
- - Analyze impact on communication
1657
-
1658
- B. Language Flow:
1659
- - Assess sentence length variability: std = {linguistic_metrics.get('sentence_length_std', 0)}
1660
- - Analyze linguistic markers of hesitation
1661
-
1662
- 10. QUANTITATIVE METRICS:
1663
- - Total words: {linguistic_metrics.get('total_words', 0)}
1664
- - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
1665
- - MLU (words): {linguistic_metrics.get('mlu_words', 0)}
1666
- - MLU (morphemes): {linguistic_metrics.get('mlu_morphemes', 0)}
1667
- - Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1668
- - Grammar error rate: Calculate from marker counts
1669
- - Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1670
-
1671
- CRITICAL REQUIREMENTS:
1672
- - Use the provided calculated metrics in your analysis
1673
- - Provide EXACT counts for every marker type
1674
- - Calculate precise percentages and show your work
1675
- - Give specific examples from the transcript
1676
- - If annotation is incomplete, supplement with analysis of the original transcript
1677
- - Complete ALL 8 sections - use <CONTINUE> if needed
1678
- - Focus on objective data only - NO clinical interpretations
1679
  """
1680
 
1681
- return call_claude_api_with_continuation(analysis_prompt)
1682
 
1683
  def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
1684
  """Complete pipeline: annotate then analyze with progressive updates"""
@@ -1690,6 +1650,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1690
  if progress_callback:
1691
  progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
1692
 
 
1693
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1694
 
1695
  if annotated_transcript.startswith("❌"):
@@ -1697,7 +1658,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1697
 
1698
  # Return annotated transcript immediately
1699
  if progress_callback:
1700
- progress_callback("Step 1 Complete: Annotation finished! Starting analysis...")
1701
 
1702
  # Check if annotation was incomplete
1703
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
@@ -1709,12 +1670,12 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1709
  # Step 2: Analyze annotated transcript with original as backup
1710
  logger.info("Step 2: Analyzing annotated transcript...")
1711
  if progress_callback:
1712
- progress_callback("📊 Step 2: Analyzing annotated transcript (this may take several minutes)...")
1713
 
1714
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1715
 
1716
  if progress_callback:
1717
- progress_callback("Analysis Complete!")
1718
 
1719
  return annotated_transcript, analysis_note + analysis_result
1720
 
@@ -1726,7 +1687,7 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
1726
 
1727
  # Step 1: Annotate transcript
1728
  logger.info("Step 1: Annotating transcript with linguistic markers...")
1729
- yield "", "", "🏷️ Step 1: Annotating transcript with linguistic markers..."
1730
 
1731
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1732
 
@@ -1735,19 +1696,19 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
1735
  return
1736
 
1737
  # Return annotated transcript immediately after completion
1738
- yield annotated_transcript, "", "Step 1 Complete! Starting analysis..."
1739
 
1740
  # Check if annotation was incomplete
1741
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1742
  logger.warning("Annotation incomplete, proceeding with analysis")
1743
- analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1744
- yield annotated_transcript, "", "⚠️ Annotation incomplete, continuing with analysis..."
1745
  else:
1746
  analysis_note = ""
1747
 
1748
  # Step 2: Analyze annotated transcript
1749
  logger.info("Step 2: Analyzing annotated transcript...")
1750
- yield annotated_transcript, "", "📊 Step 2: Analyzing annotated transcript (this may take several minutes)..."
1751
 
1752
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1753
 
@@ -1806,10 +1767,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1806
  lines=3
1807
  )
1808
 
1809
- example_btn = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1810
-
1811
- # Single main analysis button
1812
- ultimate_analysis_btn = gr.Button("🚀 Run Complete Speech Analysis", variant="primary", size="lg")
1813
 
1814
  with gr.Column(scale=3):
1815
  status_display = gr.Markdown("Ready to analyze transcript")
@@ -1828,7 +1788,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1828
  show_copy_button=True
1829
  )
1830
 
1831
- with gr.Tab("🏷️ Annotation Only"):
1832
  gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
1833
 
1834
  with gr.Row():
@@ -1852,8 +1812,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1852
  lines=3
1853
  )
1854
 
1855
- example_btn_2 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1856
- annotate_btn = gr.Button("🏷️ Annotate Transcript", variant="secondary")
 
1857
 
1858
  with gr.Column():
1859
  annotation_output = gr.Textbox(
@@ -1905,8 +1866,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1905
  q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
1906
  q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
1907
 
1908
- example_btn_4 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1909
- ask_question_btn = gr.Button("Ask Question", variant="primary")
1910
 
1911
  with gr.Column():
1912
  question_output = gr.Textbox(
@@ -1915,7 +1876,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1915
  show_copy_button=True
1916
  )
1917
 
1918
- with gr.Tab("🎯 Targeted Analysis"):
1919
  gr.Markdown("### Focus on specific areas of speech and language")
1920
 
1921
  with gr.Row():
@@ -1952,8 +1913,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1952
  lines=2
1953
  )
1954
 
1955
- example_btn_5 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1956
- targeted_analysis_btn = gr.Button("🎯 Run Targeted Analysis", variant="primary")
1957
 
1958
  with gr.Column():
1959
  targeted_output = gr.Textbox(
@@ -1991,11 +1952,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1991
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1992
 
1993
  if annotated_transcript.startswith("❌"):
1994
- return annotated_transcript, "Annotation failed"
1995
  elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1996
- return annotated_transcript, "⚠️ Annotation incomplete but proceeding"
1997
  else:
1998
- return annotated_transcript, "Annotation complete! Click 'Run Analysis' to continue."
1999
 
2000
  def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
2001
  """Run the analysis step on the annotated transcript"""
@@ -2006,11 +1967,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2006
 
2007
  # Check if annotation was incomplete
2008
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
2009
- analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
2010
  else:
2011
  analysis_note = ""
2012
 
2013
  analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
 
2014
  return analysis_note + analysis_result
2015
 
2016
  def run_manual_count_only(annotated_transcript):
@@ -2175,8 +2137,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2175
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2176
  * Categorize types (word, phrase, sound level)
2177
  * Provide examples and count summary
2178
- - Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
2179
- * Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
 
 
 
 
2180
  * Provide objective rate calculation
2181
 
2182
  B. Word Retrieval Issues:
@@ -2196,7 +2162,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2196
  A. Vocabulary Analysis (use verified data):
2197
  - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
2198
  - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
2199
- - Sophistication ratio: Use verified ratio of {category_totals['vocab_sophistication_ratio']:.3f}
2200
  - Type-Token Ratio: Use verified TTR from basic metrics
2201
  - Provide examples of each vocabulary level from transcript
2202
 
@@ -2274,7 +2240,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2274
  # Step 2: Run analysis
2275
  analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
2276
 
2277
- return annotated_transcript, analysis_result, "Complete analysis finished!"
2278
 
2279
  def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
2280
  """Run the complete speech analysis pipeline with ultimate analysis"""
@@ -2290,7 +2256,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2290
  # Step 2: Run ultimate analysis
2291
  ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
2292
 
2293
- return annotated_transcript, ultimate_result, "Complete speech analysis finished!"
2294
 
2295
  # Single main event handler
2296
  ultimate_analysis_btn.click(
@@ -2319,12 +2285,4 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2319
  fn=analyze_targeted_area,
2320
  inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
2321
  outputs=[targeted_output]
2322
- )
2323
-
2324
- if __name__ == "__main__":
2325
- demo.launch(
2326
- server_name="0.0.0.0",
2327
- server_port=7860,
2328
- share=True,
2329
- show_error=True
2330
  )
 
137
 
138
 
139
  def call_claude_api_quick_analysis(prompt):
140
+ """Call Claude API for quick focused analysis - single response only
141
+ Responses are cleaned to remove asterisks, hashtags, and convert simple tables to lists
142
+ to match formatting used in the main analysis pipeline.
143
+ """
144
  if not ANTHROPIC_API_KEY:
145
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
146
 
 
171
 
172
  if response.status_code == 200:
173
  response_json = response.json()
174
+ response_text = response_json['content'][0]['text']
175
+
176
+ # Clean formatting (remove asterisks, hashtags, convert simple tables) so
177
+ # Targeted Analysis and Quick Questions match the main analysis output
178
+ try:
179
+ cleaned = clean_output_formatting(response_text)
180
+ except Exception:
181
+ # If cleaning fails for any reason, fall back to raw response
182
+ cleaned = response_text
183
+ return cleaned
184
  else:
185
  logger.error(f"Claude API error: {response.status_code} - {response.text}")
186
  return f"❌ Claude API Error: {response.status_code}"
 
1557
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1558
  - Count [REVISION] markers: Analyze self-correction patterns
1559
  - Count [PAUSE] markers: Assess hesitation frequency
1560
+ - Total disfluency assessment: Use verified total of {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
1561
+ * Rate: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
1562
+ * Provide objective rate calculation
1563
 
1564
  B. Word Retrieval Issues:
1565
+ - Circumlocutions: Count and analyze from transcript
1566
+ - Incomplete thoughts: Identify abandoned utterances
1567
+ - Generic language use: Count vague terms
1568
+ - Word-finding efficiency: Assess retrieval success rate
 
 
 
 
 
 
 
 
1569
 
1570
+ C. Grammatical Errors (use verified counts):
1571
+ - Grammar errors: Use verified count of {marker_counts.get('GRAM_ERROR', 0)}
1572
+ - Syntax errors: Use verified count of {marker_counts.get('SYNTAX_ERROR', 0)}
1573
+ - Morphological errors: Use verified count of {marker_counts.get('MORPH_ERROR', 0)}
1574
+ - Calculate overall grammatical accuracy rate
1575
 
1576
+ 2. LANGUAGE SKILLS ASSESSMENT
 
 
 
 
 
 
 
 
 
 
 
 
 
1577
 
1578
+ A. Vocabulary Analysis (use verified data):
1579
+ - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
1580
+ - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
1581
+ - Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
1582
+ - Type-Token Ratio: Use verified TTR from basic metrics
1583
+ - Provide examples of each vocabulary level from transcript
1584
 
1585
+ B. Grammar and Morphology:
1586
+ - Error pattern analysis using verified counts
1587
+ - Pattern analysis only
1588
+ - Morphological complexity evaluation
1589
 
1590
+ 3. COMPLEX SENTENCE ANALYSIS (use verified counts)
1591
 
1592
+ A. Sentence Structure Distribution:
1593
+ - Simple sentences: Use verified count of {marker_counts.get('SIMPLE_SENT', 0)}
1594
+ - Complex sentences: Use verified count of {marker_counts.get('COMPLEX_SENT', 0)}
1595
+ - Compound sentences: Use verified count of {marker_counts.get('COMPOUND_SENT', 0)}
1596
+ - Calculate percentages of each type
1597
 
1598
+ B. Syntactic Complexity:
1599
+ - MLU analysis: Use verified MLU of {linguistic_metrics.get('mlu_words', 0):.2f} words
1600
+ - Average sentence length: Use verified length of {linguistic_metrics.get('avg_sentence_length', 0):.2f} words
1601
+ - Subordination and coordination patterns
1602
 
1603
+ 4. FIGURATIVE LANGUAGE ANALYSIS
1604
+ - Figurative expressions: Use verified count of {marker_counts.get('FIGURATIVE', 0)}
1605
+ - Metaphor and idiom identification from transcript
1606
+ - Age-appropriate development assessment
1607
+ - Abstract language abilities
1608
 
1609
+ 5. PRAGMATIC LANGUAGE ASSESSMENT
1610
+ - Topic shifts: Use verified count of {marker_counts.get('TOPIC_SHIFT', 0)}
1611
+ - Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
1612
+ - Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
1613
+ - Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
1614
+ - Overall conversational patterns observed
1615
 
1616
+ 6. VOCABULARY AND SEMANTIC ANALYSIS
1617
+ - Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
1618
+ - Lexical diversity: Use verified measures from stats summary
1619
+ - Word association patterns from transcript analysis
1620
+ - Semantic precision and appropriateness
1621
 
1622
+ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
1623
+ - Morphological complexity assessment
1624
+ - Derivational and inflectional morphology patterns
1625
+ - Error analysis using verified counts
1626
+ - Pattern analysis only
1627
 
1628
+ 8. QUANTITATIVE METRICS AND NLP FEATURES (use ALL verified data)
1629
+ - Total words: {total_words}
1630
+ - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
1631
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
1632
+ - MLU words: {linguistic_metrics.get('mlu_words', 0):.2f}
1633
+ - MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
1634
+ - All error rates and ratios from verified counts
 
 
 
 
 
1635
 
 
 
 
 
1636
 
 
 
 
1637
 
1638
+ CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1639
  """
1640
 
1641
+ return call_claude_api_with_continuation(final_prompt)
1642
 
1643
  def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
1644
  """Complete pipeline: annotate then analyze with progressive updates"""
 
1650
  if progress_callback:
1651
  progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
1652
 
1653
+
1654
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1655
 
1656
  if annotated_transcript.startswith("❌"):
 
1658
 
1659
  # Return annotated transcript immediately
1660
  if progress_callback:
1661
+ progress_callback("Step 1 Complete: Annotation finished! Starting analysis...")
1662
 
1663
  # Check if annotation was incomplete
1664
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
 
1670
  # Step 2: Analyze annotated transcript with original as backup
1671
  logger.info("Step 2: Analyzing annotated transcript...")
1672
  if progress_callback:
1673
+ progress_callback("Step 2: Analyzing annotated transcript (this may take several minutes)...")
1674
 
1675
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1676
 
1677
  if progress_callback:
1678
+ progress_callback("Analysis Complete!")
1679
 
1680
  return annotated_transcript, analysis_note + analysis_result
1681
 
 
1687
 
1688
  # Step 1: Annotate transcript
1689
  logger.info("Step 1: Annotating transcript with linguistic markers...")
1690
+ yield "", "", "Step 1: Annotating transcript with linguistic markers..."
1691
 
1692
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1693
 
 
1696
  return
1697
 
1698
  # Return annotated transcript immediately after completion
1699
+ yield annotated_transcript, "", "Step 1 Complete! Starting analysis..."
1700
 
1701
  # Check if annotation was incomplete
1702
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1703
  logger.warning("Annotation incomplete, proceeding with analysis")
1704
+ analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1705
+ yield annotated_transcript, "", "Annotation incomplete, continuing with analysis..."
1706
  else:
1707
  analysis_note = ""
1708
 
1709
  # Step 2: Analyze annotated transcript
1710
  logger.info("Step 2: Analyzing annotated transcript...")
1711
+ yield annotated_transcript, "", "Step 2: Analyzing annotated transcript (this may take several minutes)..."
1712
 
1713
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1714
 
 
1767
  lines=3
1768
  )
1769
 
1770
+ with gr.Row():
1771
+ example_btn = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1772
+ ultimate_analysis_btn = gr.Button("Run Complete Speech Analysis", variant="primary", size="lg")
 
1773
 
1774
  with gr.Column(scale=3):
1775
  status_display = gr.Markdown("Ready to analyze transcript")
 
1788
  show_copy_button=True
1789
  )
1790
 
1791
+ with gr.Tab("Annotation Only"):
1792
  gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
1793
 
1794
  with gr.Row():
 
1812
  lines=3
1813
  )
1814
 
1815
+ with gr.Row():
1816
+ example_btn_2 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1817
+ annotate_btn = gr.Button("Annotate Transcript", variant="secondary")
1818
 
1819
  with gr.Column():
1820
  annotation_output = gr.Textbox(
 
1866
  q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
1867
  q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
1868
 
1869
+ example_btn_4 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1870
+ ask_question_btn = gr.Button("Ask Question", variant="primary")
1871
 
1872
  with gr.Column():
1873
  question_output = gr.Textbox(
 
1876
  show_copy_button=True
1877
  )
1878
 
1879
+ with gr.Tab("Targeted Analysis"):
1880
  gr.Markdown("### Focus on specific areas of speech and language")
1881
 
1882
  with gr.Row():
 
1913
  lines=2
1914
  )
1915
 
1916
+ example_btn_5 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1917
+ targeted_analysis_btn = gr.Button("Run Targeted Analysis", variant="primary")
1918
 
1919
  with gr.Column():
1920
  targeted_output = gr.Textbox(
 
1952
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1953
 
1954
  if annotated_transcript.startswith("❌"):
1955
+ return annotated_transcript, "Annotation failed"
1956
  elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1957
+ return annotated_transcript, "Annotation incomplete but proceeding"
1958
  else:
1959
+ return annotated_transcript, "Annotation complete! Click 'Run Analysis' to continue."
1960
 
1961
  def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
1962
  """Run the analysis step on the annotated transcript"""
 
1967
 
1968
  # Check if annotation was incomplete
1969
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1970
+ analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1971
  else:
1972
  analysis_note = ""
1973
 
1974
  analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
1975
+
1976
  return analysis_note + analysis_result
1977
 
1978
  def run_manual_count_only(annotated_transcript):
 
2137
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2138
  * Categorize types (word, phrase, sound level)
2139
  * Provide examples and count summary
2140
+ - Revisions: Use verified count of {marker_counts.get('REVISION', 0)}
2141
+ * Analyze self-correction patterns
2142
+ - Pauses: Use verified count of {marker_counts.get('PAUSE', 0)}
2143
+ * Assess hesitation frequency
2144
+ - Total disfluency assessment: Use verified total of {category_totals.get('fluency_issues', 0)}
2145
+ * Rate: {category_totals.get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
2146
  * Provide objective rate calculation
2147
 
2148
  B. Word Retrieval Issues:
 
2162
  A. Vocabulary Analysis (use verified data):
2163
  - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
2164
  - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
2165
+ - Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
2166
  - Type-Token Ratio: Use verified TTR from basic metrics
2167
  - Provide examples of each vocabulary level from transcript
2168
 
 
2240
  # Step 2: Run analysis
2241
  analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
2242
 
2243
+ return annotated_transcript, analysis_result, "Complete analysis finished!"
2244
 
2245
  def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
2246
  """Run the complete speech analysis pipeline with ultimate analysis"""
 
2256
  # Step 2: Run ultimate analysis
2257
  ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
2258
 
2259
+ return annotated_transcript, ultimate_result, "Complete speech analysis finished!"
2260
 
2261
  # Single main event handler
2262
  ultimate_analysis_btn.click(
 
2285
  fn=analyze_targeted_area,
2286
  inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
2287
  outputs=[targeted_output]
 
 
 
 
 
 
 
 
2288
  )