SreekarB committed on
Commit
c349eca
·
verified ·
1 Parent(s): 7b778e1

Update annotated_casl_app.py

Browse files
Files changed (1) hide show
  1. annotated_casl_app.py +154 -111
annotated_casl_app.py CHANGED
@@ -5,6 +5,7 @@ import logging
5
  import requests
6
  import re
7
  import time
 
8
  # Configure logging
9
  logging.basicConfig(level=logging.INFO)
10
  logger = logging.getLogger(__name__)
@@ -136,10 +137,7 @@ def combine_sections_smartly(sections_dict):
136
 
137
 
138
  def call_claude_api_quick_analysis(prompt):
139
- """Call Claude API for quick focused analysis - single response only
140
- Responses are cleaned to remove asterisks, hashtags, and convert simple tables to lists
141
- to match formatting used in the main analysis pipeline.
142
- """
143
  if not ANTHROPIC_API_KEY:
144
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
145
 
@@ -170,16 +168,7 @@ def call_claude_api_quick_analysis(prompt):
170
 
171
  if response.status_code == 200:
172
  response_json = response.json()
173
- response_text = response_json['content'][0]['text']
174
-
175
- # Clean formatting (remove asterisks, hashtags, convert simple tables) so
176
- # Targeted Analysis and Quick Questions match the main analysis output
177
- try:
178
- cleaned = clean_output_formatting(response_text)
179
- except Exception:
180
- # If cleaning fails for any reason, fall back to raw response
181
- cleaned = response_text
182
- return cleaned
183
  else:
184
  logger.error(f"Claude API error: {response.status_code} - {response.text}")
185
  return f"❌ Claude API Error: {response.status_code}"
@@ -1556,88 +1545,140 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
1556
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1557
  - Count [REVISION] markers: Analyze self-correction patterns
1558
  - Count [PAUSE] markers: Assess hesitation frequency
1559
- - Total disfluency assessment: Use verified total of {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
1560
- * Rate: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
1561
- * Provide objective rate calculation
1562
 
1563
  B. Word Retrieval Issues:
1564
- - Circumlocutions: Count and analyze from transcript
1565
- - Incomplete thoughts: Identify abandoned utterances
1566
- - Generic language use: Count vague terms
1567
- - Word-finding efficiency: Assess retrieval success rate
1568
 
1569
- C. Grammatical Errors (use verified counts):
1570
- - Grammar errors: Use verified count of {marker_counts.get('GRAM_ERROR', 0)}
1571
- - Syntax errors: Use verified count of {marker_counts.get('SYNTAX_ERROR', 0)}
1572
- - Morphological errors: Use verified count of {marker_counts.get('MORPH_ERROR', 0)}
1573
- - Calculate overall grammatical accuracy rate
1574
 
1575
- 2. LANGUAGE SKILLS ASSESSMENT
1576
 
1577
- A. Vocabulary Analysis (use verified data):
1578
- - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
1579
- - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
1580
- - Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
1581
- - Type-Token Ratio: Use verified TTR from basic metrics
1582
- - Provide examples of each vocabulary level from transcript
1583
 
1584
- B. Grammar and Morphology:
1585
- - Error pattern analysis using verified counts
1586
- - Pattern analysis only
1587
- - Morphological complexity evaluation
1588
 
1589
- 3. COMPLEX SENTENCE ANALYSIS (use verified counts)
 
 
1590
 
1591
- A. Sentence Structure Distribution:
1592
- - Simple sentences: Use verified count of {marker_counts.get('SIMPLE_SENT', 0)}
1593
- - Complex sentences: Use verified count of {marker_counts.get('COMPLEX_SENT', 0)}
1594
- - Compound sentences: Use verified count of {marker_counts.get('COMPOUND_SENT', 0)}
1595
- - Calculate percentages of each type
1596
 
1597
- B. Syntactic Complexity:
1598
- - MLU analysis: Use verified MLU of {linguistic_metrics.get('mlu_words', 0):.2f} words
1599
- - Average sentence length: Use verified length of {linguistic_metrics.get('avg_sentence_length', 0):.2f} words
1600
- - Subordination and coordination patterns
1601
 
1602
- 4. FIGURATIVE LANGUAGE ANALYSIS
1603
- - Figurative expressions: Use verified count of {marker_counts.get('FIGURATIVE', 0)}
1604
- - Metaphor and idiom identification from transcript
1605
- - Age-appropriate development assessment
1606
- - Abstract language abilities
1607
 
1608
- 5. PRAGMATIC LANGUAGE ASSESSMENT
1609
- - Topic shifts: Use verified count of {marker_counts.get('TOPIC_SHIFT', 0)}
1610
- - Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
1611
- - Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
1612
- - Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
1613
- - Overall conversational patterns observed
1614
 
1615
- 6. VOCABULARY AND SEMANTIC ANALYSIS
1616
- - Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
1617
- - Lexical diversity: Use verified measures from stats summary
1618
- - Word association patterns from transcript analysis
1619
- - Semantic precision and appropriateness
1620
 
1621
- 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
1622
- - Morphological complexity assessment
1623
- - Derivational and inflectional morphology patterns
1624
- - Error analysis using verified counts
1625
- - Pattern analysis only
1626
 
1627
- 8. QUANTITATIVE METRICS AND NLP FEATURES (use ALL verified data)
1628
- - Total words: {total_words}
1629
- - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1630
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
1631
- - MLU words: {linguistic_metrics.get('mlu_words', 0):.2f}
1632
- - MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
1633
- - All error rates and ratios from verified counts
 
 
 
 
 
1634
 
 
 
 
 
1635
 
 
 
 
1636
 
1637
- CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1638
  """
1639
 
1640
- return call_claude_api_with_continuation(final_prompt)
1641
 
1642
  def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
1643
  """Complete pipeline: annotate then analyze with progressive updates"""
@@ -1649,7 +1690,6 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1649
  if progress_callback:
1650
  progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
1651
 
1652
-
1653
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1654
 
1655
  if annotated_transcript.startswith("❌"):
@@ -1657,7 +1697,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1657
 
1658
  # Return annotated transcript immediately
1659
  if progress_callback:
1660
- progress_callback("Step 1 Complete: Annotation finished! Starting analysis...")
1661
 
1662
  # Check if annotation was incomplete
1663
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
@@ -1669,12 +1709,12 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
1669
  # Step 2: Analyze annotated transcript with original as backup
1670
  logger.info("Step 2: Analyzing annotated transcript...")
1671
  if progress_callback:
1672
- progress_callback("Step 2: Analyzing annotated transcript (this may take several minutes)...")
1673
 
1674
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1675
 
1676
  if progress_callback:
1677
- progress_callback("Analysis Complete!")
1678
 
1679
  return annotated_transcript, analysis_note + analysis_result
1680
 
@@ -1686,7 +1726,7 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
1686
 
1687
  # Step 1: Annotate transcript
1688
  logger.info("Step 1: Annotating transcript with linguistic markers...")
1689
- yield "", "", "Step 1: Annotating transcript with linguistic markers..."
1690
 
1691
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1692
 
@@ -1695,19 +1735,19 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
1695
  return
1696
 
1697
  # Return annotated transcript immediately after completion
1698
- yield annotated_transcript, "", "Step 1 Complete! Starting analysis..."
1699
 
1700
  # Check if annotation was incomplete
1701
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1702
  logger.warning("Annotation incomplete, proceeding with analysis")
1703
- analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1704
- yield annotated_transcript, "", "Annotation incomplete, continuing with analysis..."
1705
  else:
1706
  analysis_note = ""
1707
 
1708
  # Step 2: Analyze annotated transcript
1709
  logger.info("Step 2: Analyzing annotated transcript...")
1710
- yield annotated_transcript, "", "Step 2: Analyzing annotated transcript (this may take several minutes)..."
1711
 
1712
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1713
 
@@ -1766,9 +1806,10 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1766
  lines=3
1767
  )
1768
 
1769
- with gr.Row():
1770
- example_btn = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1771
- ultimate_analysis_btn = gr.Button("Run Complete Speech Analysis", variant="primary", size="lg")
 
1772
 
1773
  with gr.Column(scale=3):
1774
  status_display = gr.Markdown("Ready to analyze transcript")
@@ -1787,7 +1828,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1787
  show_copy_button=True
1788
  )
1789
 
1790
- with gr.Tab("Annotation Only"):
1791
  gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
1792
 
1793
  with gr.Row():
@@ -1811,9 +1852,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1811
  lines=3
1812
  )
1813
 
1814
- with gr.Row():
1815
- example_btn_2 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1816
- annotate_btn = gr.Button("Annotate Transcript", variant="secondary")
1817
 
1818
  with gr.Column():
1819
  annotation_output = gr.Textbox(
@@ -1865,8 +1905,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1865
  q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
1866
  q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
1867
 
1868
- example_btn_4 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1869
- ask_question_btn = gr.Button("Ask Question", variant="primary")
1870
 
1871
  with gr.Column():
1872
  question_output = gr.Textbox(
@@ -1875,7 +1915,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1875
  show_copy_button=True
1876
  )
1877
 
1878
- with gr.Tab("Targeted Analysis"):
1879
  gr.Markdown("### Focus on specific areas of speech and language")
1880
 
1881
  with gr.Row():
@@ -1912,8 +1952,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1912
  lines=2
1913
  )
1914
 
1915
- example_btn_5 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
1916
- targeted_analysis_btn = gr.Button("Run Targeted Analysis", variant="primary")
1917
 
1918
  with gr.Column():
1919
  targeted_output = gr.Textbox(
@@ -1951,11 +1991,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1951
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1952
 
1953
  if annotated_transcript.startswith("❌"):
1954
- return annotated_transcript, "Annotation failed"
1955
  elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1956
- return annotated_transcript, "Annotation incomplete but proceeding"
1957
  else:
1958
- return annotated_transcript, "Annotation complete! Click 'Run Analysis' to continue."
1959
 
1960
  def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
1961
  """Run the analysis step on the annotated transcript"""
@@ -1966,12 +2006,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
1966
 
1967
  # Check if annotation was incomplete
1968
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1969
- analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1970
  else:
1971
  analysis_note = ""
1972
 
1973
  analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
1974
-
1975
  return analysis_note + analysis_result
1976
 
1977
  def run_manual_count_only(annotated_transcript):
@@ -2136,12 +2175,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2136
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2137
  * Categorize types (word, phrase, sound level)
2138
  * Provide examples and count summary
2139
- - Revisions: Use verified count of {marker_counts.get('REVISION', 0)}
2140
- * Analyze self-correction patterns
2141
- - Pauses: Use verified count of {marker_counts.get('PAUSE', 0)}
2142
- * Assess hesitation frequency
2143
- - Total disfluency assessment: Use verified total of {category_totals.get('fluency_issues', 0)}
2144
- * Rate: {category_totals.get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
2145
  * Provide objective rate calculation
2146
 
2147
  B. Word Retrieval Issues:
@@ -2161,7 +2196,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2161
  A. Vocabulary Analysis (use verified data):
2162
  - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
2163
  - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
2164
- - Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
2165
  - Type-Token Ratio: Use verified TTR from basic metrics
2166
  - Provide examples of each vocabulary level from transcript
2167
 
@@ -2239,7 +2274,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2239
  # Step 2: Run analysis
2240
  analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
2241
 
2242
- return annotated_transcript, analysis_result, "Complete analysis finished!"
2243
 
2244
  def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
2245
  """Run the complete speech analysis pipeline with ultimate analysis"""
@@ -2255,7 +2290,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2255
  # Step 2: Run ultimate analysis
2256
  ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
2257
 
2258
- return annotated_transcript, ultimate_result, "Complete speech analysis finished!"
2259
 
2260
  # Single main event handler
2261
  ultimate_analysis_btn.click(
@@ -2284,4 +2319,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
2284
  fn=analyze_targeted_area,
2285
  inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
2286
  outputs=[targeted_output]
 
 
 
 
 
 
 
 
2287
  )
 
5
  import requests
6
  import re
7
  import time
8
+
9
  # Configure logging
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
 
137
 
138
 
139
  def call_claude_api_quick_analysis(prompt):
140
+ """Call Claude API for quick focused analysis - single response only"""
 
 
 
141
  if not ANTHROPIC_API_KEY:
142
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
143
 
 
168
 
169
  if response.status_code == 200:
170
  response_json = response.json()
171
+ return response_json['content'][0]['text']
 
 
 
 
 
 
 
 
 
172
  else:
173
  logger.error(f"Claude API error: {response.status_code} - {response.text}")
174
  return f"❌ Claude API Error: {response.status_code}"
 
1545
  - Count [REPETITION] markers: Categorize by type (word, phrase, sound)
1546
  - Count [REVISION] markers: Analyze self-correction patterns
1547
  - Count [PAUSE] markers: Assess hesitation frequency
1548
+ - Calculate total disfluency rate
 
 
1549
 
1550
  B. Word Retrieval Issues:
1551
+ - Count [CIRCUMLOCUTION] markers: List each roundabout description
1552
+ - Count [INCOMPLETE] markers: Analyze abandoned thought patterns
1553
+ - Count [GENERIC] markers: Calculate specificity ratio
1554
+ - Count [WORD_SEARCH] markers: Identify retrieval difficulty areas
1555
 
1556
+ C. Grammatical Errors:
1557
+ - Count [GRAM_ERROR] markers by subcategory (verb tense, subject-verb agreement, etc.)
1558
+ - Count [SYNTAX_ERROR] markers: Analyze word order problems
1559
+ - Count [MORPH_ERROR] markers: Categorize morphological mistakes
1560
+ - Count [RUN_ON] markers: Assess sentence boundary awareness
1561
 
1562
+ 2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
1563
 
1564
+ A. Lexical/Semantic Skills:
1565
+ - Use calculated Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1566
+ - Count [SIMPLE_VOCAB] vs [COMPLEX_VOCAB] markers
1567
+ - Assess vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1568
+ - Count [SEMANTIC_ERROR] markers and analyze patterns
 
1569
 
1570
+ B. Syntactic Skills:
1571
+ - Count [SIMPLE_SENT], [COMPLEX_SENT], [COMPOUND_SENT] markers
1572
+ - Calculate sentence complexity ratios
1573
+ - Assess clause complexity and embedding
1574
 
1575
+ C. Supralinguistic Skills:
1576
+ - Identify cause-effect relationships, inferences, non-literal language
1577
+ - Assess problem-solving language and metalinguistic awareness
1578
 
1579
+ 3. COMPLEX SENTENCE ANALYSIS (with exact counts):
 
 
 
 
1580
 
1581
+ A. Coordinating Conjunctions:
1582
+ - Count and cite EVERY use of: and, but, or, so, yet, for, nor
1583
+ - Analyze patterns and age-appropriateness
 
1584
 
1585
+ B. Subordinating Conjunctions:
1586
+ - Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who
1587
+ - Analyze clause complexity and embedding depth
 
 
1588
 
1589
+ C. Sentence Structure Analysis:
1590
+ - Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
1591
+ - Calculate complexity ratios
 
 
 
1592
 
1593
+ 4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
 
 
 
 
1594
 
1595
+ A. Similes and Metaphors:
1596
+ - Count [FIGURATIVE] markers for similes (using "like" or "as")
1597
+ - Count [FIGURATIVE] markers for metaphors (direct comparisons)
 
 
1598
 
1599
+ B. Idioms and Non-literal Language:
1600
+ - Count and analyze idiomatic expressions
1601
+ - Assess comprehension and appropriate use
1602
+
1603
+ 5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
1604
+
1605
+ A. Discourse Management:
1606
+ - Count [TOPIC_SHIFT] markers: Assess transition appropriateness
1607
+ - Count [TANGENT] markers: Analyze tangential speech patterns
1608
+ - Count [COHERENCE_BREAK] markers: Assess logical flow
1609
+
1610
+ B. Referential Communication:
1611
+ - Count [PRONOUN_REF] markers: Analyze referential clarity
1612
+ - Assess communicative effectiveness
1613
+
1614
+ 6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
1615
+
1616
+ A. Vocabulary Diversity:
1617
+ - Total words: {linguistic_metrics.get('total_words', 0)}
1618
  - Unique words: {linguistic_metrics.get('unique_words', 0)}
1619
+ - Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1620
+ - Vocabulary sophistication: {linguistic_metrics.get('vocabulary_sophistication', 0)}
1621
+
1622
+ B. Semantic Relationships:
1623
+ - Analyze word frequency patterns
1624
+ - Assess semantic precision and relationships
1625
+
1626
+ 7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
1627
 
1628
+ A. Morphological Markers:
1629
+ - Count [MORPH_ERROR] markers and categorize
1630
+ - Analyze morpheme use patterns
1631
+ - Assess morphological complexity
1632
 
1633
+ B. Phonological Patterns:
1634
+ - Identify speech sound patterns from transcript
1635
+ - Assess syllable structure complexity
1636
 
1637
+ 8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
1638
+
1639
+ A. Working Memory:
1640
+ - Assess sentence length complexity using average: {linguistic_metrics.get('avg_sentence_length', 0)} words
1641
+ - Analyze information retention patterns
1642
+
1643
+ B. Processing Efficiency:
1644
+ - Analyze linguistic complexity and word-finding patterns
1645
+ - Assess cognitive demands of language structures
1646
+
1647
+ C. Executive Function:
1648
+ - Count self-correction patterns ([REVISION] markers)
1649
+ - Assess planning and organization in discourse
1650
+
1651
+ 9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
1652
+
1653
+ A. Disfluency Patterns:
1654
+ - Total fluency issues: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
1655
+ - Calculate disfluency rate per 100 words
1656
+ - Analyze impact on communication
1657
+
1658
+ B. Language Flow:
1659
+ - Assess sentence length variability: std = {linguistic_metrics.get('sentence_length_std', 0)}
1660
+ - Analyze linguistic markers of hesitation
1661
+
1662
+ 10. QUANTITATIVE METRICS:
1663
+ - Total words: {linguistic_metrics.get('total_words', 0)}
1664
+ - Total sentences: {linguistic_metrics.get('total_sentences', 0)}
1665
+ - MLU (words): {linguistic_metrics.get('mlu_words', 0)}
1666
+ - MLU (morphemes): {linguistic_metrics.get('mlu_morphemes', 0)}
1667
+ - Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
1668
+ - Grammar error rate: Calculate from marker counts
1669
+ - Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
1670
+
1671
+ CRITICAL REQUIREMENTS:
1672
+ - Use the provided calculated metrics in your analysis
1673
+ - Provide EXACT counts for every marker type
1674
+ - Calculate precise percentages and show your work
1675
+ - Give specific examples from the transcript
1676
+ - If annotation is incomplete, supplement with analysis of the original transcript
1677
+ - Complete ALL 8 sections - use <CONTINUE> if needed
1678
+ - Focus on objective data only - NO clinical interpretations
1679
  """
1680
 
1681
+ return call_claude_api_with_continuation(analysis_prompt)
1682
 
1683
  def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
1684
  """Complete pipeline: annotate then analyze with progressive updates"""
 
1690
  if progress_callback:
1691
  progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
1692
 
 
1693
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1694
 
1695
  if annotated_transcript.startswith("❌"):
 
1697
 
1698
  # Return annotated transcript immediately
1699
  if progress_callback:
1700
+ progress_callback("✅ Step 1 Complete: Annotation finished! Starting analysis...")
1701
 
1702
  # Check if annotation was incomplete
1703
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
 
1709
  # Step 2: Analyze annotated transcript with original as backup
1710
  logger.info("Step 2: Analyzing annotated transcript...")
1711
  if progress_callback:
1712
+ progress_callback("📊 Step 2: Analyzing annotated transcript (this may take several minutes)...")
1713
 
1714
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1715
 
1716
  if progress_callback:
1717
+ progress_callback("✅ Analysis Complete!")
1718
 
1719
  return annotated_transcript, analysis_note + analysis_result
1720
 
 
1726
 
1727
  # Step 1: Annotate transcript
1728
  logger.info("Step 1: Annotating transcript with linguistic markers...")
1729
+ yield "", "", "🏷️ Step 1: Annotating transcript with linguistic markers..."
1730
 
1731
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1732
 
 
1735
  return
1736
 
1737
  # Return annotated transcript immediately after completion
1738
+ yield annotated_transcript, "", "✅ Step 1 Complete! Starting analysis..."
1739
 
1740
  # Check if annotation was incomplete
1741
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1742
  logger.warning("Annotation incomplete, proceeding with analysis")
1743
+ analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
1744
+ yield annotated_transcript, "", "⚠️ Annotation incomplete, continuing with analysis..."
1745
  else:
1746
  analysis_note = ""
1747
 
1748
  # Step 2: Analyze annotated transcript
1749
  logger.info("Step 2: Analyzing annotated transcript...")
1750
+ yield annotated_transcript, "", "📊 Step 2: Analyzing annotated transcript (this may take several minutes)..."
1751
 
1752
  analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
1753
 
 
1806
  lines=3
1807
  )
1808
 
1809
+ example_btn = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1810
+
1811
+ # Single main analysis button
1812
+ ultimate_analysis_btn = gr.Button("🚀 Run Complete Speech Analysis", variant="primary", size="lg")
1813
 
1814
  with gr.Column(scale=3):
1815
  status_display = gr.Markdown("Ready to analyze transcript")
 
1828
  show_copy_button=True
1829
  )
1830
 
1831
+ with gr.Tab("🏷️ Annotation Only"):
1832
  gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
1833
 
1834
  with gr.Row():
 
1852
  lines=3
1853
  )
1854
 
1855
+ example_btn_2 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1856
+ annotate_btn = gr.Button("🏷️ Annotate Transcript", variant="secondary")
 
1857
 
1858
  with gr.Column():
1859
  annotation_output = gr.Textbox(
 
1905
  q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
1906
  q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
1907
 
1908
+ example_btn_4 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1909
+ ask_question_btn = gr.Button("❓ Ask Question", variant="primary")
1910
 
1911
  with gr.Column():
1912
  question_output = gr.Textbox(
 
1915
  show_copy_button=True
1916
  )
1917
 
1918
+ with gr.Tab("🎯 Targeted Analysis"):
1919
  gr.Markdown("### Focus on specific areas of speech and language")
1920
 
1921
  with gr.Row():
 
1952
  lines=2
1953
  )
1954
 
1955
+ example_btn_5 = gr.Button("📄 Load Example Transcript", variant="secondary", size="sm")
1956
+ targeted_analysis_btn = gr.Button("🎯 Run Targeted Analysis", variant="primary")
1957
 
1958
  with gr.Column():
1959
  targeted_output = gr.Textbox(
 
1991
  annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
1992
 
1993
  if annotated_transcript.startswith("❌"):
1994
+ return annotated_transcript, "❌ Annotation failed"
1995
  elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
1996
+ return annotated_transcript, "⚠️ Annotation incomplete but proceeding"
1997
  else:
1998
+ return annotated_transcript, "✅ Annotation complete! Click 'Run Analysis' to continue."
1999
 
2000
  def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
2001
  """Run the analysis step on the annotated transcript"""
 
2006
 
2007
  # Check if annotation was incomplete
2008
  if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
2009
+ analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
2010
  else:
2011
  analysis_note = ""
2012
 
2013
  analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
 
2014
  return analysis_note + analysis_result
2015
 
2016
  def run_manual_count_only(annotated_transcript):
 
2175
  - Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
2176
  * Categorize types (word, phrase, sound level)
2177
  * Provide examples and count summary
2178
+ - Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
2179
+ * Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
 
 
 
 
2180
  * Provide objective rate calculation
2181
 
2182
  B. Word Retrieval Issues:
 
2196
  A. Vocabulary Analysis (use verified data):
2197
  - Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
2198
  - Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
2199
+ - Sophistication ratio: Use verified ratio of {category_totals['vocab_sophistication_ratio']:.3f}
2200
  - Type-Token Ratio: Use verified TTR from basic metrics
2201
  - Provide examples of each vocabulary level from transcript
2202
 
 
2274
  # Step 2: Run analysis
2275
  analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
2276
 
2277
+ return annotated_transcript, analysis_result, "✅ Complete analysis finished!"
2278
 
2279
  def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
2280
  """Run the complete speech analysis pipeline with ultimate analysis"""
 
2290
  # Step 2: Run ultimate analysis
2291
  ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
2292
 
2293
+ return annotated_transcript, ultimate_result, "✅ Complete speech analysis finished!"
2294
 
2295
  # Single main event handler
2296
  ultimate_analysis_btn.click(
 
2319
  fn=analyze_targeted_area,
2320
  inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
2321
  outputs=[targeted_output]
2322
+ )
2323
+
2324
+ if __name__ == "__main__":
2325
+ demo.launch(
2326
+ server_name="0.0.0.0",
2327
+ server_port=7860,
2328
+ share=True,
2329
+ show_error=True
2330
  )