Spaces:
Sleeping
Sleeping
Update annotated_casl_app.py
Browse files- annotated_casl_app.py +111 -153
annotated_casl_app.py
CHANGED
|
@@ -137,7 +137,10 @@ def combine_sections_smartly(sections_dict):
|
|
| 137 |
|
| 138 |
|
| 139 |
def call_claude_api_quick_analysis(prompt):
|
| 140 |
-
"""Call Claude API for quick focused analysis - single response only
|
|
|
|
|
|
|
|
|
|
| 141 |
if not ANTHROPIC_API_KEY:
|
| 142 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 143 |
|
|
@@ -168,7 +171,16 @@ def call_claude_api_quick_analysis(prompt):
|
|
| 168 |
|
| 169 |
if response.status_code == 200:
|
| 170 |
response_json = response.json()
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
else:
|
| 173 |
logger.error(f"Claude API error: {response.status_code} - {response.text}")
|
| 174 |
return f"❌ Claude API Error: {response.status_code}"
|
|
@@ -1545,140 +1557,88 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1545 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1546 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1547 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1548 |
-
-
|
|
|
|
|
|
|
| 1549 |
|
| 1550 |
B. Word Retrieval Issues:
|
| 1551 |
-
- Count
|
| 1552 |
-
-
|
| 1553 |
-
-
|
| 1554 |
-
-
|
| 1555 |
-
|
| 1556 |
-
C. Grammatical Errors:
|
| 1557 |
-
- Count [GRAM_ERROR] markers by subcategory (verb tense, subject-verb agreement, etc.)
|
| 1558 |
-
- Count [SYNTAX_ERROR] markers: Analyze word order problems
|
| 1559 |
-
- Count [MORPH_ERROR] markers: Categorize morphological mistakes
|
| 1560 |
-
- Count [RUN_ON] markers: Assess sentence boundary awareness
|
| 1561 |
-
|
| 1562 |
-
2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
|
| 1563 |
|
| 1564 |
-
|
| 1565 |
-
- Use
|
| 1566 |
-
-
|
| 1567 |
-
-
|
| 1568 |
-
-
|
| 1569 |
|
| 1570 |
-
|
| 1571 |
-
- Count [SIMPLE_SENT], [COMPLEX_SENT], [COMPOUND_SENT] markers
|
| 1572 |
-
- Calculate sentence complexity ratios
|
| 1573 |
-
- Assess clause complexity and embedding
|
| 1574 |
-
|
| 1575 |
-
C. Supralinguistic Skills:
|
| 1576 |
-
- Identify cause-effect relationships, inferences, non-literal language
|
| 1577 |
-
- Assess problem-solving language and metalinguistic awareness
|
| 1578 |
-
|
| 1579 |
-
3. COMPLEX SENTENCE ANALYSIS (with exact counts):
|
| 1580 |
-
|
| 1581 |
-
A. Coordinating Conjunctions:
|
| 1582 |
-
- Count and cite EVERY use of: and, but, or, so, yet, for, nor
|
| 1583 |
-
- Analyze patterns and age-appropriateness
|
| 1584 |
|
| 1585 |
-
|
| 1586 |
-
-
|
| 1587 |
-
-
|
|
|
|
|
|
|
|
|
|
| 1588 |
|
| 1589 |
-
|
| 1590 |
-
-
|
| 1591 |
-
-
|
|
|
|
| 1592 |
|
| 1593 |
-
|
| 1594 |
|
| 1595 |
-
A.
|
| 1596 |
-
-
|
| 1597 |
-
-
|
|
|
|
|
|
|
| 1598 |
|
| 1599 |
-
B.
|
| 1600 |
-
-
|
| 1601 |
-
-
|
|
|
|
| 1602 |
|
| 1603 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1604 |
|
| 1605 |
-
|
| 1606 |
-
-
|
| 1607 |
-
-
|
| 1608 |
-
-
|
|
|
|
|
|
|
| 1609 |
|
| 1610 |
-
|
| 1611 |
-
-
|
| 1612 |
-
-
|
|
|
|
|
|
|
| 1613 |
|
| 1614 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1615 |
|
| 1616 |
-
|
| 1617 |
-
- Total words: {
|
|
|
|
| 1618 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
| 1619 |
-
-
|
| 1620 |
-
-
|
| 1621 |
-
|
| 1622 |
-
B. Semantic Relationships:
|
| 1623 |
-
- Analyze word frequency patterns
|
| 1624 |
-
- Assess semantic precision and relationships
|
| 1625 |
-
|
| 1626 |
-
7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
|
| 1627 |
|
| 1628 |
-
A. Morphological Markers:
|
| 1629 |
-
- Count [MORPH_ERROR] markers and categorize
|
| 1630 |
-
- Analyze morpheme use patterns
|
| 1631 |
-
- Assess morphological complexity
|
| 1632 |
|
| 1633 |
-
B. Phonological Patterns:
|
| 1634 |
-
- Identify speech sound patterns from transcript
|
| 1635 |
-
- Assess syllable structure complexity
|
| 1636 |
|
| 1637 |
-
|
| 1638 |
-
|
| 1639 |
-
A. Working Memory:
|
| 1640 |
-
- Assess sentence length complexity using average: {linguistic_metrics.get('avg_sentence_length', 0)} words
|
| 1641 |
-
- Analyze information retention patterns
|
| 1642 |
-
|
| 1643 |
-
B. Processing Efficiency:
|
| 1644 |
-
- Analyze linguistic complexity and word-finding patterns
|
| 1645 |
-
- Assess cognitive demands of language structures
|
| 1646 |
-
|
| 1647 |
-
C. Executive Function:
|
| 1648 |
-
- Count self-correction patterns ([REVISION] markers)
|
| 1649 |
-
- Assess planning and organization in discourse
|
| 1650 |
-
|
| 1651 |
-
9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
|
| 1652 |
-
|
| 1653 |
-
A. Disfluency Patterns:
|
| 1654 |
-
- Total fluency issues: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
|
| 1655 |
-
- Calculate disfluency rate per 100 words
|
| 1656 |
-
- Analyze impact on communication
|
| 1657 |
-
|
| 1658 |
-
B. Language Flow:
|
| 1659 |
-
- Assess sentence length variability: std = {linguistic_metrics.get('sentence_length_std', 0)}
|
| 1660 |
-
- Analyze linguistic markers of hesitation
|
| 1661 |
-
|
| 1662 |
-
10. QUANTITATIVE METRICS:
|
| 1663 |
-
- Total words: {linguistic_metrics.get('total_words', 0)}
|
| 1664 |
-
- Total sentences: {linguistic_metrics.get('total_sentences', 0)}
|
| 1665 |
-
- MLU (words): {linguistic_metrics.get('mlu_words', 0)}
|
| 1666 |
-
- MLU (morphemes): {linguistic_metrics.get('mlu_morphemes', 0)}
|
| 1667 |
-
- Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
|
| 1668 |
-
- Grammar error rate: Calculate from marker counts
|
| 1669 |
-
- Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
|
| 1670 |
-
|
| 1671 |
-
CRITICAL REQUIREMENTS:
|
| 1672 |
-
- Use the provided calculated metrics in your analysis
|
| 1673 |
-
- Provide EXACT counts for every marker type
|
| 1674 |
-
- Calculate precise percentages and show your work
|
| 1675 |
-
- Give specific examples from the transcript
|
| 1676 |
-
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1677 |
-
- Complete ALL 8 sections - use <CONTINUE> if needed
|
| 1678 |
-
- Focus on objective data only - NO clinical interpretations
|
| 1679 |
"""
|
| 1680 |
|
| 1681 |
-
return call_claude_api_with_continuation(
|
| 1682 |
|
| 1683 |
def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
|
| 1684 |
"""Complete pipeline: annotate then analyze with progressive updates"""
|
|
@@ -1690,6 +1650,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1690 |
if progress_callback:
|
| 1691 |
progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
|
| 1692 |
|
|
|
|
| 1693 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1694 |
|
| 1695 |
if annotated_transcript.startswith("❌"):
|
|
@@ -1697,7 +1658,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1697 |
|
| 1698 |
# Return annotated transcript immediately
|
| 1699 |
if progress_callback:
|
| 1700 |
-
progress_callback("
|
| 1701 |
|
| 1702 |
# Check if annotation was incomplete
|
| 1703 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
|
@@ -1709,12 +1670,12 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1709 |
# Step 2: Analyze annotated transcript with original as backup
|
| 1710 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1711 |
if progress_callback:
|
| 1712 |
-
progress_callback("
|
| 1713 |
|
| 1714 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1715 |
|
| 1716 |
if progress_callback:
|
| 1717 |
-
progress_callback("
|
| 1718 |
|
| 1719 |
return annotated_transcript, analysis_note + analysis_result
|
| 1720 |
|
|
@@ -1726,7 +1687,7 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
|
|
| 1726 |
|
| 1727 |
# Step 1: Annotate transcript
|
| 1728 |
logger.info("Step 1: Annotating transcript with linguistic markers...")
|
| 1729 |
-
yield "", "", "
|
| 1730 |
|
| 1731 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1732 |
|
|
@@ -1735,19 +1696,19 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
|
|
| 1735 |
return
|
| 1736 |
|
| 1737 |
# Return annotated transcript immediately after completion
|
| 1738 |
-
yield annotated_transcript, "", "
|
| 1739 |
|
| 1740 |
# Check if annotation was incomplete
|
| 1741 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1742 |
logger.warning("Annotation incomplete, proceeding with analysis")
|
| 1743 |
-
analysis_note = "
|
| 1744 |
-
yield annotated_transcript, "", "
|
| 1745 |
else:
|
| 1746 |
analysis_note = ""
|
| 1747 |
|
| 1748 |
# Step 2: Analyze annotated transcript
|
| 1749 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1750 |
-
yield annotated_transcript, "", "
|
| 1751 |
|
| 1752 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1753 |
|
|
@@ -1806,10 +1767,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1806 |
lines=3
|
| 1807 |
)
|
| 1808 |
|
| 1809 |
-
|
| 1810 |
-
|
| 1811 |
-
|
| 1812 |
-
ultimate_analysis_btn = gr.Button("🚀 Run Complete Speech Analysis", variant="primary", size="lg")
|
| 1813 |
|
| 1814 |
with gr.Column(scale=3):
|
| 1815 |
status_display = gr.Markdown("Ready to analyze transcript")
|
|
@@ -1828,7 +1788,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1828 |
show_copy_button=True
|
| 1829 |
)
|
| 1830 |
|
| 1831 |
-
with gr.Tab("
|
| 1832 |
gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
|
| 1833 |
|
| 1834 |
with gr.Row():
|
|
@@ -1852,8 +1812,9 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1852 |
lines=3
|
| 1853 |
)
|
| 1854 |
|
| 1855 |
-
|
| 1856 |
-
|
|
|
|
| 1857 |
|
| 1858 |
with gr.Column():
|
| 1859 |
annotation_output = gr.Textbox(
|
|
@@ -1905,8 +1866,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1905 |
q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
|
| 1906 |
q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
|
| 1907 |
|
| 1908 |
-
example_btn_4 = gr.Button("
|
| 1909 |
-
ask_question_btn = gr.Button("
|
| 1910 |
|
| 1911 |
with gr.Column():
|
| 1912 |
question_output = gr.Textbox(
|
|
@@ -1915,7 +1876,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1915 |
show_copy_button=True
|
| 1916 |
)
|
| 1917 |
|
| 1918 |
-
with gr.Tab("
|
| 1919 |
gr.Markdown("### Focus on specific areas of speech and language")
|
| 1920 |
|
| 1921 |
with gr.Row():
|
|
@@ -1952,8 +1913,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1952 |
lines=2
|
| 1953 |
)
|
| 1954 |
|
| 1955 |
-
example_btn_5 = gr.Button("
|
| 1956 |
-
targeted_analysis_btn = gr.Button("
|
| 1957 |
|
| 1958 |
with gr.Column():
|
| 1959 |
targeted_output = gr.Textbox(
|
|
@@ -1991,11 +1952,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1991 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1992 |
|
| 1993 |
if annotated_transcript.startswith("❌"):
|
| 1994 |
-
return annotated_transcript, "
|
| 1995 |
elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1996 |
-
return annotated_transcript, "
|
| 1997 |
else:
|
| 1998 |
-
return annotated_transcript, "
|
| 1999 |
|
| 2000 |
def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
|
| 2001 |
"""Run the analysis step on the annotated transcript"""
|
|
@@ -2006,11 +1967,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2006 |
|
| 2007 |
# Check if annotation was incomplete
|
| 2008 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 2009 |
-
analysis_note = "
|
| 2010 |
else:
|
| 2011 |
analysis_note = ""
|
| 2012 |
|
| 2013 |
analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
|
|
|
|
| 2014 |
return analysis_note + analysis_result
|
| 2015 |
|
| 2016 |
def run_manual_count_only(annotated_transcript):
|
|
@@ -2175,8 +2137,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2175 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2176 |
* Categorize types (word, phrase, sound level)
|
| 2177 |
* Provide examples and count summary
|
| 2178 |
-
-
|
| 2179 |
-
*
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2180 |
* Provide objective rate calculation
|
| 2181 |
|
| 2182 |
B. Word Retrieval Issues:
|
|
@@ -2196,7 +2162,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2196 |
A. Vocabulary Analysis (use verified data):
|
| 2197 |
- Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
|
| 2198 |
- Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
|
| 2199 |
-
- Sophistication ratio: Use verified ratio of {category_totals
|
| 2200 |
- Type-Token Ratio: Use verified TTR from basic metrics
|
| 2201 |
- Provide examples of each vocabulary level from transcript
|
| 2202 |
|
|
@@ -2274,7 +2240,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2274 |
# Step 2: Run analysis
|
| 2275 |
analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2276 |
|
| 2277 |
-
return annotated_transcript, analysis_result, "
|
| 2278 |
|
| 2279 |
def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
|
| 2280 |
"""Run the complete speech analysis pipeline with ultimate analysis"""
|
|
@@ -2290,7 +2256,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2290 |
# Step 2: Run ultimate analysis
|
| 2291 |
ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2292 |
|
| 2293 |
-
return annotated_transcript, ultimate_result, "
|
| 2294 |
|
| 2295 |
# Single main event handler
|
| 2296 |
ultimate_analysis_btn.click(
|
|
@@ -2319,12 +2285,4 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2319 |
fn=analyze_targeted_area,
|
| 2320 |
inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
|
| 2321 |
outputs=[targeted_output]
|
| 2322 |
-
)
|
| 2323 |
-
|
| 2324 |
-
if __name__ == "__main__":
|
| 2325 |
-
demo.launch(
|
| 2326 |
-
server_name="0.0.0.0",
|
| 2327 |
-
server_port=7860,
|
| 2328 |
-
share=True,
|
| 2329 |
-
show_error=True
|
| 2330 |
)
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
def call_claude_api_quick_analysis(prompt):
|
| 140 |
+
"""Call Claude API for quick focused analysis - single response only
|
| 141 |
+
Responses are cleaned to remove asterisks, hashtags, and convert simple tables to lists
|
| 142 |
+
to match formatting used in the main analysis pipeline.
|
| 143 |
+
"""
|
| 144 |
if not ANTHROPIC_API_KEY:
|
| 145 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 146 |
|
|
|
|
| 171 |
|
| 172 |
if response.status_code == 200:
|
| 173 |
response_json = response.json()
|
| 174 |
+
response_text = response_json['content'][0]['text']
|
| 175 |
+
|
| 176 |
+
# Clean formatting (remove asterisks, hashtags, convert simple tables) so
|
| 177 |
+
# Targeted Analysis and Quick Questions match the main analysis output
|
| 178 |
+
try:
|
| 179 |
+
cleaned = clean_output_formatting(response_text)
|
| 180 |
+
except Exception:
|
| 181 |
+
# If cleaning fails for any reason, fall back to raw response
|
| 182 |
+
cleaned = response_text
|
| 183 |
+
return cleaned
|
| 184 |
else:
|
| 185 |
logger.error(f"Claude API error: {response.status_code} - {response.text}")
|
| 186 |
return f"❌ Claude API Error: {response.status_code}"
|
|
|
|
| 1557 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1558 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1559 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1560 |
+
- Total disfluency assessment: Use verified total of {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
|
| 1561 |
+
* Rate: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
|
| 1562 |
+
* Provide objective rate calculation
|
| 1563 |
|
| 1564 |
B. Word Retrieval Issues:
|
| 1565 |
+
- Circumlocutions: Count and analyze from transcript
|
| 1566 |
+
- Incomplete thoughts: Identify abandoned utterances
|
| 1567 |
+
- Generic language use: Count vague terms
|
| 1568 |
+
- Word-finding efficiency: Assess retrieval success rate
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1569 |
|
| 1570 |
+
C. Grammatical Errors (use verified counts):
|
| 1571 |
+
- Grammar errors: Use verified count of {marker_counts.get('GRAM_ERROR', 0)}
|
| 1572 |
+
- Syntax errors: Use verified count of {marker_counts.get('SYNTAX_ERROR', 0)}
|
| 1573 |
+
- Morphological errors: Use verified count of {marker_counts.get('MORPH_ERROR', 0)}
|
| 1574 |
+
- Calculate overall grammatical accuracy rate
|
| 1575 |
|
| 1576 |
+
2. LANGUAGE SKILLS ASSESSMENT
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1577 |
|
| 1578 |
+
A. Vocabulary Analysis (use verified data):
|
| 1579 |
+
- Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
|
| 1580 |
+
- Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
|
| 1581 |
+
- Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
|
| 1582 |
+
- Type-Token Ratio: Use verified TTR from basic metrics
|
| 1583 |
+
- Provide examples of each vocabulary level from transcript
|
| 1584 |
|
| 1585 |
+
B. Grammar and Morphology:
|
| 1586 |
+
- Error pattern analysis using verified counts
|
| 1587 |
+
- Pattern analysis only
|
| 1588 |
+
- Morphological complexity evaluation
|
| 1589 |
|
| 1590 |
+
3. COMPLEX SENTENCE ANALYSIS (use verified counts)
|
| 1591 |
|
| 1592 |
+
A. Sentence Structure Distribution:
|
| 1593 |
+
- Simple sentences: Use verified count of {marker_counts.get('SIMPLE_SENT', 0)}
|
| 1594 |
+
- Complex sentences: Use verified count of {marker_counts.get('COMPLEX_SENT', 0)}
|
| 1595 |
+
- Compound sentences: Use verified count of {marker_counts.get('COMPOUND_SENT', 0)}
|
| 1596 |
+
- Calculate percentages of each type
|
| 1597 |
|
| 1598 |
+
B. Syntactic Complexity:
|
| 1599 |
+
- MLU analysis: Use verified MLU of {linguistic_metrics.get('mlu_words', 0):.2f} words
|
| 1600 |
+
- Average sentence length: Use verified length of {linguistic_metrics.get('avg_sentence_length', 0):.2f} words
|
| 1601 |
+
- Subordination and coordination patterns
|
| 1602 |
|
| 1603 |
+
4. FIGURATIVE LANGUAGE ANALYSIS
|
| 1604 |
+
- Figurative expressions: Use verified count of {marker_counts.get('FIGURATIVE', 0)}
|
| 1605 |
+
- Metaphor and idiom identification from transcript
|
| 1606 |
+
- Age-appropriate development assessment
|
| 1607 |
+
- Abstract language abilities
|
| 1608 |
|
| 1609 |
+
5. PRAGMATIC LANGUAGE ASSESSMENT
|
| 1610 |
+
- Topic shifts: Use verified count of {marker_counts.get('TOPIC_SHIFT', 0)}
|
| 1611 |
+
- Tangential speech: Use verified count of {marker_counts.get('TANGENT', 0)}
|
| 1612 |
+
- Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
|
| 1613 |
+
- Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
|
| 1614 |
+
- Overall conversational patterns observed
|
| 1615 |
|
| 1616 |
+
6. VOCABULARY AND SEMANTIC ANALYSIS
|
| 1617 |
+
- Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
|
| 1618 |
+
- Lexical diversity: Use verified measures from stats summary
|
| 1619 |
+
- Word association patterns from transcript analysis
|
| 1620 |
+
- Semantic precision and appropriateness
|
| 1621 |
|
| 1622 |
+
7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS
|
| 1623 |
+
- Morphological complexity assessment
|
| 1624 |
+
- Derivational and inflectional morphology patterns
|
| 1625 |
+
- Error analysis using verified counts
|
| 1626 |
+
- Pattern analysis only
|
| 1627 |
|
| 1628 |
+
8. QUANTITATIVE METRICS AND NLP FEATURES (use ALL verified data)
|
| 1629 |
+
- Total words: {total_words}
|
| 1630 |
+
- Total sentences: {linguistic_metrics.get('total_sentences', 0)}
|
| 1631 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
| 1632 |
+
- MLU words: {linguistic_metrics.get('mlu_words', 0):.2f}
|
| 1633 |
+
- MLU morphemes: {linguistic_metrics.get('mlu_morphemes', 0):.2f}
|
| 1634 |
+
- All error rates and ratios from verified counts
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1636 |
|
|
|
|
|
|
|
|
|
|
| 1637 |
|
| 1638 |
+
CRITICAL: Complete ALL 13 sections using verified data and specific transcript examples.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1639 |
"""
|
| 1640 |
|
| 1641 |
+
return call_claude_api_with_continuation(final_prompt)
|
| 1642 |
|
| 1643 |
def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
|
| 1644 |
"""Complete pipeline: annotate then analyze with progressive updates"""
|
|
|
|
| 1650 |
if progress_callback:
|
| 1651 |
progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
|
| 1652 |
|
| 1653 |
+
|
| 1654 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1655 |
|
| 1656 |
if annotated_transcript.startswith("❌"):
|
|
|
|
| 1658 |
|
| 1659 |
# Return annotated transcript immediately
|
| 1660 |
if progress_callback:
|
| 1661 |
+
progress_callback("Step 1 Complete: Annotation finished! Starting analysis...")
|
| 1662 |
|
| 1663 |
# Check if annotation was incomplete
|
| 1664 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
|
|
|
| 1670 |
# Step 2: Analyze annotated transcript with original as backup
|
| 1671 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1672 |
if progress_callback:
|
| 1673 |
+
progress_callback("Step 2: Analyzing annotated transcript (this may take several minutes)...")
|
| 1674 |
|
| 1675 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1676 |
|
| 1677 |
if progress_callback:
|
| 1678 |
+
progress_callback("Analysis Complete!")
|
| 1679 |
|
| 1680 |
return annotated_transcript, analysis_note + analysis_result
|
| 1681 |
|
|
|
|
| 1687 |
|
| 1688 |
# Step 1: Annotate transcript
|
| 1689 |
logger.info("Step 1: Annotating transcript with linguistic markers...")
|
| 1690 |
+
yield "", "", "Step 1: Annotating transcript with linguistic markers..."
|
| 1691 |
|
| 1692 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1693 |
|
|
|
|
| 1696 |
return
|
| 1697 |
|
| 1698 |
# Return annotated transcript immediately after completion
|
| 1699 |
+
yield annotated_transcript, "", "Step 1 Complete! Starting analysis..."
|
| 1700 |
|
| 1701 |
# Check if annotation was incomplete
|
| 1702 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1703 |
logger.warning("Annotation incomplete, proceeding with analysis")
|
| 1704 |
+
analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 1705 |
+
yield annotated_transcript, "", "Annotation incomplete, continuing with analysis..."
|
| 1706 |
else:
|
| 1707 |
analysis_note = ""
|
| 1708 |
|
| 1709 |
# Step 2: Analyze annotated transcript
|
| 1710 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1711 |
+
yield annotated_transcript, "", "Step 2: Analyzing annotated transcript (this may take several minutes)..."
|
| 1712 |
|
| 1713 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1714 |
|
|
|
|
| 1767 |
lines=3
|
| 1768 |
)
|
| 1769 |
|
| 1770 |
+
with gr.Row():
|
| 1771 |
+
example_btn = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1772 |
+
ultimate_analysis_btn = gr.Button("Run Complete Speech Analysis", variant="primary", size="lg")
|
|
|
|
| 1773 |
|
| 1774 |
with gr.Column(scale=3):
|
| 1775 |
status_display = gr.Markdown("Ready to analyze transcript")
|
|
|
|
| 1788 |
show_copy_button=True
|
| 1789 |
)
|
| 1790 |
|
| 1791 |
+
with gr.Tab("Annotation Only"):
|
| 1792 |
gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
|
| 1793 |
|
| 1794 |
with gr.Row():
|
|
|
|
| 1812 |
lines=3
|
| 1813 |
)
|
| 1814 |
|
| 1815 |
+
with gr.Row():
|
| 1816 |
+
example_btn_2 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1817 |
+
annotate_btn = gr.Button("Annotate Transcript", variant="secondary")
|
| 1818 |
|
| 1819 |
with gr.Column():
|
| 1820 |
annotation_output = gr.Textbox(
|
|
|
|
| 1866 |
q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
|
| 1867 |
q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
|
| 1868 |
|
| 1869 |
+
example_btn_4 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1870 |
+
ask_question_btn = gr.Button("Ask Question", variant="primary")
|
| 1871 |
|
| 1872 |
with gr.Column():
|
| 1873 |
question_output = gr.Textbox(
|
|
|
|
| 1876 |
show_copy_button=True
|
| 1877 |
)
|
| 1878 |
|
| 1879 |
+
with gr.Tab("Targeted Analysis"):
|
| 1880 |
gr.Markdown("### Focus on specific areas of speech and language")
|
| 1881 |
|
| 1882 |
with gr.Row():
|
|
|
|
| 1913 |
lines=2
|
| 1914 |
)
|
| 1915 |
|
| 1916 |
+
example_btn_5 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1917 |
+
targeted_analysis_btn = gr.Button("Run Targeted Analysis", variant="primary")
|
| 1918 |
|
| 1919 |
with gr.Column():
|
| 1920 |
targeted_output = gr.Textbox(
|
|
|
|
| 1952 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1953 |
|
| 1954 |
if annotated_transcript.startswith("❌"):
|
| 1955 |
+
return annotated_transcript, "Annotation failed"
|
| 1956 |
elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1957 |
+
return annotated_transcript, "Annotation incomplete but proceeding"
|
| 1958 |
else:
|
| 1959 |
+
return annotated_transcript, "Annotation complete! Click 'Run Analysis' to continue."
|
| 1960 |
|
| 1961 |
def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
|
| 1962 |
"""Run the analysis step on the annotated transcript"""
|
|
|
|
| 1967 |
|
| 1968 |
# Check if annotation was incomplete
|
| 1969 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1970 |
+
analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 1971 |
else:
|
| 1972 |
analysis_note = ""
|
| 1973 |
|
| 1974 |
analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
|
| 1975 |
+
|
| 1976 |
return analysis_note + analysis_result
|
| 1977 |
|
| 1978 |
def run_manual_count_only(annotated_transcript):
|
|
|
|
| 2137 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2138 |
* Categorize types (word, phrase, sound level)
|
| 2139 |
* Provide examples and count summary
|
| 2140 |
+
- Revisions: Use verified count of {marker_counts.get('REVISION', 0)}
|
| 2141 |
+
* Analyze self-correction patterns
|
| 2142 |
+
- Pauses: Use verified count of {marker_counts.get('PAUSE', 0)}
|
| 2143 |
+
* Assess hesitation frequency
|
| 2144 |
+
- Total disfluency assessment: Use verified total of {category_totals.get('fluency_issues', 0)}
|
| 2145 |
+
* Rate: {category_totals.get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
|
| 2146 |
* Provide objective rate calculation
|
| 2147 |
|
| 2148 |
B. Word Retrieval Issues:
|
|
|
|
| 2162 |
A. Vocabulary Analysis (use verified data):
|
| 2163 |
- Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
|
| 2164 |
- Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
|
| 2165 |
+
- Sophistication ratio: Use verified ratio of {category_totals.get('vocab_sophistication_ratio', 0):.3f}
|
| 2166 |
- Type-Token Ratio: Use verified TTR from basic metrics
|
| 2167 |
- Provide examples of each vocabulary level from transcript
|
| 2168 |
|
|
|
|
| 2240 |
# Step 2: Run analysis
|
| 2241 |
analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2242 |
|
| 2243 |
+
return annotated_transcript, analysis_result, "Complete analysis finished!"
|
| 2244 |
|
| 2245 |
def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
|
| 2246 |
"""Run the complete speech analysis pipeline with ultimate analysis"""
|
|
|
|
| 2256 |
# Step 2: Run ultimate analysis
|
| 2257 |
ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2258 |
|
| 2259 |
+
return annotated_transcript, ultimate_result, "Complete speech analysis finished!"
|
| 2260 |
|
| 2261 |
# Single main event handler
|
| 2262 |
ultimate_analysis_btn.click(
|
|
|
|
| 2285 |
fn=analyze_targeted_area,
|
| 2286 |
inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
|
| 2287 |
outputs=[targeted_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2288 |
)
|