Spaces:
Sleeping
Sleeping
Update annotated_casl_app.py
Browse files- annotated_casl_app.py +154 -111
annotated_casl_app.py
CHANGED
|
@@ -5,6 +5,7 @@ import logging
|
|
| 5 |
import requests
|
| 6 |
import re
|
| 7 |
import time
|
|
|
|
| 8 |
# Configure logging
|
| 9 |
logging.basicConfig(level=logging.INFO)
|
| 10 |
logger = logging.getLogger(__name__)
|
|
@@ -136,10 +137,7 @@ def combine_sections_smartly(sections_dict):
|
|
| 136 |
|
| 137 |
|
| 138 |
def call_claude_api_quick_analysis(prompt):
|
| 139 |
-
"""Call Claude API for quick focused analysis - single response only
|
| 140 |
-
Responses are cleaned to remove asterisks, hashtags, and convert simple tables to lists
|
| 141 |
-
to match formatting used in the main analysis pipeline.
|
| 142 |
-
"""
|
| 143 |
if not ANTHROPIC_API_KEY:
|
| 144 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 145 |
|
|
@@ -170,16 +168,7 @@ def call_claude_api_quick_analysis(prompt):
|
|
| 170 |
|
| 171 |
if response.status_code == 200:
|
| 172 |
response_json = response.json()
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
# Clean formatting (remove asterisks, hashtags, convert simple tables) so
|
| 176 |
-
# Targeted Analysis and Quick Questions match the main analysis output
|
| 177 |
-
try:
|
| 178 |
-
cleaned = clean_output_formatting(response_text)
|
| 179 |
-
except Exception:
|
| 180 |
-
# If cleaning fails for any reason, fall back to raw response
|
| 181 |
-
cleaned = response_text
|
| 182 |
-
return cleaned
|
| 183 |
else:
|
| 184 |
logger.error(f"Claude API error: {response.status_code} - {response.text}")
|
| 185 |
return f"❌ Claude API Error: {response.status_code}"
|
|
@@ -1556,88 +1545,140 @@ def analyze_with_backup(annotated_transcript, original_transcript, age, gender,
|
|
| 1556 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1557 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1558 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1559 |
-
-
|
| 1560 |
-
* Rate: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
|
| 1561 |
-
* Provide objective rate calculation
|
| 1562 |
|
| 1563 |
B. Word Retrieval Issues:
|
| 1564 |
-
-
|
| 1565 |
-
-
|
| 1566 |
-
-
|
| 1567 |
-
-
|
| 1568 |
|
| 1569 |
-
C. Grammatical Errors
|
| 1570 |
-
-
|
| 1571 |
-
-
|
| 1572 |
-
-
|
| 1573 |
-
-
|
| 1574 |
|
| 1575 |
-
2. LANGUAGE SKILLS ASSESSMENT
|
| 1576 |
|
| 1577 |
-
A.
|
| 1578 |
-
-
|
| 1579 |
-
-
|
| 1580 |
-
-
|
| 1581 |
-
-
|
| 1582 |
-
- Provide examples of each vocabulary level from transcript
|
| 1583 |
|
| 1584 |
-
B.
|
| 1585 |
-
-
|
| 1586 |
-
-
|
| 1587 |
-
-
|
| 1588 |
|
| 1589 |
-
|
|
|
|
|
|
|
| 1590 |
|
| 1591 |
-
|
| 1592 |
-
- Simple sentences: Use verified count of {marker_counts.get('SIMPLE_SENT', 0)}
|
| 1593 |
-
- Complex sentences: Use verified count of {marker_counts.get('COMPLEX_SENT', 0)}
|
| 1594 |
-
- Compound sentences: Use verified count of {marker_counts.get('COMPOUND_SENT', 0)}
|
| 1595 |
-
- Calculate percentages of each type
|
| 1596 |
|
| 1597 |
-
|
| 1598 |
-
-
|
| 1599 |
-
-
|
| 1600 |
-
- Subordination and coordination patterns
|
| 1601 |
|
| 1602 |
-
|
| 1603 |
-
-
|
| 1604 |
-
-
|
| 1605 |
-
- Age-appropriate development assessment
|
| 1606 |
-
- Abstract language abilities
|
| 1607 |
|
| 1608 |
-
|
| 1609 |
-
-
|
| 1610 |
-
-
|
| 1611 |
-
- Coherence breaks: Use verified count of {marker_counts.get('COHERENCE_BREAK', 0)}
|
| 1612 |
-
- Referential clarity: Use verified count of {marker_counts.get('PRONOUN_REF', 0)}
|
| 1613 |
-
- Overall conversational patterns observed
|
| 1614 |
|
| 1615 |
-
|
| 1616 |
-
- Semantic errors: Use verified count of {marker_counts.get('SEMANTIC_ERROR', 0)}
|
| 1617 |
-
- Lexical diversity: Use verified measures from stats summary
|
| 1618 |
-
- Word association patterns from transcript analysis
|
| 1619 |
-
- Semantic precision and appropriateness
|
| 1620 |
|
| 1621 |
-
|
| 1622 |
-
-
|
| 1623 |
-
-
|
| 1624 |
-
- Error analysis using verified counts
|
| 1625 |
-
- Pattern analysis only
|
| 1626 |
|
| 1627 |
-
|
| 1628 |
-
-
|
| 1629 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1630 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
| 1631 |
-
-
|
| 1632 |
-
-
|
| 1633 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1634 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1635 |
|
|
|
|
|
|
|
|
|
|
| 1636 |
|
| 1637 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1638 |
"""
|
| 1639 |
|
| 1640 |
-
return call_claude_api_with_continuation(
|
| 1641 |
|
| 1642 |
def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
|
| 1643 |
"""Complete pipeline: annotate then analyze with progressive updates"""
|
|
@@ -1649,7 +1690,6 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1649 |
if progress_callback:
|
| 1650 |
progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
|
| 1651 |
|
| 1652 |
-
|
| 1653 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1654 |
|
| 1655 |
if annotated_transcript.startswith("❌"):
|
|
@@ -1657,7 +1697,7 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1657 |
|
| 1658 |
# Return annotated transcript immediately
|
| 1659 |
if progress_callback:
|
| 1660 |
-
progress_callback("Step 1 Complete: Annotation finished! Starting analysis...")
|
| 1661 |
|
| 1662 |
# Check if annotation was incomplete
|
| 1663 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
|
@@ -1669,12 +1709,12 @@ def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_
|
|
| 1669 |
# Step 2: Analyze annotated transcript with original as backup
|
| 1670 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1671 |
if progress_callback:
|
| 1672 |
-
progress_callback("Step 2: Analyzing annotated transcript (this may take several minutes)...")
|
| 1673 |
|
| 1674 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1675 |
|
| 1676 |
if progress_callback:
|
| 1677 |
-
progress_callback("Analysis Complete!")
|
| 1678 |
|
| 1679 |
return annotated_transcript, analysis_note + analysis_result
|
| 1680 |
|
|
@@ -1686,7 +1726,7 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
|
|
| 1686 |
|
| 1687 |
# Step 1: Annotate transcript
|
| 1688 |
logger.info("Step 1: Annotating transcript with linguistic markers...")
|
| 1689 |
-
yield "", "", "Step 1: Annotating transcript with linguistic markers..."
|
| 1690 |
|
| 1691 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1692 |
|
|
@@ -1695,19 +1735,19 @@ def progressive_analysis_pipeline(transcript_content, age, gender, slp_notes):
|
|
| 1695 |
return
|
| 1696 |
|
| 1697 |
# Return annotated transcript immediately after completion
|
| 1698 |
-
yield annotated_transcript, "", "Step 1 Complete! Starting analysis..."
|
| 1699 |
|
| 1700 |
# Check if annotation was incomplete
|
| 1701 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1702 |
logger.warning("Annotation incomplete, proceeding with analysis")
|
| 1703 |
-
analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 1704 |
-
yield annotated_transcript, "", "Annotation incomplete, continuing with analysis..."
|
| 1705 |
else:
|
| 1706 |
analysis_note = ""
|
| 1707 |
|
| 1708 |
# Step 2: Analyze annotated transcript
|
| 1709 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1710 |
-
yield annotated_transcript, "", "Step 2: Analyzing annotated transcript (this may take several minutes)..."
|
| 1711 |
|
| 1712 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1713 |
|
|
@@ -1766,9 +1806,10 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1766 |
lines=3
|
| 1767 |
)
|
| 1768 |
|
| 1769 |
-
|
| 1770 |
-
|
| 1771 |
-
|
|
|
|
| 1772 |
|
| 1773 |
with gr.Column(scale=3):
|
| 1774 |
status_display = gr.Markdown("Ready to analyze transcript")
|
|
@@ -1787,7 +1828,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1787 |
show_copy_button=True
|
| 1788 |
)
|
| 1789 |
|
| 1790 |
-
with gr.Tab("Annotation Only"):
|
| 1791 |
gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
|
| 1792 |
|
| 1793 |
with gr.Row():
|
|
@@ -1811,9 +1852,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1811 |
lines=3
|
| 1812 |
)
|
| 1813 |
|
| 1814 |
-
|
| 1815 |
-
|
| 1816 |
-
annotate_btn = gr.Button("Annotate Transcript", variant="secondary")
|
| 1817 |
|
| 1818 |
with gr.Column():
|
| 1819 |
annotation_output = gr.Textbox(
|
|
@@ -1865,8 +1905,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1865 |
q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
|
| 1866 |
q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
|
| 1867 |
|
| 1868 |
-
example_btn_4 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1869 |
-
ask_question_btn = gr.Button("Ask Question", variant="primary")
|
| 1870 |
|
| 1871 |
with gr.Column():
|
| 1872 |
question_output = gr.Textbox(
|
|
@@ -1875,7 +1915,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1875 |
show_copy_button=True
|
| 1876 |
)
|
| 1877 |
|
| 1878 |
-
with gr.Tab("Targeted Analysis"):
|
| 1879 |
gr.Markdown("### Focus on specific areas of speech and language")
|
| 1880 |
|
| 1881 |
with gr.Row():
|
|
@@ -1912,8 +1952,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1912 |
lines=2
|
| 1913 |
)
|
| 1914 |
|
| 1915 |
-
example_btn_5 = gr.Button("Load Example Transcript", variant="secondary", size="sm")
|
| 1916 |
-
targeted_analysis_btn = gr.Button("Run Targeted Analysis", variant="primary")
|
| 1917 |
|
| 1918 |
with gr.Column():
|
| 1919 |
targeted_output = gr.Textbox(
|
|
@@ -1951,11 +1991,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1951 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1952 |
|
| 1953 |
if annotated_transcript.startswith("❌"):
|
| 1954 |
-
return annotated_transcript, "Annotation failed"
|
| 1955 |
elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1956 |
-
return annotated_transcript, "Annotation incomplete but proceeding"
|
| 1957 |
else:
|
| 1958 |
-
return annotated_transcript, "Annotation complete! Click 'Run Analysis' to continue."
|
| 1959 |
|
| 1960 |
def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
|
| 1961 |
"""Run the analysis step on the annotated transcript"""
|
|
@@ -1966,12 +2006,11 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 1966 |
|
| 1967 |
# Check if annotation was incomplete
|
| 1968 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1969 |
-
analysis_note = "Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 1970 |
else:
|
| 1971 |
analysis_note = ""
|
| 1972 |
|
| 1973 |
analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
|
| 1974 |
-
|
| 1975 |
return analysis_note + analysis_result
|
| 1976 |
|
| 1977 |
def run_manual_count_only(annotated_transcript):
|
|
@@ -2136,12 +2175,8 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2136 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2137 |
* Categorize types (word, phrase, sound level)
|
| 2138 |
* Provide examples and count summary
|
| 2139 |
-
-
|
| 2140 |
-
*
|
| 2141 |
-
- Pauses: Use verified count of {marker_counts.get('PAUSE', 0)}
|
| 2142 |
-
* Assess hesitation frequency
|
| 2143 |
-
- Total disfluency assessment: Use verified total of {category_totals.get('fluency_issues', 0)}
|
| 2144 |
-
* Rate: {category_totals.get('fluency_issues', 0)/linguistic_metrics.get('total_words', 1)*100:.2f} per 100 words
|
| 2145 |
* Provide objective rate calculation
|
| 2146 |
|
| 2147 |
B. Word Retrieval Issues:
|
|
@@ -2161,7 +2196,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2161 |
A. Vocabulary Analysis (use verified data):
|
| 2162 |
- Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
|
| 2163 |
- Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
|
| 2164 |
-
- Sophistication ratio: Use verified ratio of {category_totals
|
| 2165 |
- Type-Token Ratio: Use verified TTR from basic metrics
|
| 2166 |
- Provide examples of each vocabulary level from transcript
|
| 2167 |
|
|
@@ -2239,7 +2274,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2239 |
# Step 2: Run analysis
|
| 2240 |
analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2241 |
|
| 2242 |
-
return annotated_transcript, analysis_result, "Complete analysis finished!"
|
| 2243 |
|
| 2244 |
def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
|
| 2245 |
"""Run the complete speech analysis pipeline with ultimate analysis"""
|
|
@@ -2255,7 +2290,7 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2255 |
# Step 2: Run ultimate analysis
|
| 2256 |
ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2257 |
|
| 2258 |
-
return annotated_transcript, ultimate_result, "Complete speech analysis finished!"
|
| 2259 |
|
| 2260 |
# Single main event handler
|
| 2261 |
ultimate_analysis_btn.click(
|
|
@@ -2284,4 +2319,12 @@ with gr.Blocks(title="Speech Analysis", theme=gr.themes.Soft()) as demo:
|
|
| 2284 |
fn=analyze_targeted_area,
|
| 2285 |
inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
|
| 2286 |
outputs=[targeted_output]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2287 |
)
|
|
|
|
| 5 |
import requests
|
| 6 |
import re
|
| 7 |
import time
|
| 8 |
+
|
| 9 |
# Configure logging
|
| 10 |
logging.basicConfig(level=logging.INFO)
|
| 11 |
logger = logging.getLogger(__name__)
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
def call_claude_api_quick_analysis(prompt):
|
| 140 |
+
"""Call Claude API for quick focused analysis - single response only"""
|
|
|
|
|
|
|
|
|
|
| 141 |
if not ANTHROPIC_API_KEY:
|
| 142 |
return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
|
| 143 |
|
|
|
|
| 168 |
|
| 169 |
if response.status_code == 200:
|
| 170 |
response_json = response.json()
|
| 171 |
+
return response_json['content'][0]['text']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
else:
|
| 173 |
logger.error(f"Claude API error: {response.status_code} - {response.text}")
|
| 174 |
return f"❌ Claude API Error: {response.status_code}"
|
|
|
|
| 1545 |
- Count [REPETITION] markers: Categorize by type (word, phrase, sound)
|
| 1546 |
- Count [REVISION] markers: Analyze self-correction patterns
|
| 1547 |
- Count [PAUSE] markers: Assess hesitation frequency
|
| 1548 |
+
- Calculate total disfluency rate
|
|
|
|
|
|
|
| 1549 |
|
| 1550 |
B. Word Retrieval Issues:
|
| 1551 |
+
- Count [CIRCUMLOCUTION] markers: List each roundabout description
|
| 1552 |
+
- Count [INCOMPLETE] markers: Analyze abandoned thought patterns
|
| 1553 |
+
- Count [GENERIC] markers: Calculate specificity ratio
|
| 1554 |
+
- Count [WORD_SEARCH] markers: Identify retrieval difficulty areas
|
| 1555 |
|
| 1556 |
+
C. Grammatical Errors:
|
| 1557 |
+
- Count [GRAM_ERROR] markers by subcategory (verb tense, subject-verb agreement, etc.)
|
| 1558 |
+
- Count [SYNTAX_ERROR] markers: Analyze word order problems
|
| 1559 |
+
- Count [MORPH_ERROR] markers: Categorize morphological mistakes
|
| 1560 |
+
- Count [RUN_ON] markers: Assess sentence boundary awareness
|
| 1561 |
|
| 1562 |
+
2. LANGUAGE SKILLS ASSESSMENT (with specific evidence):
|
| 1563 |
|
| 1564 |
+
A. Lexical/Semantic Skills:
|
| 1565 |
+
- Use calculated Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
|
| 1566 |
+
- Count [SIMPLE_VOCAB] vs [COMPLEX_VOCAB] markers
|
| 1567 |
+
- Assess vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
|
| 1568 |
+
- Count [SEMANTIC_ERROR] markers and analyze patterns
|
|
|
|
| 1569 |
|
| 1570 |
+
B. Syntactic Skills:
|
| 1571 |
+
- Count [SIMPLE_SENT], [COMPLEX_SENT], [COMPOUND_SENT] markers
|
| 1572 |
+
- Calculate sentence complexity ratios
|
| 1573 |
+
- Assess clause complexity and embedding
|
| 1574 |
|
| 1575 |
+
C. Supralinguistic Skills:
|
| 1576 |
+
- Identify cause-effect relationships, inferences, non-literal language
|
| 1577 |
+
- Assess problem-solving language and metalinguistic awareness
|
| 1578 |
|
| 1579 |
+
3. COMPLEX SENTENCE ANALYSIS (with exact counts):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1580 |
|
| 1581 |
+
A. Coordinating Conjunctions:
|
| 1582 |
+
- Count and cite EVERY use of: and, but, or, so, yet, for, nor
|
| 1583 |
+
- Analyze patterns and age-appropriateness
|
|
|
|
| 1584 |
|
| 1585 |
+
B. Subordinating Conjunctions:
|
| 1586 |
+
- Count and cite EVERY use of: because, although, while, since, if, when, where, that, which, who
|
| 1587 |
+
- Analyze clause complexity and embedding depth
|
|
|
|
|
|
|
| 1588 |
|
| 1589 |
+
C. Sentence Structure Analysis:
|
| 1590 |
+
- Use calculated MLU: {linguistic_metrics.get('mlu_words', 0)} words, {linguistic_metrics.get('mlu_morphemes', 0)} morphemes
|
| 1591 |
+
- Calculate complexity ratios
|
|
|
|
|
|
|
|
|
|
| 1592 |
|
| 1593 |
+
4. FIGURATIVE LANGUAGE ANALYSIS (with exact counts):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1594 |
|
| 1595 |
+
A. Similes and Metaphors:
|
| 1596 |
+
- Count [FIGURATIVE] markers for similes (using "like" or "as")
|
| 1597 |
+
- Count [FIGURATIVE] markers for metaphors (direct comparisons)
|
|
|
|
|
|
|
| 1598 |
|
| 1599 |
+
B. Idioms and Non-literal Language:
|
| 1600 |
+
- Count and analyze idiomatic expressions
|
| 1601 |
+
- Assess comprehension and appropriate use
|
| 1602 |
+
|
| 1603 |
+
5. PRAGMATIC LANGUAGE ASSESSMENT (with specific examples):
|
| 1604 |
+
|
| 1605 |
+
A. Discourse Management:
|
| 1606 |
+
- Count [TOPIC_SHIFT] markers: Assess transition appropriateness
|
| 1607 |
+
- Count [TANGENT] markers: Analyze tangential speech patterns
|
| 1608 |
+
- Count [COHERENCE_BREAK] markers: Assess logical flow
|
| 1609 |
+
|
| 1610 |
+
B. Referential Communication:
|
| 1611 |
+
- Count [PRONOUN_REF] markers: Analyze referential clarity
|
| 1612 |
+
- Assess communicative effectiveness
|
| 1613 |
+
|
| 1614 |
+
6. VOCABULARY AND SEMANTIC ANALYSIS (with quantification):
|
| 1615 |
+
|
| 1616 |
+
A. Vocabulary Diversity:
|
| 1617 |
+
- Total words: {linguistic_metrics.get('total_words', 0)}
|
| 1618 |
- Unique words: {linguistic_metrics.get('unique_words', 0)}
|
| 1619 |
+
- Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
|
| 1620 |
+
- Vocabulary sophistication: {linguistic_metrics.get('vocabulary_sophistication', 0)}
|
| 1621 |
+
|
| 1622 |
+
B. Semantic Relationships:
|
| 1623 |
+
- Analyze word frequency patterns
|
| 1624 |
+
- Assess semantic precision and relationships
|
| 1625 |
+
|
| 1626 |
+
7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS (with counts):
|
| 1627 |
|
| 1628 |
+
A. Morphological Markers:
|
| 1629 |
+
- Count [MORPH_ERROR] markers and categorize
|
| 1630 |
+
- Analyze morpheme use patterns
|
| 1631 |
+
- Assess morphological complexity
|
| 1632 |
|
| 1633 |
+
B. Phonological Patterns:
|
| 1634 |
+
- Identify speech sound patterns from transcript
|
| 1635 |
+
- Assess syllable structure complexity
|
| 1636 |
|
| 1637 |
+
8. COGNITIVE-LINGUISTIC FACTORS (with evidence):
|
| 1638 |
+
|
| 1639 |
+
A. Working Memory:
|
| 1640 |
+
- Assess sentence length complexity using average: {linguistic_metrics.get('avg_sentence_length', 0)} words
|
| 1641 |
+
- Analyze information retention patterns
|
| 1642 |
+
|
| 1643 |
+
B. Processing Efficiency:
|
| 1644 |
+
- Analyze linguistic complexity and word-finding patterns
|
| 1645 |
+
- Assess cognitive demands of language structures
|
| 1646 |
+
|
| 1647 |
+
C. Executive Function:
|
| 1648 |
+
- Count self-correction patterns ([REVISION] markers)
|
| 1649 |
+
- Assess planning and organization in discourse
|
| 1650 |
+
|
| 1651 |
+
9. FLUENCY AND RHYTHM ANALYSIS (with quantification):
|
| 1652 |
+
|
| 1653 |
+
A. Disfluency Patterns:
|
| 1654 |
+
- Total fluency issues: {marker_analysis.get('category_totals', {}).get('fluency_issues', 0)}
|
| 1655 |
+
- Calculate disfluency rate per 100 words
|
| 1656 |
+
- Analyze impact on communication
|
| 1657 |
+
|
| 1658 |
+
B. Language Flow:
|
| 1659 |
+
- Assess sentence length variability: std = {linguistic_metrics.get('sentence_length_std', 0)}
|
| 1660 |
+
- Analyze linguistic markers of hesitation
|
| 1661 |
+
|
| 1662 |
+
10. QUANTITATIVE METRICS:
|
| 1663 |
+
- Total words: {linguistic_metrics.get('total_words', 0)}
|
| 1664 |
+
- Total sentences: {linguistic_metrics.get('total_sentences', 0)}
|
| 1665 |
+
- MLU (words): {linguistic_metrics.get('mlu_words', 0)}
|
| 1666 |
+
- MLU (morphemes): {linguistic_metrics.get('mlu_morphemes', 0)}
|
| 1667 |
+
- Type-Token Ratio: {linguistic_metrics.get('type_token_ratio', 0)}
|
| 1668 |
+
- Grammar error rate: Calculate from marker counts
|
| 1669 |
+
- Vocabulary sophistication ratio: {marker_analysis.get('category_totals', {}).get('vocab_sophistication_ratio', 0):.3f}
|
| 1670 |
+
|
| 1671 |
+
CRITICAL REQUIREMENTS:
|
| 1672 |
+
- Use the provided calculated metrics in your analysis
|
| 1673 |
+
- Provide EXACT counts for every marker type
|
| 1674 |
+
- Calculate precise percentages and show your work
|
| 1675 |
+
- Give specific examples from the transcript
|
| 1676 |
+
- If annotation is incomplete, supplement with analysis of the original transcript
|
| 1677 |
+
- Complete ALL 8 sections - use <CONTINUE> if needed
|
| 1678 |
+
- Focus on objective data only - NO clinical interpretations
|
| 1679 |
"""
|
| 1680 |
|
| 1681 |
+
return call_claude_api_with_continuation(analysis_prompt)
|
| 1682 |
|
| 1683 |
def full_analysis_pipeline(transcript_content, age, gender, slp_notes, progress_callback=None):
|
| 1684 |
"""Complete pipeline: annotate then analyze with progressive updates"""
|
|
|
|
| 1690 |
if progress_callback:
|
| 1691 |
progress_callback("🏷️ Step 1: Annotating transcript with linguistic markers...")
|
| 1692 |
|
|
|
|
| 1693 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1694 |
|
| 1695 |
if annotated_transcript.startswith("❌"):
|
|
|
|
| 1697 |
|
| 1698 |
# Return annotated transcript immediately
|
| 1699 |
if progress_callback:
|
| 1700 |
+
progress_callback("✅ Step 1 Complete: Annotation finished! Starting analysis...")
|
| 1701 |
|
| 1702 |
# Check if annotation was incomplete
|
| 1703 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
|
|
|
| 1709 |
# Step 2: Analyze annotated transcript with original as backup
|
| 1710 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1711 |
if progress_callback:
|
| 1712 |
+
progress_callback("π Step 2: Analyzing annotated transcript (this may take several minutes)...")
|
| 1713 |
|
| 1714 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1715 |
|
| 1716 |
if progress_callback:
|
| 1717 |
+
progress_callback("✅ Analysis Complete!")
|
| 1718 |
|
| 1719 |
return annotated_transcript, analysis_note + analysis_result
|
| 1720 |
|
|
|
|
| 1726 |
|
| 1727 |
# Step 1: Annotate transcript
|
| 1728 |
logger.info("Step 1: Annotating transcript with linguistic markers...")
|
| 1729 |
+
yield "", "", "🏷️ Step 1: Annotating transcript with linguistic markers..."
|
| 1730 |
|
| 1731 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1732 |
|
|
|
|
| 1735 |
return
|
| 1736 |
|
| 1737 |
# Return annotated transcript immediately after completion
|
| 1738 |
+
yield annotated_transcript, "", "✅ Step 1 Complete! Starting analysis..."
|
| 1739 |
|
| 1740 |
# Check if annotation was incomplete
|
| 1741 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1742 |
logger.warning("Annotation incomplete, proceeding with analysis")
|
| 1743 |
+
analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 1744 |
+
yield annotated_transcript, "", "⚠️ Annotation incomplete, continuing with analysis..."
|
| 1745 |
else:
|
| 1746 |
analysis_note = ""
|
| 1747 |
|
| 1748 |
# Step 2: Analyze annotated transcript
|
| 1749 |
logger.info("Step 2: Analyzing annotated transcript...")
|
| 1750 |
+
yield annotated_transcript, "", "π Step 2: Analyzing annotated transcript (this may take several minutes)..."
|
| 1751 |
|
| 1752 |
analysis_result = analyze_with_backup(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 1753 |
|
|
|
|
| 1806 |
lines=3
|
| 1807 |
)
|
| 1808 |
|
| 1809 |
+
example_btn = gr.Button("π Load Example Transcript", variant="secondary", size="sm")
|
| 1810 |
+
|
| 1811 |
+
# Single main analysis button
|
| 1812 |
+
ultimate_analysis_btn = gr.Button("π Run Complete Speech Analysis", variant="primary", size="lg")
|
| 1813 |
|
| 1814 |
with gr.Column(scale=3):
|
| 1815 |
status_display = gr.Markdown("Ready to analyze transcript")
|
|
|
|
| 1828 |
show_copy_button=True
|
| 1829 |
)
|
| 1830 |
|
| 1831 |
+
with gr.Tab("🏷️ Annotation Only"):
|
| 1832 |
gr.Markdown("### Step 1: Annotate transcript with linguistic markers")
|
| 1833 |
|
| 1834 |
with gr.Row():
|
|
|
|
| 1852 |
lines=3
|
| 1853 |
)
|
| 1854 |
|
| 1855 |
+
example_btn_2 = gr.Button("π Load Example Transcript", variant="secondary", size="sm")
|
| 1856 |
+
annotate_btn = gr.Button("🏷️ Annotate Transcript", variant="secondary")
|
|
|
|
| 1857 |
|
| 1858 |
with gr.Column():
|
| 1859 |
annotation_output = gr.Textbox(
|
|
|
|
| 1905 |
q5_btn = gr.Button("Word finding issues?", size="sm", variant="secondary")
|
| 1906 |
q6_btn = gr.Button("Fluency problems?", size="sm", variant="secondary")
|
| 1907 |
|
| 1908 |
+
example_btn_4 = gr.Button("π Load Example Transcript", variant="secondary", size="sm")
|
| 1909 |
+
ask_question_btn = gr.Button("❓ Ask Question", variant="primary")
|
| 1910 |
|
| 1911 |
with gr.Column():
|
| 1912 |
question_output = gr.Textbox(
|
|
|
|
| 1915 |
show_copy_button=True
|
| 1916 |
)
|
| 1917 |
|
| 1918 |
+
with gr.Tab("🎯 Targeted Analysis"):
|
| 1919 |
gr.Markdown("### Focus on specific areas of speech and language")
|
| 1920 |
|
| 1921 |
with gr.Row():
|
|
|
|
| 1952 |
lines=2
|
| 1953 |
)
|
| 1954 |
|
| 1955 |
+
example_btn_5 = gr.Button("π Load Example Transcript", variant="secondary", size="sm")
|
| 1956 |
+
targeted_analysis_btn = gr.Button("🎯 Run Targeted Analysis", variant="primary")
|
| 1957 |
|
| 1958 |
with gr.Column():
|
| 1959 |
targeted_output = gr.Textbox(
|
|
|
|
| 1991 |
annotated_transcript = annotate_transcript(transcript_content, age, gender, slp_notes)
|
| 1992 |
|
| 1993 |
if annotated_transcript.startswith("❌"):
|
| 1994 |
+
return annotated_transcript, "❌ Annotation failed"
|
| 1995 |
elif annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 1996 |
+
return annotated_transcript, "⚠️ Annotation incomplete but proceeding"
|
| 1997 |
else:
|
| 1998 |
+
return annotated_transcript, "✅ Annotation complete! Click 'Run Analysis' to continue."
|
| 1999 |
|
| 2000 |
def run_analysis_step(annotated_transcript, original_transcript, age, gender, slp_notes):
|
| 2001 |
"""Run the analysis step on the annotated transcript"""
|
|
|
|
| 2006 |
|
| 2007 |
# Check if annotation was incomplete
|
| 2008 |
if annotated_transcript.startswith("⚠️ ANNOTATION INCOMPLETE"):
|
| 2009 |
+
analysis_note = "⚠️ Note: Annotation was incomplete. Analysis primarily based on original transcript.\n\n"
|
| 2010 |
else:
|
| 2011 |
analysis_note = ""
|
| 2012 |
|
| 2013 |
analysis_result = analyze_with_backup(annotated_transcript, original_transcript, age, gender, slp_notes)
|
|
|
|
| 2014 |
return analysis_note + analysis_result
|
| 2015 |
|
| 2016 |
def run_manual_count_only(annotated_transcript):
|
|
|
|
| 2175 |
- Repetitions: Use verified count of {marker_counts.get('REPETITION', 0)}
|
| 2176 |
* Categorize types (word, phrase, sound level)
|
| 2177 |
* Provide examples and count summary
|
| 2178 |
+
- Total disfluency assessment: Use verified total of {category_totals['fluency_issues']}
|
| 2179 |
+
* Rate: {category_totals['fluency_issues']/total_words*100:.2f} per 100 words
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2180 |
* Provide objective rate calculation
|
| 2181 |
|
| 2182 |
B. Word Retrieval Issues:
|
|
|
|
| 2196 |
A. Vocabulary Analysis (use verified data):
|
| 2197 |
- Simple vocabulary: Use verified count of {marker_counts.get('SIMPLE_VOCAB', 0)}
|
| 2198 |
- Complex vocabulary: Use verified count of {marker_counts.get('COMPLEX_VOCAB', 0)}
|
| 2199 |
+
- Sophistication ratio: Use verified ratio of {category_totals['vocab_sophistication_ratio']:.3f}
|
| 2200 |
- Type-Token Ratio: Use verified TTR from basic metrics
|
| 2201 |
- Provide examples of each vocabulary level from transcript
|
| 2202 |
|
|
|
|
| 2274 |
# Step 2: Run analysis
|
| 2275 |
analysis_result = run_analysis_step(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2276 |
|
| 2277 |
+
return annotated_transcript, analysis_result, "✅ Complete analysis finished!"
|
| 2278 |
|
| 2279 |
def run_complete_speech_analysis(transcript_content, age, gender, slp_notes):
|
| 2280 |
"""Run the complete speech analysis pipeline with ultimate analysis"""
|
|
|
|
| 2290 |
# Step 2: Run ultimate analysis
|
| 2291 |
ultimate_result = run_ultimate_analysis(annotated_transcript, transcript_content, age, gender, slp_notes)
|
| 2292 |
|
| 2293 |
+
return annotated_transcript, ultimate_result, "✅ Complete speech analysis finished!"
|
| 2294 |
|
| 2295 |
# Single main event handler
|
| 2296 |
ultimate_analysis_btn.click(
|
|
|
|
| 2319 |
fn=analyze_targeted_area,
|
| 2320 |
inputs=[transcript_input_5, analysis_area, age_input_5, gender_input_5, slp_notes_input_5],
|
| 2321 |
outputs=[targeted_output]
|
| 2322 |
+
)
|
| 2323 |
+
|
| 2324 |
+
if __name__ == "__main__":
|
| 2325 |
+
demo.launch(
|
| 2326 |
+
server_name="0.0.0.0",
|
| 2327 |
+
server_port=7860,
|
| 2328 |
+
share=True,
|
| 2329 |
+
show_error=True
|
| 2330 |
)
|