Spaces:

SreekarB
/

SLPAnalysis

Running

App Files Files Community

SreekarB committed on Jul 10

Commit

f30897e

verified ·

1 Parent(s): b5be089

Upload simple_casl_app.py

Browse files

Files changed (1) hide show

simple_casl_app.py +133 -192

simple_casl_app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import requests
 import re
 import tempfile
 import numpy as np
 # Configure logging
 logging.basicConfig(level=logging.INFO)
@@ -106,8 +107,8 @@ if ANTHROPIC_API_KEY:
 else:
     logger.warning("Claude API key not found - using demo mode")
-def validate_analysis_completeness(response_text):
-    """Validate that all 12 sections are present in the analysis exactly once"""
     required_sections = [
         "1. SPEECH FACTORS",
         "2. LANGUAGE SKILLS ASSESSMENT",
@@ -123,107 +124,69 @@ def validate_analysis_completeness(response_text):
         "12. PROGNOSIS AND SUMMARY"
     ]
-    missing_sections = []
-    duplicate_sections = []
-    section_counts = {}
-    for section in required_sections:
-        count = response_text.count(section)
-        section_counts[section] = count
-        if count == 0:
-            missing_sections.append(section)
-        elif count > 1:
-            duplicate_sections.append(section)
-    # Log detailed validation results
-    print(f"\n=== COMPREHENSIVE VALIDATION ===")
-    print(f"Total response length: {len(response_text)} characters")
-    print(f"Missing sections: {missing_sections}")
-    print(f"Duplicate sections: {duplicate_sections}")
-    print(f"Section counts: {section_counts}")
-    if missing_sections:
-        print(f"\n⚠️  MISSING SECTIONS: {missing_sections}")
-        return False, missing_sections, duplicate_sections, section_counts
-    elif duplicate_sections:
-        print(f"\n⚠️  DUPLICATE SECTIONS: {duplicate_sections}")
-        return False, missing_sections, duplicate_sections, section_counts
-    else:
-        print(f"\n✅ ALL 12 SECTIONS PRESENT EXACTLY ONCE")
-        return True, missing_sections, duplicate_sections, section_counts
-def fix_incomplete_analysis(response_text, missing_sections):
-    """Attempt to fix incomplete analysis by requesting missing sections"""
-    if not missing_sections:
-        return response_text
-    if not ANTHROPIC_API_KEY:
-        return response_text + "\n\n❌ Cannot fix incomplete analysis - API key not configured"
-    try:
-        # Create a focused prompt for missing sections
-        missing_sections_text = "\n".join([f"- {section}" for section in missing_sections])
-        fix_prompt = f"""
-The following sections are missing from the CASL analysis. Please provide ONLY these missing sections:
-{missing_sections_text}
-IMPORTANT:
-- Provide ONLY the missing sections listed above
-- Do not repeat any sections that are already present
-- Use the exact section headers as shown above
-- Make each section comprehensive and detailed
-- Ensure clinical accuracy and appropriate depth for SLP assessment
-"""
-        headers = {
-            "Content-Type": "application/json",
-            "x-api-key": ANTHROPIC_API_KEY,
-            "anthropic-version": "2023-06-01"
-        }
-        data = {
-            "model": "claude-3-5-sonnet-20241022",
-            "max_tokens": 4096,
-            "messages": [
-                {
-                    "role": "user",
-                    "content": fix_prompt
-                }
-            ]
-        }
-        response = requests.post(
-            "https://api.anthropic.com/v1/messages",
-            headers=headers,
-            json=data,
-            timeout=90
-        )
-        if response.status_code == 200:
-            response_json = response.json()
-            fix_text = response_json['content'][0]['text']
-            # Combine original response with fix
-            complete_response = response_text + "\n\n" + fix_text
-            print(f"\n=== FIXED INCOMPLETE ANALYSIS ===")
-            print(f"Added missing sections: {missing_sections}")
-            print(f"Fix text length: {len(fix_text)} characters")
-            print("=" * 50)
-            return complete_response
-        else:
-            logger.error(f"Error fixing incomplete analysis: {response.status_code}")
-            return response_text + f"\n\n❌ Error fixing incomplete analysis: {response.status_code}"
-    except Exception as e:
-        logger.error(f"Error in fix_incomplete_analysis: {str(e)}")
-        return response_text + f"\n\n❌ Error fixing incomplete analysis: {str(e)}"
-def call_claude_api_with_continuation(prompt, max_continuations=3):
-    """Call Claude API with continuation prompting to ensure complete responses"""
     if not ANTHROPIC_API_KEY:
         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
@@ -243,20 +206,29 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
         "12. PROGNOSIS AND SUMMARY"
     ]
     try:
-        response_parts = []  # Store each part as a separate item
         continuation_count = 0
-        completed_sections = set()  # Track which sections have been completed
         # Add continuation instruction to original prompt
         initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
-        while continuation_count <= max_continuations:
             if continuation_count == 0:
                 current_prompt = initial_prompt
             else:
                 # For continuations, provide context about what was already covered
-                current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1} of {max_continuations}):\n\nIMPORTANT: Do not repeat what you've already written. Continue with the next section or complete any unfinished sections. If you're done, do not include <CONTINUE>. Provide the remaining analysis sections. Make sure to complete ALL 12 sections of the analysis."
             headers = {
                 "Content-Type": "application/json",
@@ -294,26 +266,54 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
                 print(f"Last 200 chars: {response_text[-200:]}...")
                 print("=" * 50)
-                # Store this part
-                response_parts.append(response_text)
-                # Check which sections are present in this part
-                for section in required_sections:
-                    if section in response_text:
-                        completed_sections.add(section)
                 # Check if response indicates continuation is needed
                 needs_continuation = "<CONTINUE>" in response_text
                 print(f"Needs continuation: {needs_continuation}")
-                print(f"Continuation count: {continuation_count}/{max_continuations}")
-                print(f"Completed sections: {len(completed_sections)}/12")
-                print(f"Missing sections: {[s for s in required_sections if s not in completed_sections]}")
-                # Continue if <CONTINUE> is present and we haven't reached max
-                if needs_continuation and continuation_count < max_continuations:
                     continuation_count += 1
-                    logger.info(f"Continuing analysis (attempt {continuation_count}/{max_continuations})")
                     continue
                 else:
                     break
@@ -325,42 +325,29 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
         logger.error(f"Error calling Claude API: {str(e)}")
         return f"❌ Error: {str(e)}"
-    # Combine all parts and clean up
-    full_response = "\n\n".join(response_parts)
-    full_response = full_response.replace("<CONTINUE>", "")
-    # Validate completeness
-    missing_sections = []
-    duplicate_sections = []
-    section_counts = {}
-    for section in required_sections:
-        count = full_response.count(section)
-        section_counts[section] = count
-        if count == 0:
-            missing_sections.append(section)
-        elif count > 1:
-            duplicate_sections.append(section)
-    # Log validation results
-    print(f"\n=== VALIDATION RESULTS ===")
-    print(f"Total response length: {len(full_response)} characters")
-    print(f"Number of parts: {len(response_parts)}")
-    print(f"Missing sections: {missing_sections}")
-    print(f"Duplicate sections: {duplicate_sections}")
-    print(f"Section counts: {section_counts}")
     print("=" * 50)
-    # Add completion indicator
-    if len(response_parts) > 1:
-        full_response += f"\n\n[Analysis completed in {len(response_parts)} parts]"
-    # Print the entire final response for debugging
-    print(f"\n=== ENTIRE FINAL RESPONSE ===")
-    print(full_response)
-    print("=" * 50)
-    return full_response
 def call_claude_api(prompt):
     """Call Claude API directly (legacy function for backward compatibility)"""
@@ -873,30 +860,6 @@ def analyze_transcript_content(transcript_content, age, gender, slp_notes):
     # Get analysis from Claude API
     result = call_claude_api_with_continuation(prompt, max_continuations=5)
-    # Validate completeness and fix if needed
-    is_complete, missing_sections, duplicate_sections, section_counts = validate_analysis_completeness(result)
-    if not is_complete:
-        print(f"\n🔧 ATTEMPTING TO FIX INCOMPLETE ANALYSIS...")
-        print(f"Missing sections: {missing_sections}")
-        print(f"Duplicate sections: {duplicate_sections}")
-        # Try to fix missing sections
-        if missing_sections:
-            result = fix_incomplete_analysis(result, missing_sections)
-            # Re-validate after fix
-            is_complete_after_fix, missing_after_fix, duplicate_after_fix, counts_after_fix = validate_analysis_completeness(result)
-            if not is_complete_after_fix:
-                print(f"\n⚠️  ANALYSIS STILL INCOMPLETE AFTER FIX ATTEMPT")
-                print(f"Still missing: {missing_after_fix}")
-                print(f"Still duplicate: {duplicate_after_fix}")
-                result += f"\n\n⚠️  WARNING: Analysis may be incomplete. Missing sections: {missing_after_fix}"
-            else:
-                print(f"\n✅ ANALYSIS FIXED SUCCESSFULLY")
     return result
 def analyze_transcript(file, age, gender, slp_notes):
@@ -1021,14 +984,6 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
     # Get targeted analysis from Claude API
     result = call_claude_api_with_continuation(prompt, max_continuations=3)
-    # For targeted analysis, we don't need the full 12-section validation
-    # but we can still validate that the response is complete and well-structured
-    if len(result.strip()) < 500:  # Basic length check
-        print(f"\n⚠️  TARGETED ANALYSIS MAY BE INCOMPLETE")
-        print(f"Response length: {len(result)} characters")
-        result += f"\n\n⚠️  WARNING: This targeted analysis may be incomplete. Please review the results carefully."
     return result
 # Create enhanced interface with tabs
@@ -1042,7 +997,7 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
     with gr.Tabs():
         # Tab 1: Basic Analysis
-        with gr.Tab("📊 Basic Analysis"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown("### Input Options")
@@ -1467,20 +1422,6 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
         """
         result = call_claude_api_with_continuation(prompt, max_continuations=2)
-        # For quick analysis, validate that all selected questions were addressed
-        if questions and len(questions) > 0:
-            missing_questions = []
-            for question in questions:
-                # Check if the question was addressed (basic check)
-                if question.lower() not in result.lower():
-                    missing_questions.append(question)
-            if missing_questions:
-                print(f"\n⚠️  QUICK ANALYSIS MAY BE INCOMPLETE")
-                print(f"Missing questions: {missing_questions}")
-                result += f"\n\n⚠️  WARNING: Some selected questions may not have been fully addressed: {missing_questions}"
         progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
         return result, progress_msg

 import re
 import tempfile
 import numpy as np
+import time
 # Configure logging
 logging.basicConfig(level=logging.INFO)
 else:
     logger.warning("Claude API key not found - using demo mode")
def segment_response_by_sections(response_text):
    """Split an analysis response into its numbered CASL sections.

    The text is scanned line by line. A line counts as a section header when
    it contains one of the required section titles and nothing but
    decoration (whitespace, ``#``, ``*``, punctuation, ...) precedes the
    title on that line. Lines following a header are collected as that
    section's content until the next header.

    Args:
        response_text: Raw model output. ``None`` or an empty string yields
            an empty result.

    Returns:
        dict mapping section title -> stripped section content. Text before
        the first header is discarded, and a header with no following lines
        is not recorded. If a header occurs more than once, the last
        occurrence with content wins.
    """
    required_sections = [
        "1. SPEECH FACTORS",
        "2. LANGUAGE SKILLS ASSESSMENT",
        "3. COMPLEX SENTENCE ANALYSIS",
        "4. FIGURATIVE LANGUAGE ANALYSIS",
        "5. PRAGMATIC LANGUAGE ASSESSMENT",
        "6. VOCABULARY AND SEMANTIC ANALYSIS",
        "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
        "8. COGNITIVE-LINGUISTIC FACTORS",
        "9. FLUENCY AND RHYTHM ANALYSIS",
        "10. QUANTITATIVE METRICS",
        "11. CLINICAL IMPLICATIONS",
        "12. PROGNOSIS AND SUMMARY"
    ]

    def _header_for(line):
        """Return the section title that `line` introduces, or None."""
        for section in required_sections:
            idx = line.find(section)
            # Only treat the title as a header when no letters/digits come
            # before it; otherwise a prose mention such as
            # "as noted in 1. SPEECH FACTORS" would hijack the current
            # section and overwrite the real one.
            if idx != -1 and not any(ch.isalnum() for ch in line[:idx]):
                return section
        return None

    sections = {}
    current_section = None
    current_content = []

    for line in (response_text or "").split('\n'):
        header = _header_for(line)
        if header is not None:
            # Close out the section that was being collected, if it has content
            if current_section and current_content:
                sections[current_section] = '\n'.join(current_content).strip()
            current_section = header
            current_content = []
        elif current_section:
            current_content.append(line)

    # Save the last section
    if current_section and current_content:
        sections[current_section] = '\n'.join(current_content).strip()

    return sections
def combine_sections_smartly(sections_dict, section_order=None,
                             title="COMPREHENSIVE CASL ANALYSIS"):
    """Assemble segmented sections into a single report string.

    Args:
        sections_dict: Mapping of section title -> section content. ``None``
            is treated as an empty mapping.
        section_order: Iterable of section titles giving the output order.
            Defaults to the 12 standard CASL sections. Titles in
            `sections_dict` that are not listed here are omitted.
        title: First line of the report.

    Returns:
        The title, a blank line, then for each section present (in
        `section_order`): the section title, a blank line, its content and a
        blank line, all joined with newlines. Because a dict holds each key
        once, no section can appear twice.
    """
    if section_order is None:
        section_order = (
            "1. SPEECH FACTORS",
            "2. LANGUAGE SKILLS ASSESSMENT",
            "3. COMPLEX SENTENCE ANALYSIS",
            "4. FIGURATIVE LANGUAGE ANALYSIS",
            "5. PRAGMATIC LANGUAGE ASSESSMENT",
            "6. VOCABULARY AND SEMANTIC ANALYSIS",
            "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
            "8. COGNITIVE-LINGUISTIC FACTORS",
            "9. FLUENCY AND RHYTHM ANALYSIS",
            "10. QUANTITATIVE METRICS",
            "11. CLINICAL IMPLICATIONS",
            "12. PROGNOSIS AND SUMMARY",
        )

    # Tolerate a missing mapping instead of raising TypeError on `in`
    sections_dict = sections_dict or {}

    combined_parts = [title, ""]
    for section in section_order:
        if section in sections_dict:
            combined_parts.extend((section, "", sections_dict[section], ""))

    return '\n'.join(combined_parts)
+def call_claude_api_with_continuation(prompt, max_continuations=0):
+    """Call Claude API with smart continuation system - unlimited continuations until complete"""
     if not ANTHROPIC_API_KEY:
         return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
         "12. PROGNOSIS AND SUMMARY"
     ]
+    # Safety limits to prevent infinite loops
+    MAX_CONTINUATIONS = 20  # Maximum 20 API calls
+    MAX_TIME_MINUTES = 10   # Maximum 10 minutes total
+    MIN_PROGRESS_PER_CALL = 1  # Must add at least 1 new section per call
     try:
+        all_sections = {}  # Store all sections found across all parts
         continuation_count = 0
+        start_time = time.time()
+        last_section_count = 0  # Track progress between calls
         # Add continuation instruction to original prompt
         initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
+        while True:  # Unlimited continuations until complete
             if continuation_count == 0:
                 current_prompt = initial_prompt
             else:
                 # For continuations, provide context about what was already covered
+                missing_sections = [s for s in required_sections if s not in all_sections]
+                missing_text = "\n".join([f"- {section}" for section in missing_sections])
+                current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1}):\n\nIMPORTANT: The following sections are still missing. Please provide ONLY these missing sections:\n\n{missing_text}\n\nDo not repeat any sections that are already complete. Focus only on the missing sections listed above."
             headers = {
                 "Content-Type": "application/json",
                 print(f"Last 200 chars: {response_text[-200:]}...")
                 print("=" * 50)
+                # Segment this part and add new sections to our collection
+                part_sections = segment_response_by_sections(response_text)
+                for section, content in part_sections.items():
+                    if section not in all_sections:  # Only add if not already present
+                        all_sections[section] = content
+                        print(f"Added section: {section}")
+                    else:
+                        print(f"Skipped duplicate section: {section}")
+                # Check completion status
+                completed_sections = len(all_sections)
+                missing_sections = [s for s in required_sections if s not in all_sections]
+                print(f"Completed sections: {completed_sections}/12")
+                print(f"Missing sections: {missing_sections}")
                 # Check if response indicates continuation is needed
                 needs_continuation = "<CONTINUE>" in response_text
                 print(f"Needs continuation: {needs_continuation}")
+                print(f"Continuation count: {continuation_count}")
+                # Safety checks to prevent infinite loops
+                current_time = time.time()
+                elapsed_minutes = (current_time - start_time) / 60
+                current_section_count = len(all_sections)
+                progress_made = current_section_count - last_section_count
+                # Check if we're making progress
+                if continuation_count > 0 and progress_made < MIN_PROGRESS_PER_CALL:
+                    logger.warning(f"No progress made in last call (added {progress_made} sections). Stopping to prevent infinite loop.")
+                    break
+                # Check time limit
+                if elapsed_minutes > MAX_TIME_MINUTES:
+                    logger.warning(f"Time limit exceeded ({elapsed_minutes:.1f} minutes). Stopping to prevent excessive API usage.")
+                    break
+                # Check continuation limit
+                if continuation_count >= MAX_CONTINUATIONS:
+                    logger.warning(f"Continuation limit reached ({MAX_CONTINUATIONS} calls). Stopping to prevent excessive API usage.")
+                    break
+                # Continue if <CONTINUE> is present and safety checks pass
+                if needs_continuation:
                     continuation_count += 1
+                    last_section_count = current_section_count
+                    logger.info(f"Continuing analysis (attempt {continuation_count}/{MAX_CONTINUATIONS}, {elapsed_minutes:.1f} minutes elapsed)")
                     continue
                 else:
                     break
         logger.error(f"Error calling Claude API: {str(e)}")
         return f"❌ Error: {str(e)}"
+    # Combine all sections in the correct order
+    final_response = combine_sections_smartly(all_sections)
+    # Log final results
+    print(f"\n=== FINAL SMART VALIDATION ===")
+    print(f"Total sections found: {len(all_sections)}")
+    print(f"All sections present: {len(all_sections) == 12}")
+    print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
+    print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
+    print(f"Total API calls: {continuation_count + 1}")
     print("=" * 50)
+    # Add completion indicator with safety info
+    if continuation_count > 0:
+        final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
+    # Add warning if incomplete due to safety limits
+    if len(all_sections) < 12:
+        missing_sections = [s for s in required_sections if s not in all_sections]
+        final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
+        final_response += f"\nConsider running analysis again or increasing limits if needed."
+    return final_response
 def call_claude_api(prompt):
     """Call Claude API directly (legacy function for backward compatibility)"""
     # Get analysis from Claude API
     result = call_claude_api_with_continuation(prompt, max_continuations=5)
     return result
 def analyze_transcript(file, age, gender, slp_notes):
     # Get targeted analysis from Claude API
     result = call_claude_api_with_continuation(prompt, max_continuations=3)
     return result
 # Create enhanced interface with tabs
     with gr.Tabs():
         # Tab 1: Basic Analysis
+        with gr.Tab("📊 Basic Analysis"):
             with gr.Row():
                 with gr.Column():
                     gr.Markdown("### Input Options")
         """
         result = call_claude_api_with_continuation(prompt, max_continuations=2)
         progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
         return result, progress_msg