SreekarB committed on
Commit
f30897e
·
verified ·
1 Parent(s): b5be089

Upload simple_casl_app.py

Browse files
Files changed (1) hide show
  1. simple_casl_app.py +133 -192
simple_casl_app.py CHANGED
@@ -6,6 +6,7 @@ import requests
6
  import re
7
  import tempfile
8
  import numpy as np
 
9
 
10
  # Configure logging
11
  logging.basicConfig(level=logging.INFO)
@@ -106,8 +107,8 @@ if ANTHROPIC_API_KEY:
106
  else:
107
  logger.warning("Claude API key not found - using demo mode")
108
 
109
- def validate_analysis_completeness(response_text):
110
- """Validate that all 12 sections are present in the analysis exactly once"""
111
  required_sections = [
112
  "1. SPEECH FACTORS",
113
  "2. LANGUAGE SKILLS ASSESSMENT",
@@ -123,107 +124,69 @@ def validate_analysis_completeness(response_text):
123
  "12. PROGNOSIS AND SUMMARY"
124
  ]
125
 
126
- missing_sections = []
127
- duplicate_sections = []
128
- section_counts = {}
129
 
130
- for section in required_sections:
131
- count = response_text.count(section)
132
- section_counts[section] = count
133
- if count == 0:
134
- missing_sections.append(section)
135
- elif count > 1:
136
- duplicate_sections.append(section)
137
-
138
- # Log detailed validation results
139
- print(f"\n=== COMPREHENSIVE VALIDATION ===")
140
- print(f"Total response length: {len(response_text)} characters")
141
- print(f"Missing sections: {missing_sections}")
142
- print(f"Duplicate sections: {duplicate_sections}")
143
- print(f"Section counts: {section_counts}")
144
-
145
- if missing_sections:
146
- print(f"\n⚠️ MISSING SECTIONS: {missing_sections}")
147
- return False, missing_sections, duplicate_sections, section_counts
148
- elif duplicate_sections:
149
- print(f"\n⚠️ DUPLICATE SECTIONS: {duplicate_sections}")
150
- return False, missing_sections, duplicate_sections, section_counts
151
- else:
152
- print(f"\n✅ ALL 12 SECTIONS PRESENT EXACTLY ONCE")
153
- return True, missing_sections, duplicate_sections, section_counts
 
 
154
 
155
- def fix_incomplete_analysis(response_text, missing_sections):
156
- """Attempt to fix incomplete analysis by requesting missing sections"""
157
- if not missing_sections:
158
- return response_text
 
 
 
 
 
 
 
 
 
 
 
 
159
 
160
- if not ANTHROPIC_API_KEY:
161
- return response_text + "\n\n❌ Cannot fix incomplete analysis - API key not configured"
 
162
 
163
- try:
164
- # Create a focused prompt for missing sections
165
- missing_sections_text = "\n".join([f"- {section}" for section in missing_sections])
166
-
167
- fix_prompt = f"""
168
- The following sections are missing from the CASL analysis. Please provide ONLY these missing sections:
169
-
170
- {missing_sections_text}
171
-
172
- IMPORTANT:
173
- - Provide ONLY the missing sections listed above
174
- - Do not repeat any sections that are already present
175
- - Use the exact section headers as shown above
176
- - Make each section comprehensive and detailed
177
- - Ensure clinical accuracy and appropriate depth for SLP assessment
178
- """
179
-
180
- headers = {
181
- "Content-Type": "application/json",
182
- "x-api-key": ANTHROPIC_API_KEY,
183
- "anthropic-version": "2023-06-01"
184
- }
185
-
186
- data = {
187
- "model": "claude-3-5-sonnet-20241022",
188
- "max_tokens": 4096,
189
- "messages": [
190
- {
191
- "role": "user",
192
- "content": fix_prompt
193
- }
194
- ]
195
- }
196
-
197
- response = requests.post(
198
- "https://api.anthropic.com/v1/messages",
199
- headers=headers,
200
- json=data,
201
- timeout=90
202
- )
203
-
204
- if response.status_code == 200:
205
- response_json = response.json()
206
- fix_text = response_json['content'][0]['text']
207
-
208
- # Combine original response with fix
209
- complete_response = response_text + "\n\n" + fix_text
210
-
211
- print(f"\n=== FIXED INCOMPLETE ANALYSIS ===")
212
- print(f"Added missing sections: {missing_sections}")
213
- print(f"Fix text length: {len(fix_text)} characters")
214
- print("=" * 50)
215
-
216
- return complete_response
217
- else:
218
- logger.error(f"Error fixing incomplete analysis: {response.status_code}")
219
- return response_text + f"\n\n❌ Error fixing incomplete analysis: {response.status_code}"
220
-
221
- except Exception as e:
222
- logger.error(f"Error in fix_incomplete_analysis: {str(e)}")
223
- return response_text + f"\n\n❌ Error fixing incomplete analysis: {str(e)}"
224
 
225
- def call_claude_api_with_continuation(prompt, max_continuations=3):
226
- """Call Claude API with continuation prompting to ensure complete responses"""
227
  if not ANTHROPIC_API_KEY:
228
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
229
 
@@ -243,20 +206,29 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
243
  "12. PROGNOSIS AND SUMMARY"
244
  ]
245
 
 
 
 
 
 
246
  try:
247
- response_parts = [] # Store each part as a separate item
248
  continuation_count = 0
249
- completed_sections = set() # Track which sections have been completed
 
250
 
251
  # Add continuation instruction to original prompt
252
  initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
253
 
254
- while continuation_count <= max_continuations:
255
  if continuation_count == 0:
256
  current_prompt = initial_prompt
257
  else:
258
  # For continuations, provide context about what was already covered
259
- current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1} of {max_continuations}):\n\nIMPORTANT: Do not repeat what you've already written. Continue with the next section or complete any unfinished sections. If you're done, do not include <CONTINUE>. Provide the remaining analysis sections. Make sure to complete ALL 12 sections of the analysis."
 
 
 
260
 
261
  headers = {
262
  "Content-Type": "application/json",
@@ -294,26 +266,54 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
294
  print(f"Last 200 chars: {response_text[-200:]}...")
295
  print("=" * 50)
296
 
297
- # Store this part
298
- response_parts.append(response_text)
 
 
 
 
 
 
299
 
300
- # Check which sections are present in this part
301
- for section in required_sections:
302
- if section in response_text:
303
- completed_sections.add(section)
 
 
304
 
305
  # Check if response indicates continuation is needed
306
  needs_continuation = "<CONTINUE>" in response_text
307
 
308
  print(f"Needs continuation: {needs_continuation}")
309
- print(f"Continuation count: {continuation_count}/{max_continuations}")
310
- print(f"Completed sections: {len(completed_sections)}/12")
311
- print(f"Missing sections: {[s for s in required_sections if s not in completed_sections]}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
312
 
313
- # Continue if <CONTINUE> is present and we haven't reached max
314
- if needs_continuation and continuation_count < max_continuations:
315
  continuation_count += 1
316
- logger.info(f"Continuing analysis (attempt {continuation_count}/{max_continuations})")
 
317
  continue
318
  else:
319
  break
@@ -325,42 +325,29 @@ def call_claude_api_with_continuation(prompt, max_continuations=3):
325
  logger.error(f"Error calling Claude API: {str(e)}")
326
  return f"❌ Error: {str(e)}"
327
 
328
- # Combine all parts and clean up
329
- full_response = "\n\n".join(response_parts)
330
- full_response = full_response.replace("<CONTINUE>", "")
331
 
332
- # Validate completeness
333
- missing_sections = []
334
- duplicate_sections = []
335
- section_counts = {}
336
-
337
- for section in required_sections:
338
- count = full_response.count(section)
339
- section_counts[section] = count
340
- if count == 0:
341
- missing_sections.append(section)
342
- elif count > 1:
343
- duplicate_sections.append(section)
344
-
345
- # Log validation results
346
- print(f"\n=== VALIDATION RESULTS ===")
347
- print(f"Total response length: {len(full_response)} characters")
348
- print(f"Number of parts: {len(response_parts)}")
349
- print(f"Missing sections: {missing_sections}")
350
- print(f"Duplicate sections: {duplicate_sections}")
351
- print(f"Section counts: {section_counts}")
352
  print("=" * 50)
353
 
354
- # Add completion indicator
355
- if len(response_parts) > 1:
356
- full_response += f"\n\n[Analysis completed in {len(response_parts)} parts]"
357
 
358
- # Print the entire final response for debugging
359
- print(f"\n=== ENTIRE FINAL RESPONSE ===")
360
- print(full_response)
361
- print("=" * 50)
 
362
 
363
- return full_response
364
 
365
  def call_claude_api(prompt):
366
  """Call Claude API directly (legacy function for backward compatibility)"""
@@ -873,30 +860,6 @@ def analyze_transcript_content(transcript_content, age, gender, slp_notes):
873
 
874
  # Get analysis from Claude API
875
  result = call_claude_api_with_continuation(prompt, max_continuations=5)
876
-
877
- # Validate completeness and fix if needed
878
- is_complete, missing_sections, duplicate_sections, section_counts = validate_analysis_completeness(result)
879
-
880
- if not is_complete:
881
- print(f"\n🔧 ATTEMPTING TO FIX INCOMPLETE ANALYSIS...")
882
- print(f"Missing sections: {missing_sections}")
883
- print(f"Duplicate sections: {duplicate_sections}")
884
-
885
- # Try to fix missing sections
886
- if missing_sections:
887
- result = fix_incomplete_analysis(result, missing_sections)
888
-
889
- # Re-validate after fix
890
- is_complete_after_fix, missing_after_fix, duplicate_after_fix, counts_after_fix = validate_analysis_completeness(result)
891
-
892
- if not is_complete_after_fix:
893
- print(f"\n⚠️ ANALYSIS STILL INCOMPLETE AFTER FIX ATTEMPT")
894
- print(f"Still missing: {missing_after_fix}")
895
- print(f"Still duplicate: {duplicate_after_fix}")
896
- result += f"\n\n⚠️ WARNING: Analysis may be incomplete. Missing sections: {missing_after_fix}"
897
- else:
898
- print(f"\n✅ ANALYSIS FIXED SUCCESSFULLY")
899
-
900
  return result
901
 
902
  def analyze_transcript(file, age, gender, slp_notes):
@@ -1021,14 +984,6 @@ def targeted_analysis(transcript, custom_question, age, gender, slp_notes):
1021
 
1022
  # Get targeted analysis from Claude API
1023
  result = call_claude_api_with_continuation(prompt, max_continuations=3)
1024
-
1025
- # For targeted analysis, we don't need the full 12-section validation
1026
- # but we can still validate that the response is complete and well-structured
1027
- if len(result.strip()) < 500: # Basic length check
1028
- print(f"\n⚠️ TARGETED ANALYSIS MAY BE INCOMPLETE")
1029
- print(f"Response length: {len(result)} characters")
1030
- result += f"\n\n⚠️ WARNING: This targeted analysis may be incomplete. Please review the results carefully."
1031
-
1032
  return result
1033
 
1034
  # Create enhanced interface with tabs
@@ -1042,7 +997,7 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
1042
 
1043
  with gr.Tabs():
1044
  # Tab 1: Basic Analysis
1045
- with gr.Tab("📊 Basic Analysis"):
1046
  with gr.Row():
1047
  with gr.Column():
1048
  gr.Markdown("### Input Options")
@@ -1467,20 +1422,6 @@ with gr.Blocks(title="Enhanced CASL Analysis", theme=gr.themes.Soft()) as app:
1467
  """
1468
 
1469
  result = call_claude_api_with_continuation(prompt, max_continuations=2)
1470
-
1471
- # For quick analysis, validate that all selected questions were addressed
1472
- if questions and len(questions) > 0:
1473
- missing_questions = []
1474
- for question in questions:
1475
- # Check if the question was addressed (basic check)
1476
- if question.lower() not in result.lower():
1477
- missing_questions.append(question)
1478
-
1479
- if missing_questions:
1480
- print(f"\n⚠️ QUICK ANALYSIS MAY BE INCOMPLETE")
1481
- print(f"Missing questions: {missing_questions}")
1482
- result += f"\n\n⚠️ WARNING: Some selected questions may not have been fully addressed: {missing_questions}"
1483
-
1484
  progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
1485
  return result, progress_msg
1486
 
 
6
  import re
7
  import tempfile
8
  import numpy as np
9
+ import time
10
 
11
  # Configure logging
12
  logging.basicConfig(level=logging.INFO)
 
107
  else:
108
  logger.warning("Claude API key not found - using demo mode")
109
 
110
+ def segment_response_by_sections(response_text):
111
+ """Segment response by section titles and return a dictionary of sections"""
112
  required_sections = [
113
  "1. SPEECH FACTORS",
114
  "2. LANGUAGE SKILLS ASSESSMENT",
 
124
  "12. PROGNOSIS AND SUMMARY"
125
  ]
126
 
127
+ sections = {}
128
+ current_section = None
129
+ current_content = []
130
 
131
+ lines = response_text.split('\n')
132
+
133
+ for line in lines:
134
+ # Check if this line is a section header
135
+ is_section_header = False
136
+ for section in required_sections:
137
+ if section in line:
138
+ # Save previous section if exists
139
+ if current_section and current_content:
140
+ sections[current_section] = '\n'.join(current_content).strip()
141
+
142
+ # Start new section
143
+ current_section = section
144
+ current_content = []
145
+ is_section_header = True
146
+ break
147
+
148
+ # If not a section header, add to current section content
149
+ if not is_section_header and current_section:
150
+ current_content.append(line)
151
+
152
+ # Save the last section
153
+ if current_section and current_content:
154
+ sections[current_section] = '\n'.join(current_content).strip()
155
+
156
+ return sections
157
 
158
+ def combine_sections_smartly(sections_dict):
159
+ """Combine sections in the correct order without duplicates"""
160
+ required_sections = [
161
+ "1. SPEECH FACTORS",
162
+ "2. LANGUAGE SKILLS ASSESSMENT",
163
+ "3. COMPLEX SENTENCE ANALYSIS",
164
+ "4. FIGURATIVE LANGUAGE ANALYSIS",
165
+ "5. PRAGMATIC LANGUAGE ASSESSMENT",
166
+ "6. VOCABULARY AND SEMANTIC ANALYSIS",
167
+ "7. MORPHOLOGICAL AND PHONOLOGICAL ANALYSIS",
168
+ "8. COGNITIVE-LINGUISTIC FACTORS",
169
+ "9. FLUENCY AND RHYTHM ANALYSIS",
170
+ "10. QUANTITATIVE METRICS",
171
+ "11. CLINICAL IMPLICATIONS",
172
+ "12. PROGNOSIS AND SUMMARY"
173
+ ]
174
 
175
+ combined_parts = []
176
+ combined_parts.append("COMPREHENSIVE CASL ANALYSIS")
177
+ combined_parts.append("")
178
 
179
+ for section in required_sections:
180
+ if section in sections_dict:
181
+ combined_parts.append(section)
182
+ combined_parts.append("")
183
+ combined_parts.append(sections_dict[section])
184
+ combined_parts.append("")
185
+
186
+ return '\n'.join(combined_parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
+ def call_claude_api_with_continuation(prompt, max_continuations=0):
189
+ """Call Claude API with smart continuation system - unlimited continuations until complete"""
190
  if not ANTHROPIC_API_KEY:
191
  return "❌ Claude API key not configured. Please set ANTHROPIC_API_KEY environment variable."
192
 
 
206
  "12. PROGNOSIS AND SUMMARY"
207
  ]
208
 
209
+ # Safety limits to prevent infinite loops
210
+ MAX_CONTINUATIONS = 20 # Maximum 20 API calls
211
+ MAX_TIME_MINUTES = 10 # Maximum 10 minutes total
212
+ MIN_PROGRESS_PER_CALL = 1 # Must add at least 1 new section per call
213
+
214
  try:
215
+ all_sections = {} # Store all sections found across all parts
216
  continuation_count = 0
217
+ start_time = time.time()
218
+ last_section_count = 0 # Track progress between calls
219
 
220
  # Add continuation instruction to original prompt
221
  initial_prompt = prompt + "\n\nIMPORTANT: If your response is cut off or incomplete, end with <CONTINUE> to indicate more content is needed. Ensure you complete all sections of the analysis."
222
 
223
+ while True: # Unlimited continuations until complete
224
  if continuation_count == 0:
225
  current_prompt = initial_prompt
226
  else:
227
  # For continuations, provide context about what was already covered
228
+ missing_sections = [s for s in required_sections if s not in all_sections]
229
+ missing_text = "\n".join([f"- {section}" for section in missing_sections])
230
+
231
+ current_prompt = prompt + f"\n\nContinue from where you left off (continuation {continuation_count + 1}):\n\nIMPORTANT: The following sections are still missing. Please provide ONLY these missing sections:\n\n{missing_text}\n\nDo not repeat any sections that are already complete. Focus only on the missing sections listed above."
232
 
233
  headers = {
234
  "Content-Type": "application/json",
 
266
  print(f"Last 200 chars: {response_text[-200:]}...")
267
  print("=" * 50)
268
 
269
+ # Segment this part and add new sections to our collection
270
+ part_sections = segment_response_by_sections(response_text)
271
+ for section, content in part_sections.items():
272
+ if section not in all_sections: # Only add if not already present
273
+ all_sections[section] = content
274
+ print(f"Added section: {section}")
275
+ else:
276
+ print(f"Skipped duplicate section: {section}")
277
 
278
+ # Check completion status
279
+ completed_sections = len(all_sections)
280
+ missing_sections = [s for s in required_sections if s not in all_sections]
281
+
282
+ print(f"Completed sections: {completed_sections}/12")
283
+ print(f"Missing sections: {missing_sections}")
284
 
285
  # Check if response indicates continuation is needed
286
  needs_continuation = "<CONTINUE>" in response_text
287
 
288
  print(f"Needs continuation: {needs_continuation}")
289
+ print(f"Continuation count: {continuation_count}")
290
+
291
+ # Safety checks to prevent infinite loops
292
+ current_time = time.time()
293
+ elapsed_minutes = (current_time - start_time) / 60
294
+ current_section_count = len(all_sections)
295
+ progress_made = current_section_count - last_section_count
296
+
297
+ # Check if we're making progress
298
+ if continuation_count > 0 and progress_made < MIN_PROGRESS_PER_CALL:
299
+ logger.warning(f"No progress made in last call (added {progress_made} sections). Stopping to prevent infinite loop.")
300
+ break
301
+
302
+ # Check time limit
303
+ if elapsed_minutes > MAX_TIME_MINUTES:
304
+ logger.warning(f"Time limit exceeded ({elapsed_minutes:.1f} minutes). Stopping to prevent excessive API usage.")
305
+ break
306
+
307
+ # Check continuation limit
308
+ if continuation_count >= MAX_CONTINUATIONS:
309
+ logger.warning(f"Continuation limit reached ({MAX_CONTINUATIONS} calls). Stopping to prevent excessive API usage.")
310
+ break
311
 
312
+ # Continue if <CONTINUE> is present and safety checks pass
313
+ if needs_continuation:
314
  continuation_count += 1
315
+ last_section_count = current_section_count
316
+ logger.info(f"Continuing analysis (attempt {continuation_count}/{MAX_CONTINUATIONS}, {elapsed_minutes:.1f} minutes elapsed)")
317
  continue
318
  else:
319
  break
 
325
  logger.error(f"Error calling Claude API: {str(e)}")
326
  return f"❌ Error: {str(e)}"
327
 
328
+ # Combine all sections in the correct order
329
+ final_response = combine_sections_smartly(all_sections)
 
330
 
331
+ # Log final results
332
+ print(f"\n=== FINAL SMART VALIDATION ===")
333
+ print(f"Total sections found: {len(all_sections)}")
334
+ print(f"All sections present: {len(all_sections) == 12}")
335
+ print(f"Missing sections: {[s for s in required_sections if s not in all_sections]}")
336
+ print(f"Total time: {(time.time() - start_time) / 60:.1f} minutes")
337
+ print(f"Total API calls: {continuation_count + 1}")
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  print("=" * 50)
339
 
340
+ # Add completion indicator with safety info
341
+ if continuation_count > 0:
342
+ final_response += f"\n\n[Analysis completed in {continuation_count + 1} parts over {(time.time() - start_time) / 60:.1f} minutes]"
343
 
344
+ # Add warning if incomplete due to safety limits
345
+ if len(all_sections) < 12:
346
+ missing_sections = [s for s in required_sections if s not in all_sections]
347
+ final_response += f"\n\n⚠️ WARNING: Analysis incomplete due to safety limits. Missing sections: {', '.join(missing_sections)}"
348
+ final_response += f"\nConsider running analysis again or increasing limits if needed."
349
 
350
+ return final_response
351
 
352
  def call_claude_api(prompt):
353
  """Call Claude API directly (legacy function for backward compatibility)"""
 
860
 
861
  # Get analysis from Claude API
862
  result = call_claude_api_with_continuation(prompt, max_continuations=5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
863
  return result
864
 
865
  def analyze_transcript(file, age, gender, slp_notes):
 
984
 
985
  # Get targeted analysis from Claude API
986
  result = call_claude_api_with_continuation(prompt, max_continuations=3)
 
 
 
 
 
 
 
 
987
  return result
988
 
989
  # Create enhanced interface with tabs
 
997
 
998
  with gr.Tabs():
999
  # Tab 1: Basic Analysis
1000
+ with gr.Tab("📊 Basic Analysis"):
1001
  with gr.Row():
1002
  with gr.Column():
1003
  gr.Markdown("### Input Options")
 
1422
  """
1423
 
1424
  result = call_claude_api_with_continuation(prompt, max_continuations=2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1425
  progress_msg = "✅ Quick analysis completed" if "[Analysis completed in" in result else "🔄 Quick analysis in progress..."
1426
  return result, progress_msg
1427