Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -500,6 +500,7 @@ def get_diagnosis_and_severity_from_model(symptoms_text):
|
|
500 |
raw_predicted_diagnoses.append(item['label'])
|
501 |
|
502 |
filtered_diagnoses = []
|
|
|
503 |
generic_filter_keywords = [
|
504 |
'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
|
505 |
'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease',
|
@@ -507,31 +508,60 @@ def get_diagnosis_and_severity_from_model(symptoms_text):
|
|
507 |
'sequelae', 'factor', 'manifestation', 'procedure', 'examination', 'observation',
|
508 |
'symptoms', 'sign', 'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level',
|
509 |
'positive', 'negative', 'patient', 'value', 'test', 'result', 'diagnosis',
|
510 |
-
'kidney', 'stage', 'without', 'essential',
|
511 |
-
'organ', 'function', 'system', 'body', 'region',
|
512 |
'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
|
513 |
'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
|
514 |
'diagnosis of', 'history of', 'finding of', 'problem of', 'type of', 'group of',
|
515 |
-
'unlikely', 'possible', 'likely',
|
516 |
-
'symptom', 'sign', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage'
|
|
|
|
|
|
|
|
|
|
|
|
|
517 |
]
|
|
|
518 |
for diagnosis_label in raw_predicted_diagnoses:
|
519 |
-
lower_diag = diagnosis_label.lower()
|
520 |
is_generic = False
|
|
|
|
|
521 |
for generic_kw in generic_filter_keywords:
|
522 |
-
|
|
|
523 |
is_generic = True
|
524 |
break
|
525 |
-
if
|
|
|
|
|
|
|
|
|
526 |
is_generic = True
|
527 |
-
if
|
|
|
|
|
|
|
|
|
|
|
528 |
is_generic = True
|
529 |
-
if
|
530 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
531 |
|
532 |
-
filtered_diagnoses = list(dict.fromkeys(filtered_diagnoses))
|
533 |
if len(filtered_diagnoses) > 5:
|
534 |
-
filtered_diagnoses = filtered_diagnoses[:5]
|
535 |
|
536 |
if not filtered_diagnoses:
|
537 |
return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []
|
|
|
500 |
raw_predicted_diagnoses.append(item['label'])
|
501 |
|
502 |
filtered_diagnoses = []
|
503 |
+
# EXPANDED AND REFINED GENERIC FILTER KEYWORDS
|
504 |
generic_filter_keywords = [
|
505 |
'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
|
506 |
'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease',
|
|
|
508 |
'sequelae', 'factor', 'manifestation', 'procedure', 'examination', 'observation',
|
509 |
'symptoms', 'sign', 'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level',
|
510 |
'positive', 'negative', 'patient', 'value', 'test', 'result', 'diagnosis',
|
511 |
+
'kidney', 'stage', 'without', 'essential', 'with', 'due to', 'related to', 'of',
|
512 |
+
'organ', 'function', 'system', 'body', 'region',
|
513 |
'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
|
514 |
'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
|
515 |
'diagnosis of', 'history of', 'finding of', 'problem of', 'type of', 'group of',
|
516 |
+
'unlikely', 'possible', 'likely', 'primary', 'secondary', 'and', 'or', 'by', 'for', 'in', 'on',
|
517 |
+
'symptom', 'sign', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage',
|
518 |
+
# Additional common non-diagnostic terms from ICD or general medical language
|
519 |
+
'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', # Roman numerals
|
520 |
+
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', # Single letters
|
521 |
+
'longterm', 'shortterm', 'controlled', 'uncontrolled', 'recurrent', 'intermittent', 'persistent',
|
522 |
+
'follow-up', 'observation', 'screening', 'encounter', 'admission', 'discharge',
|
523 |
+
'acute on chronic', 'other specified', 'not otherwise specified'
|
524 |
]
|
525 |
+
|
526 |
for diagnosis_label in raw_predicted_diagnoses:
|
527 |
+
lower_diag = diagnosis_label.lower().strip()
|
528 |
is_generic = False
|
529 |
+
|
530 |
+
# Rule 1: Check against expanded generic filter keywords
|
531 |
for generic_kw in generic_filter_keywords:
|
532 |
+
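# re.fullmatch requires the whole label to equal the keyword, so only labels that are exactly a generic keyword are filtered; a keyword inside a longer label (e.g., 'use' within 'house') does not match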
|
533 |
+
if re.fullmatch(r'\b' + re.escape(generic_kw) + r'\b', lower_diag):
|
534 |
is_generic = True
|
535 |
break
|
536 |
+
if is_generic:
|
537 |
+
continue # Skip to next diagnosis if it's a generic keyword
|
538 |
+
|
539 |
+
# Rule 2: Filter out very short numerical or alphanumeric strings (e.g., 'ii', 'a1')
|
540 |
+
if len(lower_diag) <= 2 and (lower_diag.replace('.', '').isdigit() or lower_diag.isalnum()):
|
541 |
is_generic = True
|
542 |
+
if is_generic:
|
543 |
+
continue
|
544 |
+
|
545 |
+
# Rule 3: Filter out terms made up only of digits and dots (e.g., '250.00'); codes containing letters, such as 'E11.9', are not matched by this pattern
|
546 |
+
# This is a heuristic, as some numbers might be part of a valid diagnosis
|
547 |
+
if re.fullmatch(r'[\d\.]+', lower_diag) and len(lower_diag) < 7: # e.g., "250.00" or "E11.9"
|
548 |
is_generic = True
|
549 |
+
if is_generic:
|
550 |
+
continue
|
551 |
+
|
552 |
+
# Rule 4: Filter out terms that are single words and are very common, non-specific symptoms
|
553 |
+
# (already covered by generic_filter_keywords, but an explicit check for robustness)
|
554 |
+
if len(lower_diag.split()) == 1 and lower_diag in ['pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage']:
|
555 |
+
is_generic = True
|
556 |
+
if is_generic:
|
557 |
+
continue
|
558 |
+
|
559 |
+
# If none of the above rules flagged it as generic, add to filtered list
|
560 |
+
filtered_diagnoses.append(diagnosis_label)
|
561 |
|
562 |
+
filtered_diagnoses = list(dict.fromkeys(filtered_diagnoses)) # Remove duplicates
|
563 |
if len(filtered_diagnoses) > 5:
|
564 |
+
filtered_diagnoses = filtered_diagnoses[:5] # Limit to top 5
|
565 |
|
566 |
if not filtered_diagnoses:
|
567 |
return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []
|