Spaces:

Innovator89
/

medibot

Sleeping

App Files Files Community

Innovator89 committed on Jul 27

Commit

c485e44

verified ·

1 Parent(s): bfa9e66

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -12

app.py CHANGED Viewed

@@ -500,6 +500,7 @@ def get_diagnosis_and_severity_from_model(symptoms_text):
                         raw_predicted_diagnoses.append(item['label'])
         filtered_diagnoses = []
         generic_filter_keywords = [
             'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
             'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease',
@@ -507,31 +508,60 @@ def get_diagnosis_and_severity_from_model(symptoms_text):
             'sequelae', 'factor', 'manifestation', 'procedure', 'examination', 'observation',
             'symptoms', 'sign', 'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level',
             'positive', 'negative', 'patient', 'value', 'test', 'result', 'diagnosis',
-            'kidney', 'stage', 'without', 'essential',
-            'organ', 'function', 'system', 'body', 'region', 'with', 'due to', 'related to',
             'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
             'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
             'diagnosis of', 'history of', 'finding of', 'problem of', 'type of', 'group of',
-            'unlikely', 'possible', 'likely',
-            'symptom', 'sign', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage'
         ]
         for diagnosis_label in raw_predicted_diagnoses:
-            lower_diag = diagnosis_label.lower()
             is_generic = False
             for generic_kw in generic_filter_keywords:
-                if re.search(r'\b' + re.escape(generic_kw) + r'\b', lower_diag) and len(lower_diag.split()) <= 2:
                     is_generic = True
                     break
-            if lower_diag.replace('.', '').isdigit() and len(lower_diag.replace('.', '')) < 5:
                 is_generic = True
-            if len(lower_diag.split()) <= 2 and lower_diag in ['pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage']:
                 is_generic = True
-            if not is_generic:
-                filtered_diagnoses.append(diagnosis_label)
-        filtered_diagnoses = list(dict.fromkeys(filtered_diagnoses))
         if len(filtered_diagnoses) > 5:
-            filtered_diagnoses = filtered_diagnoses[:5]
         if not filtered_diagnoses:
             return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []

                         raw_predicted_diagnoses.append(item['label'])
         filtered_diagnoses = []
+        # EXPANDED AND REFINED GENERIC FILTER KEYWORDS
         generic_filter_keywords = [
             'unspecified', 'acute', 'chronic', 'use', 'status', 'other', 'not elsewhere classified',
             'no diagnosis', 'history of', 'finding', 'problem', 'syndrome', 'disease',
             'sequelae', 'factor', 'manifestation', 'procedure', 'examination', 'observation',
             'symptoms', 'sign', 'unconfirmed', 'type', 'group', 'normal', 'unknown', 'level',
             'positive', 'negative', 'patient', 'value', 'test', 'result', 'diagnosis',
+            'kidney', 'stage', 'without', 'essential', 'with', 'due to', 'related to', 'of',
+            'organ', 'function', 'system', 'body', 'region',
             'clinical', 'consideration', 'presence', 'absence', 'mild', 'moderate', 'severe',
             'manifesting', 'affecting', 'affect', 'area', 'part', 'general', 'specific',
             'diagnosis of', 'history of', 'finding of', 'problem of', 'type of', 'group of',
+            'unlikely', 'possible', 'likely', 'primary', 'secondary', 'and', 'or', 'by', 'for', 'in', 'on',
+            'symptom', 'sign', 'pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage',
+            # Additional common non-diagnostic terms from ICD or general medical language
+            'i', 'ii', 'iii', 'iv', 'v', 'vi', 'vii', 'viii', 'ix', 'x', # Roman numerals
+            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'j', 'k', 'l', 'm', 'n', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', # Single letters
+            'longterm', 'shortterm', 'controlled', 'uncontrolled', 'recurrent', 'intermittent', 'persistent',
+            'follow-up', 'observation', 'screening', 'encounter', 'admission', 'discharge',
+            'acute on chronic', 'other specified', 'not otherwise specified'
         ]
         for diagnosis_label in raw_predicted_diagnoses:
+            lower_diag = diagnosis_label.lower().strip()
             is_generic = False
+            # Rule 1: Check against expanded generic filter keywords
             for generic_kw in generic_filter_keywords:
+                # Full-string match: the label is filtered only when it is exactly one of the generic keywords (so 'use' never matches 'house', and longer labels that merely contain a keyword are kept)
+                if re.fullmatch(r'\b' + re.escape(generic_kw) + r'\b', lower_diag):
                     is_generic = True
                     break
+            if is_generic:
+                continue # Skip to next diagnosis if it's a generic keyword
+            # Rule 2: Filter out very short numerical or alphanumeric strings (e.g., 'ii', 'a1')
+            if len(lower_diag) <= 2 and (lower_diag.replace('.', '').isdigit() or lower_diag.isalnum()):
                 is_generic = True
+            if is_generic:
+                continue
+            # Rule 3: Filter out terms that consist only of digits and dots (e.g., '250.00')
+            # This is a heuristic; codes that contain letters (e.g., 'E11.9') are NOT matched by this pattern
+            if re.fullmatch(r'[\d\.]+', lower_diag) and len(lower_diag) < 7: # e.g., "250.00"; "e11.9" contains a letter and is not matched
                 is_generic = True
+            if is_generic:
+                continue
+            # Rule 4: Filter out terms that are single words and are very common, non-specific symptoms
+            # (already covered by generic_filter_keywords, but an explicit check for robustness)
+            if len(lower_diag.split()) == 1 and lower_diag in ['pain', 'fever', 'cough', 'vomiting', 'nausea', 'rash', 'headache', 'fatigue', 'diarrhea', 'sore throat', 'hemorrhage']:
+                is_generic = True
+            if is_generic:
+                continue
+            # If none of the above rules flagged it as generic, add to filtered list
+            filtered_diagnoses.append(diagnosis_label)
+        filtered_diagnoses = list(dict.fromkeys(filtered_diagnoses)) # Remove duplicates
         if len(filtered_diagnoses) > 5:
+            filtered_diagnoses = filtered_diagnoses[:5] # Limit to top 5
         if not filtered_diagnoses:
             return "Overall Symptom Severity (AI Assessment): Undetermined Severity", []