Spaces:
Sleeping
Sleeping
after fixing bugs
Browse files
utils.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
from fuzzywuzzy import fuzz
|
3 |
|
4 |
def extract_keyword(text: str, symptoms: list = None) -> str:
|
@@ -13,8 +13,10 @@ def extract_keyword(text: str, symptoms: list = None) -> str:
|
|
13 |
words = question.split()
|
14 |
if not words:
|
15 |
return "Unknown"
|
|
|
16 |
# Common words to skip
|
17 |
-
common_words = {'what', 'is', 'are', 'how', 'why', 'can', 'do', 'does', 'i', 'have', 'my', 'a', 'an', 'the', 'in', 'of', 'and', 'or', 'for', 'with', 'from', 'about', 'some', 'any', 'this', 'that', 'there', 'be', 'to', 'me', 'am', 'feel', 'feeling', 'experiencing', 'symptoms', 'issue', 'problem', 'cause', 'causes'}
|
|
|
18 |
# Fuzzy matching against the `symptoms` list (if provided)
|
19 |
if symptoms:
|
20 |
best_match = None
|
@@ -31,10 +33,12 @@ def extract_keyword(text: str, symptoms: list = None) -> str:
|
|
31 |
highest_score = score
|
32 |
if best_match:
|
33 |
return best_match.capitalize()
|
|
|
34 |
# Fallback: pick the first non-common word longer than 3 characters
|
35 |
for word in words:
|
36 |
if word.lower() not in common_words and len(word) > 3:
|
37 |
return word.capitalize()
|
|
|
38 |
return words[0].capitalize() if words else "Unknown"
|
39 |
else:
|
40 |
# For symptoms or diseases, take the first word (or primary symptom if it's a list)
|
|
|
1 |
+
import re
|
2 |
from fuzzywuzzy import fuzz
|
3 |
|
4 |
def extract_keyword(text: str, symptoms: list = None) -> str:
|
|
|
13 |
words = question.split()
|
14 |
if not words:
|
15 |
return "Unknown"
|
16 |
+
|
17 |
# Common words to skip
|
18 |
+
common_words = {'what', 'is', 'are', 'how', 'why', 'can', 'do', 'does', 'i', 'have', 'my', 'a', 'an', 'the', 'in', 'of', 'and', 'or', 'for', 'with', 'from', 'about', 'some', 'any', 'this', 'that', 'there', 'be', 'to', 'me', 'am', 'feel', 'feeling', 'experiencing', 'symptoms', 'issue', 'problem', 'cause', 'causes', 'having', 'severe'} # Added 'having', 'severe'
|
19 |
+
|
20 |
# Fuzzy matching against the `symptoms` list (if provided)
|
21 |
if symptoms:
|
22 |
best_match = None
|
|
|
33 |
highest_score = score
|
34 |
if best_match:
|
35 |
return best_match.capitalize()
|
36 |
+
|
37 |
# Fallback: pick the first non-common word longer than 3 characters
|
38 |
for word in words:
|
39 |
if word.lower() not in common_words and len(word) > 3:
|
40 |
return word.capitalize()
|
41 |
+
|
42 |
return words[0].capitalize() if words else "Unknown"
|
43 |
else:
|
44 |
# For symptoms or diseases, take the first word (or primary symptom if it's a list)
|