Spaces:
Build error
Update app.py
app.py
CHANGED
@@ -35,7 +35,7 @@ class Agent1:
         self.question_words = set(["what", "when", "where", "who", "whom", "which", "whose", "why", "how"])
         self.conjunctions = set(["and", "or"])
         self.pronouns = set(["it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"])
-        self.context = {}
+        self.context = {}

     def is_question(self, text: str) -> bool:
         words = word_tokenize(text.lower())
@@ -58,13 +58,11 @@ class Agent1:
         if len(questions) < 2:
             return questions

-        # Find the subject in the first question
         subject = self.find_subject(questions[0])

         if not subject:
             return questions

-        # Replace pronouns in subsequent questions
         for i in range(1, len(questions)):
             words = word_tokenize(questions[i])
             for j, word in enumerate(words):
@@ -91,60 +89,59 @@ class Agent1:
         if self.is_question(' '.join(current_question)):
             questions.append(' '.join(current_question))

-        # If no questions identified, return the original input
         if not questions:
             return [user_input]

-        # Handle pronoun replacement
         questions = self.replace_pronoun(questions)

         return questions

     def update_context(self, query: str):
         tokens = nltk.pos_tag(word_tokenize(query))
-
+        noun_phrases = []
         current_phrase = []

         for word, tag in tokens:
-            if tag.startswith('NN') or tag.startswith('JJ')
+            if tag.startswith('NN') or tag.startswith('JJ'):
                 current_phrase.append(word)
             else:
                 if current_phrase:
-
+                    noun_phrases.append(' '.join(current_phrase))
                     current_phrase = []

         if current_phrase:
-
+            noun_phrases.append(' '.join(current_phrase))

-        if
-            self.context['main_topic'] =
-            self.context['related_topics'] =
+        if noun_phrases:
+            self.context['main_topic'] = noun_phrases[0]
+            self.context['related_topics'] = noun_phrases[1:]

     def apply_context(self, query: str) -> str:
         words = word_tokenize(query.lower())

-        # Check if the query is short, contains pronouns, or doesn't contain the main topic
         if (len(words) <= 5 or
             any(word in self.pronouns for word in words) or
             (self.context.get('main_topic') and self.context['main_topic'].lower() not in query.lower())):

-
-
-
-
-
-
-
-
-
-
+            new_query_parts = []
+            main_topic_added = False
+
+            for word in words:
+                if word in self.pronouns and self.context.get('main_topic'):
+                    new_query_parts.append(self.context['main_topic'])
+                    main_topic_added = True
+                else:
+                    new_query_parts.append(word)
+
+            if not main_topic_added and self.context.get('main_topic'):
+                new_query_parts.append(f"of {self.context['main_topic']}")
+
+            query = ' '.join(new_query_parts)
+
         return query

     def process(self, user_input: str) -> tuple[List[str], Dict[str, List[Dict[str, str]]]]:
-        # First, update context with the new input
         self.update_context(user_input)
-
-        # Then apply context and split queries
         contextualized_input = self.apply_context(user_input)
         queries = self.rephrase_and_split(contextualized_input)
         print("Identified queries:", queries)
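For reference, the commit completes `update_context` (group consecutive noun/adjective tokens into rough noun phrases and store the first as the main topic) and `apply_context` (substitute the remembered topic for pronouns in short follow-up queries). Below is a minimal standalone sketch of just that logic so it can be tried in isolation; it is an approximation, not the full `Agent1` class (which this diff only shows in part), and the sample sentences and NLTK resource downloads are assumptions for illustration.

import nltk
from nltk.tokenize import word_tokenize

# Tokenizer/tagger data must be present; resource names vary by NLTK release
# (newer versions use 'punkt_tab' and 'averaged_perceptron_tagger_eng' instead).
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)

pronouns = {"it", "its", "they", "their", "them", "he", "his", "him", "she", "her", "hers"}
context = {}

def update_context(query: str) -> None:
    # Collect runs of NN*/JJ* tags as rough noun phrases; the first run becomes the main topic.
    tokens = nltk.pos_tag(word_tokenize(query))
    noun_phrases, current_phrase = [], []
    for word, tag in tokens:
        if tag.startswith('NN') or tag.startswith('JJ'):
            current_phrase.append(word)
        else:
            if current_phrase:
                noun_phrases.append(' '.join(current_phrase))
                current_phrase = []
    if current_phrase:
        noun_phrases.append(' '.join(current_phrase))
    if noun_phrases:
        context['main_topic'] = noun_phrases[0]
        context['related_topics'] = noun_phrases[1:]

def apply_context(query: str) -> str:
    # Rewrite short, pronoun-heavy, or off-topic queries around the remembered topic.
    words = word_tokenize(query.lower())
    if (len(words) <= 5 or
            any(word in pronouns for word in words) or
            (context.get('main_topic') and context['main_topic'].lower() not in query.lower())):
        new_query_parts, main_topic_added = [], False
        for word in words:
            if word in pronouns and context.get('main_topic'):
                new_query_parts.append(context['main_topic'])
                main_topic_added = True
            else:
                new_query_parts.append(word)
        if not main_topic_added and context.get('main_topic'):
            new_query_parts.append(f"of {context['main_topic']}")
        query = ' '.join(new_query_parts)
    return query

update_context("The Eiffel Tower is in Paris")  # hypothetical first turn
print(apply_context("How tall is it?"))         # likely prints: how tall is Eiffel Tower ?

As in the committed version, the `of {main_topic}` fallback simply appends the remembered topic to queries that never mention it, so a bare "population?" would come back as "population ? of Eiffel Tower"; crude, but it presumably keeps the downstream `rephrase_and_split` call anchored to the last topic.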