Update app.py
Browse files
app.py
CHANGED
@@ -196,61 +196,82 @@ class QuestionRecommender:
|
|
196 |
def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
|
197 |
"""Find questions to recommend based on common interests."""
|
198 |
with self.driver.session() as session:
|
199 |
-
questions
|
200 |
-
|
201 |
-
|
202 |
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
COLLECT(DISTINCT interest1) as u1_interests,
|
207 |
-
COLLECT(DISTINCT interest2) as u2_interests
|
208 |
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
(
|
215 |
-
|
216 |
-
|
217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
218 |
)
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
WITH q, interests, base_score,
|
224 |
-
CASE
|
225 |
-
WHEN q.created_utc_ts IS NOT NULL
|
226 |
-
THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
|
227 |
-
ELSE base_score
|
228 |
-
END as temporal_score
|
229 |
-
RETURN DISTINCT
|
230 |
-
q.title as title,
|
231 |
-
q.body as body,
|
232 |
-
q.created_utc_ts as created_utc_ts,
|
233 |
-
q.author as author,
|
234 |
-
q.source as source,
|
235 |
-
q.correct_answer as correct_answer,
|
236 |
-
q.incorrect_answers as incorrect_answers,
|
237 |
-
q.upvotes as upvotes,
|
238 |
-
q.num_comments as num_comments,
|
239 |
-
q.subreddit as subreddit,
|
240 |
-
[i in interests | CASE
|
241 |
-
WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
|
242 |
-
ELSE i.interest.topic
|
243 |
-
END] as matching_interests,
|
244 |
-
[i in interests | CASE
|
245 |
-
WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
|
246 |
-
ELSE 'topic'
|
247 |
-
END] as interest_types,
|
248 |
-
temporal_score as relevance_score
|
249 |
-
ORDER BY temporal_score DESC
|
250 |
-
LIMIT $max_questions
|
251 |
-
""", user1=user1, user2=user2, max_questions=max_questions)
|
252 |
|
253 |
-
|
|
|
|
|
254 |
|
255 |
def format_question(q: Dict) -> str:
|
256 |
"""Format a question for display based on its source."""
|
|
|
196 |
def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
    """Find questions to recommend based on the two users' interests.

    One query is run per source so that the result is balanced across
    sources: each source gets ``max_questions // 4`` slots and the first
    ``max_questions % 4`` sources get one extra. When a source has fewer
    matches than its share, the unused slots are back-filled with the
    best-scoring surplus questions from the other sources, so the result
    is only shorter than ``max_questions`` when there are not enough
    matching questions overall.

    Args:
        user1: Name of the first user (matched against ``User.name``).
        user2: Name of the second user (matched against ``User.name``).
        max_questions: Upper bound on the number of questions returned.
            Values <= 0 yield an empty list without touching the database.

    Returns:
        A list of at most ``max_questions`` dicts, ordered by
        ``relevance_score`` descending. Each dict carries the columns of
        the query's RETURN clause: ``title``, ``body``, ``created_utc_ts``,
        ``author``, ``source``, ``correct_answer``, ``incorrect_answers``,
        ``upvotes``, ``num_comments``, ``subreddit``, ``matching_interests``,
        ``interest_types`` and ``relevance_score``.
    """
    # A non-positive request can never produce results; bail out early
    # rather than sending LIMIT 0 (or a negative LIMIT) to the database.
    if max_questions <= 0:
        return []

    sources = ['stack_exchange', 'reddit', 'wikipedia', 'trivia']
    base_quota, extra_slots = divmod(max_questions, len(sources))

    # NOTE: ``WITH DISTINCT`` below removes the duplicate rows produced by
    # ``UNWIND u1_interests + u2_interests`` for interests both users share;
    # without it a shared interest is listed twice in matching_interests
    # and its 2.0 weight is summed twice.
    query = """
        MATCH (u1:User {name: $user1})
        MATCH (u2:User {name: $user2})

        OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
        OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
        WITH u1, u2,
             COLLECT(DISTINCT interest1) as u1_interests,
             COLLECT(DISTINCT interest2) as u2_interests

        UNWIND u1_interests + u2_interests as interest
        MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
        WHERE
            q.author <> $user1 AND
            q.author <> $user2 AND
            q.source = $source AND
            (
                (interest IN u1_interests AND interest IN u2_interests) OR
                (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
                (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
            )
        WITH DISTINCT q, interest, type(r) as rel_type,
             CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
        WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
             sum(interest_weight) as base_score
        WITH q, interests, base_score,
             CASE
                 WHEN q.created_utc_ts IS NOT NULL
                 THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
                 ELSE base_score
             END as temporal_score
        RETURN DISTINCT
            q.title as title,
            q.body as body,
            q.created_utc_ts as created_utc_ts,
            q.author as author,
            q.source as source,
            q.correct_answer as correct_answer,
            q.incorrect_answers as incorrect_answers,
            q.upvotes as upvotes,
            q.num_comments as num_comments,
            q.subreddit as subreddit,
            [i in interests | CASE
                WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
                ELSE i.interest.topic
            END] as matching_interests,
            [i in interests | CASE
                WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
                ELSE 'topic'
            END] as interest_types,
            temporal_score as relevance_score
        ORDER BY temporal_score DESC
        LIMIT $limit
    """

    def relevance(question: Dict) -> float:
        # Treat a missing or null score as 0 so sorting never compares None.
        return question.get('relevance_score') or 0

    selected: List[Dict] = []   # questions taken within each source's quota
    surplus: List[Dict] = []    # extra matches, used only to fill empty slots

    with self.driver.session() as session:
        for index, source in enumerate(sources):
            # The first `extra_slots` sources absorb the remainder.
            quota = base_quota + (1 if index < extra_slots else 0)
            # Fetch up to max_questions rows (the most any single source
            # could ever contribute) so short sources can be compensated.
            result = session.run(
                query,
                user1=user1,
                user2=user2,
                source=source,
                limit=max_questions,
            )
            rows = [dict(record) for record in result]
            selected.extend(rows[:quota])
            surplus.extend(rows[quota:])

    # Back-fill slots left empty by sources with too few matches.
    shortfall = max_questions - len(selected)
    if shortfall > 0 and surplus:
        surplus.sort(key=relevance, reverse=True)
        selected.extend(surplus[:shortfall])

    # Final ordering is by relevance across all sources.
    selected.sort(key=relevance, reverse=True)
    return selected
|
275 |
|
276 |
def format_question(q: Dict) -> str:
|
277 |
"""Format a question for display based on its source."""
|