NimaKL committed on
Commit
ef3d758
·
verified ·
1 Parent(s): c08b5c8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +72 -51
app.py CHANGED
@@ -196,61 +196,82 @@ class QuestionRecommender:
196
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
197
  """Find questions to recommend based on common interests."""
198
  with self.driver.session() as session:
199
- questions = session.run("""
200
- MATCH (u1:User {name: $user1})
201
- MATCH (u2:User {name: $user2})
202
 
203
- OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
204
- OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
205
- WITH u1, u2,
206
- COLLECT(DISTINCT interest1) as u1_interests,
207
- COLLECT(DISTINCT interest2) as u2_interests
208
 
209
- UNWIND u1_interests + u2_interests as interest
210
- MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
211
- WHERE
212
- q.author <> $user1 AND
213
- q.author <> $user2 AND
214
- (
215
- (interest IN u1_interests AND interest IN u2_interests) OR
216
- (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
217
- (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  )
219
- WITH q, interest, type(r) as rel_type,
220
- CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
221
- WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
222
- sum(interest_weight) as base_score
223
- WITH q, interests, base_score,
224
- CASE
225
- WHEN q.created_utc_ts IS NOT NULL
226
- THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
227
- ELSE base_score
228
- END as temporal_score
229
- RETURN DISTINCT
230
- q.title as title,
231
- q.body as body,
232
- q.created_utc_ts as created_utc_ts,
233
- q.author as author,
234
- q.source as source,
235
- q.correct_answer as correct_answer,
236
- q.incorrect_answers as incorrect_answers,
237
- q.upvotes as upvotes,
238
- q.num_comments as num_comments,
239
- q.subreddit as subreddit,
240
- [i in interests | CASE
241
- WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
242
- ELSE i.interest.topic
243
- END] as matching_interests,
244
- [i in interests | CASE
245
- WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
246
- ELSE 'topic'
247
- END] as interest_types,
248
- temporal_score as relevance_score
249
- ORDER BY temporal_score DESC
250
- LIMIT $max_questions
251
- """, user1=user1, user2=user2, max_questions=max_questions)
252
 
253
- return [dict(record) for record in questions]
 
 
254
 
255
  def format_question(q: Dict) -> str:
256
  """Format a question for display based on its source."""
 
196
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
197
  """Find questions to recommend based on common interests."""
198
  with self.driver.session() as session:
199
+ # First, get questions per source
200
+ questions_per_source = max_questions // 4 # Divide evenly among 4 sources
201
+ remaining_slots = max_questions % 4 # Handle any remainder
202
 
203
+ # Query for each source separately to ensure balanced representation
204
+ sources = ['stack_exchange', 'reddit', 'wikipedia', 'trivia']
205
+ all_questions = []
 
 
206
 
207
+ for source in sources:
208
+ source_questions = session.run("""
209
+ MATCH (u1:User {name: $user1})
210
+ MATCH (u2:User {name: $user2})
211
+
212
+ OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
213
+ OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
214
+ WITH u1, u2,
215
+ COLLECT(DISTINCT interest1) as u1_interests,
216
+ COLLECT(DISTINCT interest2) as u2_interests
217
+
218
+ UNWIND u1_interests + u2_interests as interest
219
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
220
+ WHERE
221
+ q.author <> $user1 AND
222
+ q.author <> $user2 AND
223
+ q.source = $source AND
224
+ (
225
+ (interest IN u1_interests AND interest IN u2_interests) OR
226
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
227
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
228
+ )
229
+ WITH q, interest, type(r) as rel_type,
230
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
231
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
232
+ sum(interest_weight) as base_score
233
+ WITH q, interests, base_score,
234
+ CASE
235
+ WHEN q.created_utc_ts IS NOT NULL
236
+ THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
237
+ ELSE base_score
238
+ END as temporal_score
239
+ RETURN DISTINCT
240
+ q.title as title,
241
+ q.body as body,
242
+ q.created_utc_ts as created_utc_ts,
243
+ q.author as author,
244
+ q.source as source,
245
+ q.correct_answer as correct_answer,
246
+ q.incorrect_answers as incorrect_answers,
247
+ q.upvotes as upvotes,
248
+ q.num_comments as num_comments,
249
+ q.subreddit as subreddit,
250
+ [i in interests | CASE
251
+ WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
252
+ ELSE i.interest.topic
253
+ END] as matching_interests,
254
+ [i in interests | CASE
255
+ WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
256
+ ELSE 'topic'
257
+ END] as interest_types,
258
+ temporal_score as relevance_score
259
+ ORDER BY temporal_score DESC
260
+ LIMIT $limit
261
+ """,
262
+ user1=user1,
263
+ user2=user2,
264
+ source=source,
265
+ limit=questions_per_source + (1 if remaining_slots > 0 else 0)
266
  )
267
+
268
+ source_results = [dict(record) for record in source_questions]
269
+ all_questions.extend(source_results)
270
+ remaining_slots = max(0, remaining_slots - 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
+ # Sort all questions by relevance score for final ordering
273
+ all_questions.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
274
+ return all_questions
275
 
276
  def format_question(q: Dict) -> str:
277
  """Format a question for display based on its source."""