NimaKL committed on
Commit
7c2dc3d
·
verified ·
1 Parent(s): ef3d758

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +504 -188
app.py CHANGED
@@ -5,14 +5,12 @@ from typing import List, Dict, Tuple
5
  import pandas as pd
6
  from datetime import datetime
7
  import os
8
- import re
9
- from html import escape
10
- from bs4 import BeautifulSoup
11
 
12
- # Set up basic logging
13
  logging.basicConfig(
14
- level=logging.WARNING,
15
- format='%(levelname)s: %(message)s'
 
16
  )
17
  logger = logging.getLogger(__name__)
18
 
@@ -21,16 +19,16 @@ NEO4J_URL = os.getenv("NEO4J_URL")
21
  NEO4J_USER = os.getenv("NEO4J_USER")
22
  NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
23
 
24
- if not all([NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD]):
25
- raise ValueError("Missing required environment variables for database connection")
26
-
27
  def format_neo4j_datetime(dt) -> str:
28
  """Convert Neo4j datetime to string format."""
29
  if dt is None:
 
30
  return 'Unknown date'
31
  try:
 
32
  if hasattr(dt, 'to_native'):
33
  dt = dt.to_native()
 
34
  return dt.strftime('%Y-%m-%d')
35
  except Exception as e:
36
  logger.warning(f"Error formatting datetime: {e}")
@@ -45,85 +43,6 @@ def format_interest_list(interests: set, max_items: int = 10) -> str:
45
  return ', '.join(sorted_interests)
46
  return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
47
 
48
- def process_body(text, title):
49
- """Process question body to handle images and HTML."""
50
- if not text:
51
- return ""
52
-
53
- try:
54
- # Parse the HTML content
55
- soup = BeautifulSoup(str(text), 'html.parser')
56
-
57
- # Function to fix Stack Exchange URLs
58
- def fix_stack_exchange_url(url):
59
- if not url:
60
- return url
61
- if url.startswith(('http://', 'https://')):
62
- return url
63
- if url.startswith('//'):
64
- return 'https:' + url
65
- if url.startswith('/'):
66
- return 'https://i.stack.imgur.com' + url
67
- return 'https://i.stack.imgur.com/' + url
68
-
69
- # Find all img tags and replace with preview cards
70
- for img in soup.find_all('img'):
71
- src = img.get('src', '')
72
- if not src:
73
- continue
74
-
75
- fixed_src = fix_stack_exchange_url(src)
76
- alt_text = img.get('alt', '').strip()
77
- if not alt_text or alt_text.lower() == 'enter image description here':
78
- alt_text = 'Question image'
79
-
80
- preview_html = f"""
81
- <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
82
- <div style="display: flex; align-items: center; margin-bottom: 8px;">
83
- <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
84
- <span style="color: #93c5fd;">{alt_text}</span>
85
- </div>
86
- <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
87
- style="color: #60a5fa; text-decoration: none;">View image</a>
88
- </div>
89
- """
90
-
91
- new_soup = BeautifulSoup(preview_html, 'html.parser')
92
- img.replace_with(new_soup)
93
-
94
- # Style other elements
95
- for link in soup.find_all('a'):
96
- if 'View Image' not in (link.get_text() or ''):
97
- href = link.get('href', '')
98
- if href and not href.startswith(('http://', 'https://')):
99
- link['href'] = fix_stack_exchange_url(href)
100
- link['target'] = '_blank'
101
- link['rel'] = 'noopener noreferrer'
102
- link['style'] = 'color: #60a5fa; text-decoration: none;'
103
-
104
- # Add paragraph styling
105
- for p in soup.find_all(['p', 'div']):
106
- if not any(cls in (p.get('class', []) or []) for cls in ['image-preview', 'question-card']):
107
- current_style = p.get('style', '')
108
- p['style'] = f"{current_style}; margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
109
-
110
- # Add list styling
111
- for ul in soup.find_all(['ul', 'ol']):
112
- ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
113
-
114
- for li in soup.find_all('li'):
115
- li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
116
-
117
- # Add code block styling
118
- for code in soup.find_all(['code', 'pre']):
119
- code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
120
-
121
- return str(soup)
122
-
123
- except Exception as e:
124
- logger.warning(f"Error processing question body: {str(e)}")
125
- return str(text) if text else ""
126
-
127
  class QuestionRecommender:
128
  def __init__(self):
129
  try:
@@ -131,12 +50,126 @@ class QuestionRecommender:
131
  NEO4J_URL,
132
  auth=(NEO4J_USER, NEO4J_PASSWORD)
133
  )
 
134
  # Test connection immediately
135
  self.driver.verify_connectivity()
 
 
 
 
136
  except Exception as e:
137
  logger.error(f"Failed to initialize database connection: {str(e)}")
138
  raise
139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  def close(self):
141
  self.driver.close()
142
 
@@ -144,6 +177,7 @@ class QuestionRecommender:
144
  """Get list of all users with interest counts."""
145
  with self.driver.session() as session:
146
  try:
 
147
  result = session.run("""
148
  MATCH (u:User)
149
  OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
@@ -166,8 +200,15 @@ class QuestionRecommender:
166
  ) for record in result if record["username"]]
167
 
168
  if not users_with_counts:
 
169
  return []
 
 
 
 
 
170
 
 
171
  return [
172
  f"{username} ({kw_count} keywords, {topic_count} topics)"
173
  for username, kw_count, topic_count in users_with_counts
@@ -179,48 +220,133 @@ class QuestionRecommender:
179
  def get_user_interests(self, username: str) -> Dict[str, set]:
180
  """Get keywords and topics a user is interested in."""
181
  with self.driver.session() as session:
 
182
  keyword_result = session.run("""
183
  MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
184
  RETURN DISTINCT k.keyword as keyword
185
  """, username=username)
186
  keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
 
 
 
187
 
 
188
  topic_result = session.run("""
189
  MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
190
  RETURN DISTINCT t.topic as topic
191
  """, username=username)
192
  topics = {str(record["topic"]) for record in topic_result if record["topic"]}
 
 
 
193
 
194
  return {"keywords": keywords or set(), "topics": topics or set()}
195
 
196
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
197
- """Find questions to recommend based on common interests."""
198
  with self.driver.session() as session:
199
- # First, get questions per source
200
- questions_per_source = max_questions // 4 # Divide evenly among 4 sources
201
- remaining_slots = max_questions % 4 # Handle any remainder
202
-
203
- # Query for each source separately to ensure balanced representation
204
- sources = ['stack_exchange', 'reddit', 'wikipedia', 'trivia']
205
- all_questions = []
206
-
207
- for source in sources:
208
- source_questions = session.run("""
209
  MATCH (u1:User {name: $user1})
210
  MATCH (u2:User {name: $user2})
211
-
212
  OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
213
  OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
214
- WITH u1, u2,
215
- COLLECT(DISTINCT interest1) as u1_interests,
216
- COLLECT(DISTINCT interest2) as u2_interests
 
 
 
 
 
 
 
217
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
218
  UNWIND u1_interests + u2_interests as interest
219
  MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
220
  WHERE
221
  q.author <> $user1 AND
222
  q.author <> $user2 AND
223
- q.source = $source AND
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
224
  (
225
  (interest IN u1_interests AND interest IN u2_interests) OR
226
  (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
@@ -230,61 +356,174 @@ class QuestionRecommender:
230
  CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
231
  WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
232
  sum(interest_weight) as base_score
233
- WITH q, interests, base_score,
234
- CASE
235
- WHEN q.created_utc_ts IS NOT NULL
236
- THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
237
- ELSE base_score
238
- END as temporal_score
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  RETURN DISTINCT
240
  q.title as title,
241
  q.body as body,
242
- q.created_utc_ts as created_utc_ts,
243
  q.author as author,
244
- q.source as source,
245
- q.correct_answer as correct_answer,
246
- q.incorrect_answers as incorrect_answers,
247
- q.upvotes as upvotes,
248
- q.num_comments as num_comments,
249
- q.subreddit as subreddit,
250
- [i in interests | CASE
251
- WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
252
- ELSE i.interest.topic
253
- END] as matching_interests,
254
- [i in interests | CASE
255
- WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
256
- ELSE 'topic'
257
- END] as interest_types,
258
- temporal_score as relevance_score
259
- ORDER BY temporal_score DESC
260
- LIMIT $limit
261
- """,
262
- user1=user1,
263
- user2=user2,
264
- source=source,
265
- limit=questions_per_source + (1 if remaining_slots > 0 else 0)
266
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
268
- source_results = [dict(record) for record in source_questions]
269
- all_questions.extend(source_results)
270
- remaining_slots = max(0, remaining_slots - 1)
 
271
 
272
- # Sort all questions by relevance score for final ordering
273
- all_questions.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
274
- return all_questions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  def format_question(q: Dict) -> str:
277
  """Format a question for display based on its source."""
278
  try:
 
279
  title = q.get('title', 'Untitled')
280
- source = q.get('source', '').lower()
281
 
 
 
 
 
 
 
282
  metadata_html = ""
283
  content_html = ""
284
 
 
285
  if 'author' in q or 'created_utc_ts' in q:
286
  author = q.get('author', 'Unknown author')
287
  created_date = format_neo4j_datetime(q.get('created_utc_ts'))
 
288
  upvotes = q.get('upvotes', 0)
289
  num_comments = q.get('num_comments', 0)
290
 
@@ -296,10 +535,11 @@ def format_question(q: Dict) -> str:
296
  <div class="stats" style="margin-top: 5px;">
297
  <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {upvotes}</span>
298
  <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {num_comments}</span>
299
- </div>
300
  </div>
301
  """
302
 
 
303
  if source == "stack_exchange":
304
  body = q.get('body', '')
305
  if body:
@@ -308,10 +548,12 @@ def format_question(q: Dict) -> str:
308
  {process_body(body, title)}
309
  </div>
310
  """
 
311
  elif source == "trivia":
312
  correct_answer = q.get('correct_answer', '')
313
  incorrect_answers = q.get('incorrect_answers', [])
314
 
 
315
  answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
316
  answers_html = "".join([
317
  f"""
@@ -330,21 +572,58 @@ def format_question(q: Dict) -> str:
330
  {answers_html}
331
  </div>
332
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
334
  source_icon = {
335
- 'stack_exchange': '⚡',
336
- 'reddit': '🔸',
337
- 'wikipedia': '📚',
338
- 'trivia': '🎯',
339
- }.get(source, '❔')
340
 
341
  source_color = {
342
- 'stack_exchange': '#60a5fa',
343
- 'reddit': '#f97316',
344
- 'wikipedia': '#22c55e',
345
- 'trivia': '#eab308',
346
- }.get(source, '#60a5fa')
347
 
 
348
  source_display = source.title() if source else "Unknown"
349
  source_badge = f"""
350
  <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
@@ -353,6 +632,7 @@ def format_question(q: Dict) -> str:
353
  </div>
354
  """
355
 
 
356
  matching_interests = q.get('matching_interests', [])
357
  interest_types = q.get('interest_types', [])
358
  interests_with_types = []
@@ -363,9 +643,11 @@ def format_question(q: Dict) -> str:
363
  'type': type_
364
  })
365
 
 
366
  keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
367
  topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
368
 
 
369
  interests_display = []
370
  if keywords:
371
  interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
@@ -373,6 +655,7 @@ def format_question(q: Dict) -> str:
373
  interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
374
  interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
375
 
 
376
  relevance_score = q.get('relevance_score', 0)
377
  score_display = f"""
378
  <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
@@ -380,7 +663,8 @@ def format_question(q: Dict) -> str:
380
  </div>
381
  """ if relevance_score > 0 else ""
382
 
383
- return f"""
 
384
  <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
385
  <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
386
  <div style="flex: 1; display: flex; align-items: center;">
@@ -401,6 +685,8 @@ def format_question(q: Dict) -> str:
401
  </div>
402
  """
403
 
 
 
404
  except Exception as e:
405
  logger.error(f"Error formatting question: {str(e)}")
406
  return f"""
@@ -409,53 +695,70 @@ def format_question(q: Dict) -> str:
409
  </div>
410
  """
411
 
 
 
 
 
 
 
 
 
 
 
 
 
412
  def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
413
  """Main function to get recommendations and user interests."""
 
414
  user1 = user1.split(" (")[0] if " (" in user1 else user1
415
  user2 = user2.split(" (")[0] if " (" in user2 else user2
416
 
417
  recommender = QuestionRecommender()
418
  try:
 
419
  user1_interests = recommender.get_user_interests(user1)
420
  user2_interests = recommender.get_user_interests(user2)
421
 
 
422
  common_keywords = user1_interests['keywords'] & user2_interests['keywords']
423
  common_topics = user1_interests['topics'] & user2_interests['topics']
424
 
 
425
  interests_summary = f"""
426
- <div class="interests-summary">
427
- <div class="user-interests">
428
- <h3>{user1}'s Interests</h3>
429
- <div class="interest-section">
430
- <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
431
- </div>
432
- <div class="interest-section">
433
- <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
434
- </div>
435
- </div>
436
-
437
- <div class="user-interests">
438
- <h3>{user2}'s Interests</h3>
439
- <div class="interest-section">
440
- <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
441
- </div>
442
- <div class="interest-section">
443
- <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
444
- </div>
445
- </div>
446
-
447
- <div class="common-interests">
448
- <h3>Common Interests</h3>
449
- <div class="interest-section">
450
- <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
451
- </div>
452
- <div class="interest-section">
453
- <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
454
- </div>
455
- </div>
456
  </div>
457
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
 
 
459
  questions = recommender.find_common_questions(user1, user2, max_questions=50)
460
 
461
  if questions:
@@ -481,7 +784,7 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dic
481
  finally:
482
  recommender.close()
483
 
484
- # Custom CSS for styling
485
  custom_css = """
486
  .gradio-container {
487
  max-width: 1200px !important;
@@ -659,14 +962,17 @@ strong {
659
  """
660
 
661
  def main():
 
662
  recommender = QuestionRecommender()
663
  users = recommender.get_all_users()
664
  recommender.close()
665
 
666
- with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_css) as iface:
667
  gr.Markdown("""
668
- # 🤝 Question Recommender
669
  Find questions that two users might be interested in discussing together based on their common interests.
 
 
670
  """)
671
 
672
  with gr.Row(equal_height=True):
@@ -697,8 +1003,18 @@ def main():
697
  recommendation_type = gr.HTML()
698
  questions_output = gr.HTML()
699
 
 
 
 
 
 
 
700
  recommend_btn.click(
701
- fn=recommend_questions,
 
 
 
 
702
  inputs=[user1_dropdown, user2_dropdown],
703
  outputs=[interests_output, recommendation_type, questions_output]
704
  )
 
5
  import pandas as pd
6
  from datetime import datetime
7
  import os
 
 
 
8
 
9
+ # Set up logging with more detailed format for debugging
10
  logging.basicConfig(
11
+ level=logging.INFO,
12
+ format='%(asctime)s - %(levelname)s - %(message)s',
13
+ datefmt='%Y-%m-%d %H:%M:%S'
14
  )
15
  logger = logging.getLogger(__name__)
16
 
 
19
  NEO4J_USER = os.getenv("NEO4J_USER")
20
  NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
21
 
 
 
 
22
  def format_neo4j_datetime(dt) -> str:
23
  """Convert Neo4j datetime to string format."""
24
  if dt is None:
25
+ logger.info("Received None datetime")
26
  return 'Unknown date'
27
  try:
28
+ logger.info(f"Formatting datetime: {dt} of type {type(dt)}")
29
  if hasattr(dt, 'to_native'):
30
  dt = dt.to_native()
31
+ logger.info(f"Converted to native: {dt} of type {type(dt)}")
32
  return dt.strftime('%Y-%m-%d')
33
  except Exception as e:
34
  logger.warning(f"Error formatting datetime: {e}")
 
43
  return ', '.join(sorted_interests)
44
  return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  class QuestionRecommender:
47
  def __init__(self):
48
  try:
 
50
  NEO4J_URL,
51
  auth=(NEO4J_USER, NEO4J_PASSWORD)
52
  )
53
+ logger.info("Initializing QuestionRecommender with debug database")
54
  # Test connection immediately
55
  self.driver.verify_connectivity()
56
+ logger.info("Successfully connected to Neo4j database")
57
+ self.verify_connection()
58
+ # Inspect question types on initialization
59
+ self.inspect_question_types()
60
  except Exception as e:
61
  logger.error(f"Failed to initialize database connection: {str(e)}")
62
  raise
63
 
64
def verify_connection(self):
    """Verify the database connection and log basic graph statistics.

    Runs a trivial node-count query to prove that queries can execute,
    then one statistics query that counts User, Keyword, Question and
    Topic nodes plus the INTERESTED_IN_KEYWORD, INTERESTED_IN_TOPIC,
    HAS_KEYWORD and HAS_TOPIC relationships. Every figure is logged at
    INFO level.

    Raises:
        RuntimeError: if either query fails or yields no row. The
            underlying error is chained as ``__cause__``.
    """
    try:
        with self.driver.session() as session:
            # Cheapest possible round trip: confirms a query can really run.
            test_result = session.run("MATCH (n) RETURN count(n) as count").single()
            if test_result is None:
                raise RuntimeError("Could not execute test query")
            logger.info(f"Database contains {test_result['count']} total nodes")

            # Every pattern is OPTIONAL so that an empty label or
            # relationship type contributes a count of 0. With a plain
            # MATCH, a label with no nodes produces no rows; the grouped
            # COUNT then returns nothing and the whole query comes back
            # empty, which would abort start-up on a partially loaded graph.
            stats = session.run("""
                // Count nodes
                OPTIONAL MATCH (u:User)
                WITH COUNT(u) as user_count
                OPTIONAL MATCH (k:Keyword)
                WITH user_count, COUNT(k) as keyword_count
                OPTIONAL MATCH (q:Question)
                WITH user_count, keyword_count, COUNT(q) as question_count
                OPTIONAL MATCH (t:Topic)
                WITH user_count, keyword_count, question_count, COUNT(t) as topic_count

                // Count relationships
                OPTIONAL MATCH ()-[r:INTERESTED_IN_KEYWORD]->()
                WITH user_count, keyword_count, question_count, topic_count, COUNT(r) as keyword_rel_count
                OPTIONAL MATCH ()-[r:INTERESTED_IN_TOPIC]->()
                WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, COUNT(r) as topic_rel_count
                OPTIONAL MATCH ()-[r:HAS_KEYWORD]->()
                WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, topic_rel_count, COUNT(r) as question_keyword_count
                OPTIONAL MATCH ()-[r:HAS_TOPIC]->()
                RETURN
                    user_count, keyword_count, question_count, topic_count,
                    keyword_rel_count, topic_rel_count,
                    question_keyword_count, COUNT(r) as question_topic_count
            """).single()

            if stats is None:
                raise RuntimeError("Could not retrieve database statistics")

            logger.info("=== Database Statistics ===")
            logger.info("Nodes:")
            logger.info(f" Users: {stats['user_count']}")
            logger.info(f" Keywords: {stats['keyword_count']}")
            logger.info(f" Questions: {stats['question_count']}")
            logger.info(f" Topics: {stats['topic_count']}")
            logger.info("\nRelationships:")
            logger.info(f" User->Keyword (INTERESTED_IN_KEYWORD): {stats['keyword_rel_count']}")
            logger.info(f" User->Topic (INTERESTED_IN_TOPIC): {stats['topic_rel_count']}")
            logger.info(f" Question->Keyword (HAS_KEYWORD): {stats['question_keyword_count']}")
            logger.info(f" Question->Topic (HAS_TOPIC): {stats['question_topic_count']}")

    except Exception as e:
        # Log the connection target (never the password) to help diagnose
        # misconfigured environment variables.
        logger.error(f"Database verification failed: {str(e)}")
        logger.error(f"URL: {NEO4J_URL}")
        logger.error(f"User: {NEO4J_USER}")
        # RuntimeError is still an Exception, so existing handlers keep
        # working; "from e" preserves the original traceback.
        raise RuntimeError(f"Failed to verify database connection: {str(e)}") from e
120
+
121
def inspect_question_types(self):
    """Log the property shapes and outgoing relationships of Question nodes.

    For every distinct (labels, property-key list) combination found on
    Question nodes, logs how many nodes share it and, for each property,
    the Python type and a truncated example value. Then logs every
    outgoing relationship type of Question nodes with its target labels
    and count.

    Raises:
        Exception: any error raised while querying is logged and re-raised
            unchanged.
    """
    with self.driver.session() as session:
        try:
            # Group by shape and count the nodes in each group. A DISTINCT
            # in the WITH clause would collapse each shape to one row
            # before counting, making every reported count 1.
            # Materialise the rows up front because further queries are
            # issued on this same session while looping over them.
            shapes = list(session.run("""
                MATCH (q:Question)
                WITH keys(q) as props, labels(q) as types
                RETURN types, props, count(*) as count
                ORDER BY count DESC
            """))

            logger.info("\n=== Question Types and Properties ===")
            # One sample lookup per property name, reused across shapes.
            samples = {}
            for record in shapes:
                types = record["types"]
                props = record["props"]
                count = record["count"]
                logger.info(f"\nType: {types}")
                logger.info(f"Count: {count}")
                logger.info("Properties:")
                for prop in props:
                    if prop not in samples:
                        # Get a sample value for this property
                        sample = session.run("""
                            MATCH (q:Question)
                            WHERE $prop in keys(q)
                            RETURN q[$prop] as value
                            LIMIT 1
                        """, prop=prop).single()
                        samples[prop] = sample["value"] if sample else None

                    value = samples[prop]
                    value_type = type(value).__name__ if value is not None else "None"
                    text = str(value)
                    ellipsis = '...' if len(text) > 100 else ''
                    logger.info(f" - {prop}: {value_type} (example: {text[:100]}{ellipsis})")

            # Get relationships specific to different question types
            relationships = list(session.run("""
                MATCH (q:Question)-[r]->(target)
                WITH DISTINCT type(r) as rel_type, labels(target) as target_labels, count(*) as count
                RETURN rel_type, target_labels, count
                ORDER BY count DESC
            """))

            logger.info("\n=== Question Relationships ===")
            for record in relationships:
                rel_type = record["rel_type"]
                target_labels = record["target_labels"]
                count = record["count"]
                logger.info(f"Relationship: {rel_type} -> {target_labels} (Count: {count})")

        except Exception as e:
            logger.error(f"Error inspecting question types: {str(e)}")
            raise
172
+
173
  def close(self):
174
  self.driver.close()
175
 
 
177
  """Get list of all users with interest counts."""
178
  with self.driver.session() as session:
179
  try:
180
+ # Get users with their interest counts using proper relationship patterns
181
  result = session.run("""
182
  MATCH (u:User)
183
  OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
 
200
  ) for record in result if record["username"]]
201
 
202
  if not users_with_counts:
203
+ logger.warning("No users found with interests")
204
  return []
205
+
206
+ logger.info(f"Retrieved {len(users_with_counts)} users with interests")
207
+ logger.info("Top 5 users by interest count:")
208
+ for username, kw_count, topic_count in users_with_counts[:5]:
209
+ logger.info(f" - {username}: {kw_count} keywords, {topic_count} topics")
210
 
211
+ # Format usernames with their counts
212
  return [
213
  f"{username} ({kw_count} keywords, {topic_count} topics)"
214
  for username, kw_count, topic_count in users_with_counts
 
220
  def get_user_interests(self, username: str) -> Dict[str, set]:
221
  """Get keywords and topics a user is interested in."""
222
  with self.driver.session() as session:
223
+ # Get keywords the user is interested in
224
  keyword_result = session.run("""
225
  MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
226
  RETURN DISTINCT k.keyword as keyword
227
  """, username=username)
228
  keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
229
+
230
+ # Log keyword count for debugging
231
+ logger.debug(f"Found {len(keywords)} keywords for user {username}")
232
 
233
+ # Get topics the user is interested in
234
  topic_result = session.run("""
235
  MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
236
  RETURN DISTINCT t.topic as topic
237
  """, username=username)
238
  topics = {str(record["topic"]) for record in topic_result if record["topic"]}
239
+
240
+ # Log topic count for debugging
241
+ logger.debug(f"Found {len(topics)} topics for user {username}")
242
 
243
  return {"keywords": keywords or set(), "topics": topics or set()}
244
 
245
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
246
+ """Find questions to recommend based on common interests using advanced Neo4j features."""
247
  with self.driver.session() as session:
248
+ # Debug: Check if users exist and have interests
249
+ user_check = session.run("""
 
 
 
 
 
 
 
 
250
  MATCH (u1:User {name: $user1})
251
  MATCH (u2:User {name: $user2})
 
252
  OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
253
  OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
254
+ RETURN
255
+ COUNT(DISTINCT u1) as user1_exists,
256
+ COUNT(DISTINCT u2) as user2_exists,
257
+ COUNT(DISTINCT interest1) as user1_interests,
258
+ COUNT(DISTINCT interest2) as user2_interests
259
+ """, user1=user1, user2=user2).single()
260
+
261
+ if not (user_check and user_check['user1_exists'] and user_check['user2_exists']):
262
+ logger.error(f"One or both users not found: {user1}, {user2}")
263
+ return []
264
 
265
+ logger.info(f"User {user1} has {user_check['user1_interests']} total interests")
266
+ logger.info(f"User {user2} has {user_check['user2_interests']} total interests")
267
+
268
+ # Advanced question recommendation query using Neo4j path finding and scoring
269
+ questions_query = """
270
+ // Find all interests (both keywords and topics) for both users
271
+ MATCH (u1:User {name: $user1})
272
+ MATCH (u2:User {name: $user2})
273
+
274
+ // Get all interests for both users
275
+ OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
276
+ OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
277
+ WITH u1, u2,
278
+ COLLECT(DISTINCT interest1) as u1_interests,
279
+ COLLECT(DISTINCT interest2) as u2_interests
280
+
281
+ // Find questions related to either user's interests for each source
282
+ CALL {
283
+ WITH u1, u2, u1_interests, u2_interests
284
  UNWIND u1_interests + u2_interests as interest
285
  MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
286
  WHERE
287
  q.author <> $user1 AND
288
  q.author <> $user2 AND
289
+ q.source = 'stack_exchange' AND
290
+ (
291
+ (interest IN u1_interests AND interest IN u2_interests) OR
292
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
293
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
294
+ )
295
+ WITH q, interest, type(r) as rel_type,
296
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
297
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
298
+ sum(interest_weight) as base_score
299
+ RETURN q, interests, base_score
300
+ ORDER BY base_score * rand() DESC
301
+ LIMIT 15 // Increased from 10 to get more variety
302
+
303
+ UNION
304
+
305
+ WITH u1, u2, u1_interests, u2_interests
306
+ UNWIND u1_interests + u2_interests as interest
307
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
308
+ WHERE
309
+ q.source = 'trivia' AND
310
+ (
311
+ (interest IN u1_interests AND interest IN u2_interests) OR
312
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
313
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
314
+ )
315
+ WITH q, interest, type(r) as rel_type,
316
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
317
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
318
+ sum(interest_weight) as base_score
319
+ RETURN q, interests, base_score
320
+ ORDER BY base_score * rand() DESC
321
+ LIMIT 15 // Increased from 10 to get more variety
322
+
323
+ UNION
324
+
325
+ WITH u1, u2, u1_interests, u2_interests
326
+ UNWIND u1_interests + u2_interests as interest
327
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
328
+ WHERE
329
+ q.source = 'wikipedia' AND
330
+ (
331
+ (interest IN u1_interests AND interest IN u2_interests) OR
332
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
333
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
334
+ )
335
+ WITH q, interest, type(r) as rel_type,
336
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
337
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
338
+ sum(interest_weight) as base_score
339
+ RETURN q, interests, base_score
340
+ ORDER BY base_score * rand() DESC
341
+ LIMIT 15 // Increased from 10 to get more variety
342
+
343
+ UNION
344
+
345
+ WITH u1, u2, u1_interests, u2_interests
346
+ UNWIND u1_interests + u2_interests as interest
347
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
348
+ WHERE
349
+ q.source = 'reddit' AND
350
  (
351
  (interest IN u1_interests AND interest IN u2_interests) OR
352
  (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
 
356
  CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
357
  WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
358
  sum(interest_weight) as base_score
359
+ RETURN q, interests, base_score
360
+ ORDER BY base_score * rand() DESC
361
+ LIMIT 15 // Increased from 10 to get more variety
362
+ }
363
+
364
+ // Calculate temporal relevance for the combined results
365
+ WITH q, interests, base_score,
366
+ CASE
367
+ WHEN q.created_utc_ts IS NOT NULL
368
+ THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
369
+ ELSE base_score
370
+ END as temporal_score,
371
+ // Add source-specific random boost to ensure better mixing
372
+ CASE q.source
373
+ WHEN 'stack_exchange' THEN rand() * 0.4
374
+ WHEN 'trivia' THEN rand() * 0.4
375
+ WHEN 'wikipedia' THEN rand() * 0.4
376
+ WHEN 'reddit' THEN rand() * 0.4
377
+ ELSE rand() * 0.4
378
+ END as source_random_boost
379
+
380
+ // Return results with all metadata
381
+ WITH q, interests, temporal_score, source_random_boost,
382
+ temporal_score * (0.6 + 0.8 * rand()) + source_random_boost as final_score
383
  RETURN DISTINCT
384
  q.title as title,
385
  q.body as body,
386
+ q.created_utc_ts as created_utc_ts,
387
  q.author as author,
388
+ q.source as source,
389
+ q.correct_answer as correct_answer,
390
+ q.incorrect_answers as incorrect_answers,
391
+ q.upvotes as upvotes,
392
+ q.num_comments as num_comments,
393
+ q.subreddit as subreddit,
394
+ [i in interests | CASE
395
+ WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
396
+ ELSE i.interest.topic
397
+ END] as matching_interests,
398
+ [i in interests | CASE
399
+ WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
400
+ ELSE 'topic'
401
+ END] as interest_types,
402
+ final_score as relevance_score
403
+ ORDER BY final_score DESC
404
+ LIMIT $max_questions
405
+ """
406
+
407
+ questions = [dict(record) for record in session.run(questions_query,
408
+ user1=user1,
409
+ user2=user2,
410
+ max_questions=max_questions)]
411
+
412
+ if questions:
413
+ first_q = questions[0]
414
+ logger.info(f"Sample question:")
415
+ logger.info(f"Title: {first_q.get('title', 'No title')}")
416
+ logger.info(f"Author: {first_q.get('author', 'No author')}")
417
+ logger.info(f"Score: {first_q.get('relevance_score', 0)}")
418
+ logger.info(f"Interests: {first_q.get('matching_interests', [])}")
419
+
420
+ logger.info(f"Found {len(questions)} questions with common interests")
421
+ return questions
422
+
423
def process_body(text, title):
    """Render a question body as styled HTML, replacing images with link cards.

    The body is parsed with BeautifulSoup; every ``<img>`` that has a ``src``
    is swapped for a small "View image" preview card, links are opened in a
    new tab, and inline styles are applied to paragraphs, lists and code.

    Args:
        text: Raw question body (HTML or plain text). Converted with ``str()``.
        title: Question title; only used in the "empty body" log message.

    Returns:
        The processed HTML as a string, ``""`` when ``text`` is empty, or the
        unprocessed ``str(text)`` if parsing/processing raises.
    """
    if not text:
        logger.warning(f"Empty body for question: {title}")
        return ""

    try:
        from html import escape

        from bs4 import BeautifulSoup

        soup = BeautifulSoup(str(text), 'html.parser')

        def fix_stack_exchange_url(url):
            """Turn protocol-relative / relative URLs into absolute ones."""
            if not url:
                return url
            if url.startswith(('http://', 'https://')):
                return url
            if url.startswith('//'):
                return 'https:' + url
            if url.startswith('/'):
                return 'https://i.stack.imgur.com' + url
            return 'https://i.stack.imgur.com/' + url

        def in_preview_card(tag):
            """Return True if ``tag`` is, or lives inside, a generated image card."""
            own_classes = tag.get('class') or []
            if 'image-preview' in own_classes:
                return True
            return tag.find_parent(class_='image-preview') is not None

        # Replace every image with a preview card linking to the image.
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if not src:
                continue

            fixed_src = fix_stack_exchange_url(src)
            alt_text = (img.get('alt') or '').strip()
            if not alt_text or alt_text.lower() == 'enter image description here':
                alt_text = 'Question image'

            # alt/src come from the parsed (already entity-decoded) document, so
            # they must be escaped again before being embedded in new markup;
            # otherwise '<', '&' or '"' in them would inject or break HTML.
            safe_alt = escape(alt_text)
            safe_src = escape(fixed_src, quote=True)

            preview_html = f"""
            <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
                <div style="display: flex; align-items: center; margin-bottom: 8px;">
                    <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
                    <span style="color: #93c5fd;">{safe_alt}</span>
                </div>
                <a href="{safe_src}" target="_blank" rel="noopener noreferrer"
                   style="color: #60a5fa; text-decoration: none;">View image</a>
            </div>
            """

            img.replace_with(BeautifulSoup(preview_html, 'html.parser'))

        # Style the remaining links. Links generated for the preview cards are
        # skipped structurally (by ancestor class) rather than by link text:
        # the old text check looked for 'View Image' while the card emits
        # 'View image', so it never matched.
        for link in soup.find_all('a'):
            if in_preview_card(link):
                continue
            href = link.get('href', '')
            # NOTE(review): relative hrefs are resolved against
            # i.stack.imgur.com, which is presumably only right for image
            # links; confirm what base non-image relative links should use.
            if href and not href.startswith(('http://', 'https://')):
                link['href'] = fix_stack_exchange_url(href)
            link['target'] = '_blank'
            link['rel'] = 'noopener noreferrer'
            link['style'] = 'color: #60a5fa; text-decoration: none;'

        # Paragraph styling. Card markup (the card itself and its inner
        # layout div) keeps its own inline styles.
        paragraph_style = "margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
        for block in soup.find_all(['p', 'div']):
            block_classes = block.get('class') or []
            if 'question-card' in block_classes or in_preview_card(block):
                continue
            existing_style = (block.get('style') or '').strip().rstrip(';').strip()
            # Avoid emitting a style that starts with a stray "; " when the
            # element had no inline style of its own.
            if existing_style:
                block['style'] = f"{existing_style}; {paragraph_style}"
            else:
                block['style'] = paragraph_style

        # List styling
        for list_tag in soup.find_all(['ul', 'ol']):
            list_tag['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'

        for item in soup.find_all('li'):
            item['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'

        # Code block styling
        for code in soup.find_all(['code', 'pre']):
            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'

        return str(soup)

    except Exception as e:
        # Best-effort rendering: fall back to the raw body rather than
        # failing the whole question card.
        logger.error(f"Error processing question body: {str(e)}")
        return str(text)
505
 
506
  def format_question(q: Dict) -> str:
507
  """Format a question for display based on its source."""
508
  try:
509
+ # Extract and validate basic question data
510
  title = q.get('title', 'Untitled')
511
+ source = q.get('source', '').lower() # Convert to lowercase for consistent comparison
512
 
513
+ # Log available fields for debugging
514
+ logger.info(f"Question fields: {list(q.keys())}")
515
+ if 'created_utc_ts' in q:
516
+ logger.info(f"Raw created_utc_ts value: {q['created_utc_ts']}")
517
+
518
+ # Format metadata section based on source
519
  metadata_html = ""
520
  content_html = ""
521
 
522
+ # Default metadata for questions with author/date
523
  if 'author' in q or 'created_utc_ts' in q:
524
  author = q.get('author', 'Unknown author')
525
  created_date = format_neo4j_datetime(q.get('created_utc_ts'))
526
+ logger.info(f"Question {title}: author={author}, date={created_date}")
527
  upvotes = q.get('upvotes', 0)
528
  num_comments = q.get('num_comments', 0)
529
 
 
535
  <div class="stats" style="margin-top: 5px;">
536
  <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {upvotes}</span>
537
  <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {num_comments}</span>
538
+ </div>
539
  </div>
540
  """
541
 
542
+ # Handle content based on source and available fields
543
  if source == "stack_exchange":
544
  body = q.get('body', '')
545
  if body:
 
548
  {process_body(body, title)}
549
  </div>
550
  """
551
+
552
  elif source == "trivia":
553
  correct_answer = q.get('correct_answer', '')
554
  incorrect_answers = q.get('incorrect_answers', [])
555
 
556
+ # Create answer options HTML
557
  answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
558
  answers_html = "".join([
559
  f"""
 
572
  {answers_html}
573
  </div>
574
  """
575
+
576
+ elif source == "wikipedia":
577
+ correct_answer = q.get('correct_answer', '')
578
+ if correct_answer:
579
+ content_html = f"""
580
+ <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid #10b981;">
581
+ <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
582
+ <div style="color: #34d399;">{correct_answer}</div>
583
+ </div>
584
+ """
585
+
586
+ elif source == "reddit":
587
+ # Add subreddit to metadata if available
588
+ if 'subreddit' in q:
589
+ subreddit = q.get('subreddit', '')
590
+ metadata_html = metadata_html.replace(
591
+ 'posted on',
592
+ f'posted in <span style="color: #60a5fa; font-weight: 500;">r/{subreddit}</span> on'
593
+ )
594
 
595
+ # If no specific content is set, try to use any available content fields
596
+ if not content_html:
597
+ if 'body' in q:
598
+ content_html = f"""
599
+ <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
600
+ {process_body(q['body'], title)}
601
+ </div>
602
+ """
603
+ elif 'correct_answer' in q:
604
+ content_html = f"""
605
+ <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px;">
606
+ <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
607
+ <div style="color: #e2e8f0;">{q['correct_answer']}</div>
608
+ </div>
609
+ """
610
+
611
+ # Get source-specific icon and color
612
  source_icon = {
613
+ 'stack_exchange': '⚡', # Lightning bolt for Stack Exchange
614
+ 'reddit': '🔸', # Orange diamond for Reddit
615
+ 'wikipedia': '📚', # Books for Wikipedia
616
+ 'trivia': '🎯', # Target/bullseye for Trivia
617
+ }.get(source, '❔') # Question mark as fallback
618
 
619
  source_color = {
620
+ 'stack_exchange': '#60a5fa', # Blue
621
+ 'reddit': '#f97316', # Orange
622
+ 'wikipedia': '#22c55e', # Green
623
+ 'trivia': '#eab308', # Yellow
624
+ }.get(source, '#60a5fa') # Default blue
625
 
626
+ # Create the source badge with icon
627
  source_display = source.title() if source else "Unknown"
628
  source_badge = f"""
629
  <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
 
632
  </div>
633
  """
634
 
635
+ # Handle matching interests display
636
  matching_interests = q.get('matching_interests', [])
637
  interest_types = q.get('interest_types', [])
638
  interests_with_types = []
 
643
  'type': type_
644
  })
645
 
646
+ # Format interests by type
647
  keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
648
  topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
649
 
650
+ # Create interests display string
651
  interests_display = []
652
  if keywords:
653
  interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
 
655
  interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
656
  interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
657
 
658
+ # Calculate relevance score display
659
  relevance_score = q.get('relevance_score', 0)
660
  score_display = f"""
661
  <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
 
663
  </div>
664
  """ if relevance_score > 0 else ""
665
 
666
+ # Create the question card HTML
667
+ question_html = f"""
668
  <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
669
  <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
670
  <div style="flex: 1; display: flex; align-items: center;">
 
685
  </div>
686
  """
687
 
688
+ return question_html
689
+
690
  except Exception as e:
691
  logger.error(f"Error formatting question: {str(e)}")
692
  return f"""
 
695
  </div>
696
  """
697
 
698
def loading_message() -> Tuple[str, str, str]:
    """Build the spinner placeholder shown while recommendations are computed.

    Returns:
        A 3-tuple holding the same loading-spinner HTML three times.
    """
    spinner_markup = """
    <div class="loading-spinner">
        <div style="text-align: center;">
            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
        </div>
    </div>
    """
    # The same markup is returned once per slot of the result tuple.
    return (spinner_markup,) * 3
709
+
710
  def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
711
  """Main function to get recommendations and user interests."""
712
+ # Extract actual usernames from the formatted strings
713
  user1 = user1.split(" (")[0] if " (" in user1 else user1
714
  user2 = user2.split(" (")[0] if " (" in user2 else user2
715
 
716
  recommender = QuestionRecommender()
717
  try:
718
+ # Get interests for both users
719
  user1_interests = recommender.get_user_interests(user1)
720
  user2_interests = recommender.get_user_interests(user2)
721
 
722
+ # Find common interests
723
  common_keywords = user1_interests['keywords'] & user2_interests['keywords']
724
  common_topics = user1_interests['topics'] & user2_interests['topics']
725
 
726
+ # Format interests summary
727
  interests_summary = f"""
728
+ <div class="interests-summary">
729
+ <div class="user-interests">
730
+ <h3>{user1}'s Interests</h3>
731
+ <div class="interest-section">
732
+ <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
733
  </div>
734
+ <div class="interest-section">
735
+ <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
736
+ </div>
737
+ </div>
738
+
739
+ <div class="user-interests">
740
+ <h3>{user2}'s Interests</h3>
741
+ <div class="interest-section">
742
+ <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
743
+ </div>
744
+ <div class="interest-section">
745
+ <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
746
+ </div>
747
+ </div>
748
+
749
+ <div class="common-interests">
750
+ <h3>Common Interests</h3>
751
+ <div class="interest-section">
752
+ <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
753
+ </div>
754
+ <div class="interest-section">
755
+ <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
756
+ </div>
757
+ </div>
758
+ </div>
759
+ """
760
 
761
+ # Get all recommended questions
762
  questions = recommender.find_common_questions(user1, user2, max_questions=50)
763
 
764
  if questions:
 
784
  finally:
785
  recommender.close()
786
 
787
+ # Custom CSS for better styling
788
  custom_css = """
789
  .gradio-container {
790
  max-width: 1200px !important;
 
962
  """
963
 
964
  def main():
965
+ # Create Gradio interface
966
  recommender = QuestionRecommender()
967
  users = recommender.get_all_users()
968
  recommender.close()
969
 
970
+ with gr.Blocks(title="Question Recommender (Local Debug)", theme=gr.themes.Soft(), css=custom_css) as iface:
971
  gr.Markdown("""
972
+ # 🤝 Question Recommender (Local Debug Version)
973
  Find questions that two users might be interested in discussing together based on their common interests.
974
+
975
+ > This is the local debug version using the test database.
976
  """)
977
 
978
  with gr.Row(equal_height=True):
 
1003
  recommendation_type = gr.HTML()
1004
  questions_output = gr.HTML()
1005
 
1006
+ def recommend_and_store(user1, user2):
1007
+ """Get recommendations and store questions."""
1008
+ interests, rec_type, questions_html, questions_data = recommend_questions(user1, user2)
1009
+ return interests, rec_type, questions_html
1010
+
1011
+ # Wire up the components
1012
  recommend_btn.click(
1013
+ fn=loading_message,
1014
+ outputs=[interests_output, recommendation_type, questions_output],
1015
+ queue=False
1016
+ ).then(
1017
+ fn=recommend_and_store,
1018
  inputs=[user1_dropdown, user2_dropdown],
1019
  outputs=[interests_output, recommendation_type, questions_output]
1020
  )