NimaKL committed on
Commit
cba807f
·
verified ·
1 Parent(s): cffd762

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +732 -224
app.py CHANGED
@@ -4,10 +4,13 @@ import logging
4
  from typing import List, Dict, Tuple
5
  import pandas as pd
6
  from datetime import datetime
7
- import os
8
 
9
- # Set up logging
10
- logging.basicConfig(level=logging.INFO)
 
 
 
 
11
  logger = logging.getLogger(__name__)
12
 
13
  # Get Neo4j credentials from Hugging Face secrets
@@ -18,10 +21,13 @@ NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
18
  def format_neo4j_datetime(dt) -> str:
19
  """Convert Neo4j datetime to string format."""
20
  if dt is None:
 
21
  return 'Unknown date'
22
  try:
 
23
  if hasattr(dt, 'to_native'):
24
  dt = dt.to_native()
 
25
  return dt.strftime('%Y-%m-%d')
26
  except Exception as e:
27
  logger.warning(f"Error formatting datetime: {e}")
@@ -38,23 +44,177 @@ def format_interest_list(interests: set, max_items: int = 10) -> str:
38
 
39
  class QuestionRecommender:
40
  def __init__(self):
41
- self.driver = GraphDatabase.driver(
42
- NEO4J_URL,
43
- auth=(NEO4J_USER, NEO4J_PASSWORD)
44
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
46
  def close(self):
47
  self.driver.close()
48
 
49
  def get_all_users(self) -> List[str]:
50
- """Get list of all users."""
51
  with self.driver.session() as session:
52
- result = session.run("""
53
- MATCH (u:User)
54
- RETURN DISTINCT u.name as username
55
- ORDER BY username
56
- """)
57
- return [record["username"] for record in result if record["username"]]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
 
59
  def get_user_interests(self, username: str) -> Dict[str, set]:
60
  """Get keywords and topics a user is interested in."""
@@ -65,6 +225,9 @@ class QuestionRecommender:
65
  RETURN DISTINCT k.keyword as keyword
66
  """, username=username)
67
  keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
 
 
 
68
 
69
  # Get topics the user is interested in
70
  topic_result = session.run("""
@@ -72,139 +235,464 @@ class QuestionRecommender:
72
  RETURN DISTINCT t.topic as topic
73
  """, username=username)
74
  topics = {str(record["topic"]) for record in topic_result if record["topic"]}
 
 
 
75
 
76
  return {"keywords": keywords or set(), "topics": topics or set()}
77
 
78
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
79
- """Find questions to recommend based on common interests."""
80
  with self.driver.session() as session:
81
- # First try to find questions with common keywords, but be more selective
82
- keyword_questions = session.run("""
83
- // Find keywords that both users are interested in
84
- MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
85
-
86
- // Calculate how specific each keyword is based on total user interest
87
- MATCH (anyUser:User)-[:INTERESTED_IN_KEYWORD]->(k)
88
- WITH k, COUNT(anyUser) as keyword_popularity
89
- WHERE keyword_popularity < 1000 // Filter out extremely common keywords
90
-
91
- // Find questions with these more specific common keywords
92
- MATCH (q:Question)-[:HAS_KEYWORD]->(k)
93
- WHERE q.author <> $user1 AND q.author <> $user2
94
-
95
- // Group questions and calculate relevance
96
- WITH q, k, 1.0/keyword_popularity as keyword_specificity
97
- WITH q,
98
- COLLECT(DISTINCT k.keyword) as matching_keywords,
99
- SUM(keyword_specificity) as relevance_score
100
-
101
- // Ensure we have enough matching keywords but not too many
102
- WHERE SIZE(matching_keywords) >= 2 AND SIZE(matching_keywords) <= 5
103
 
104
- // Return questions with their details
105
- RETURN DISTINCT
106
- q.title as title,
107
- q.body as body,
108
- q.created_utc_ts as created_date,
109
- q.author as author,
110
- matching_keywords as keywords,
111
- relevance_score
112
- ORDER BY relevance_score DESC, q.created_utc_ts DESC
113
- LIMIT 25
114
- """, user1=user1, user2=user2)
115
 
116
- questions = [dict(record) for record in keyword_questions]
 
 
 
 
117
 
118
- # If no questions found with common keywords, try topics
119
- if not questions:
120
- topic_questions = session.run("""
121
- // Find topics that both users are interested in
122
- MATCH (u1:User {name: $user1})-[:INTERESTED_IN_TOPIC]->(t:Topic)<-[:INTERESTED_IN_TOPIC]-(u2:User {name: $user2})
123
- WITH DISTINCT t
124
-
125
- // Find questions with these common topics
126
- MATCH (q:Question)-[:HAS_TOPIC]->(t)
127
- WHERE q.author <> $user1 AND q.author <> $user2
128
-
129
- // Group questions and calculate relevance
130
- WITH q, COLLECT(DISTINCT t.topic) as matching_topics
131
- WITH q, matching_topics, SIZE(matching_topics) as relevance_score
132
-
133
- // Return questions with their details
134
- RETURN DISTINCT
135
- q.title as title,
136
- q.body as body,
137
- q.created_utc_ts as created_date,
138
- q.author as author,
139
- matching_topics as topics,
140
- relevance_score
141
- ORDER BY relevance_score DESC, q.created_utc_ts DESC
142
- LIMIT $limit
143
- """, user1=user1, user2=user2, limit=max_questions)
 
 
 
 
144
 
145
- questions = [dict(record) for record in topic_questions]
146
-
147
- # Post-process to remove duplicate/similar questions and ensure diversity
148
- seen_titles = set()
149
- filtered_questions = []
150
- for q in questions:
151
- # Create a simplified version of the title for comparison
152
- simple_title = q['title'].lower().strip()
 
 
 
 
 
 
 
 
 
153
 
154
- # Skip if we've seen a very similar title
155
- if any(self._titles_are_similar(simple_title, seen) for seen in seen_titles):
156
- continue
157
 
158
- seen_titles.add(simple_title)
159
- filtered_questions.append(q)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
 
161
- if len(filtered_questions) >= max_questions:
162
- break
163
-
164
- return filtered_questions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
- def _titles_are_similar(self, title1: str, title2: str, similarity_threshold: float = 0.8) -> bool:
167
- """Check if two titles are very similar to avoid recommending duplicate questions."""
168
- # Remove common punctuation and convert to set of words
169
- words1 = set(title1.replace('?', '').replace('!', '').replace('.', '').split())
170
- words2 = set(title2.replace('?', '').replace('!', '').replace('.', '').split())
171
 
172
- # Calculate Jaccard similarity
173
- intersection = len(words1 & words2)
174
- union = len(words1 | words2)
175
 
176
- if union == 0:
177
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- return intersection / union >= similarity_threshold
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
181
  def format_question(q: Dict) -> str:
182
- """Format a question for display."""
183
- created_date = format_neo4j_datetime(q.get('created_date'))
184
- keywords_or_topics = q.get('keywords', q.get('topics', []))
185
- interests = format_interest_list(set(k for k in keywords_or_topics if k is not None), max_items=5)
186
- author = q.get('author', 'Unknown author')
187
- title = q.get('title', 'Untitled')
188
- body = q.get('body', '')
189
-
190
- # Only show body section if there's actual content
191
- body_html = f"""
192
- <div class="question-body">
193
- {body[:300] + "... [truncated]" if body and len(body) > 300 else body}
194
- </div>
195
- """ if body else ""
196
-
197
- return f"""
198
- <div class="question-card">
199
- <h3>{title}</h3>
200
- <div class="question-meta">
201
- Posted by <span class="author">{author}</span> on <span class="date">{created_date}</span>
202
- </div>
203
- <div class="interests">
204
- Common Interests: <span class="interest-tags">{interests}</span>
205
- </div>{body_html}
206
- </div>
207
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
 
209
  def loading_message() -> Tuple[str, str, str]:
210
  """Return loading message in proper HTML format."""
@@ -218,8 +706,12 @@ def loading_message() -> Tuple[str, str, str]:
218
  """
219
  return loading_html, loading_html, loading_html
220
 
221
- def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
222
  """Main function to get recommendations and user interests."""
 
 
 
 
223
  recommender = QuestionRecommender()
224
  try:
225
  # Get interests for both users
@@ -232,62 +724,61 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
232
 
233
  # Format interests summary
234
  interests_summary = f"""
235
- <div class="interests-summary">
236
- <div class="user-interests">
237
- <h3>{user1}'s Interests</h3>
238
- <div class="interest-section">
239
- <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
240
- </div>
241
- <div class="interest-section">
242
- <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
243
- </div>
244
- </div>
245
-
246
- <div class="user-interests">
247
- <h3>{user2}'s Interests</h3>
248
- <div class="interest-section">
249
- <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
250
- </div>
251
- <div class="interest-section">
252
- <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
253
- </div>
254
- </div>
255
-
256
- <div class="common-interests">
257
- <h3>Common Interests</h3>
258
- <div class="interest-section">
259
- <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
260
- </div>
261
- <div class="interest-section">
262
- <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
 
 
263
  </div>
264
- </div>
265
- </div>
266
- """
267
 
268
- # Get recommended questions
269
- questions = recommender.find_common_questions(user1, user2)
270
 
271
  if questions:
272
- questions_text = """<div class="questions-container">\n""" + \
273
- """\n""".join(format_question(q) for q in questions) + \
274
- """\n</div>"""
275
- recommendation_type = """<h2 class="recommendation-header">""" + \
276
- ("Recommendations Based on Common Keywords" if 'keywords' in questions[0]
277
- else "Recommendations Based on Common Topics") + \
278
- """</h2>"""
279
  else:
280
- questions_text = """<div class="no-questions">No questions found based on common interests.</div>"""
281
- recommendation_type = """<h2 class="recommendation-header">No Recommendations Available</h2>"""
282
 
283
- return interests_summary, recommendation_type, questions_text
284
 
285
  except Exception as e:
286
  logger.error(f"Error in recommend_questions: {str(e)}")
287
  return (
288
- """<div class="error">Error fetching user interests. Please try again.</div>""",
289
- """<h2 class="error-header">Error</h2>""",
290
- f"""<div class="error-message">An error occurred: {str(e)}</div>"""
 
291
  )
292
  finally:
293
  recommender.close()
@@ -469,54 +960,71 @@ strong {
469
  }
470
  """
471
 
472
- # Create Gradio interface
473
- recommender = QuestionRecommender()
474
- users = recommender.get_all_users()
475
- recommender.close()
 
476
 
477
- with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_css) as iface:
478
- gr.Markdown("""
479
- # 🤝 Question Recommender
480
- Find questions that two users might be interested in discussing together based on their common interests.
481
- """)
482
-
483
- with gr.Row(equal_height=True):
484
- with gr.Column(scale=1):
485
- user1_dropdown = gr.Dropdown(
486
- choices=users,
487
- label="👤 First User",
488
- interactive=True,
489
- max_choices=None
490
- )
491
- with gr.Column(scale=1):
492
- user2_dropdown = gr.Dropdown(
493
- choices=users,
494
- label="👤 Second User",
495
- interactive=True,
496
- max_choices=None
497
- )
498
-
499
- recommend_btn = gr.Button(
500
- "🔍 Get Recommendations",
501
- variant="primary",
502
- size="lg"
503
- )
504
-
505
- with gr.Row():
506
- interests_output = gr.HTML(label="Common Interests")
507
-
508
- recommendation_type = gr.HTML()
509
- questions_output = gr.HTML()
510
-
511
- # Add loading state
512
- recommend_btn.click(
513
- fn=loading_message, # First show loading message
514
- outputs=[interests_output, recommendation_type, questions_output],
515
- queue=False # Don't queue this call
516
- ).then( # Then get the actual recommendations
517
- fn=recommend_questions,
518
- inputs=[user1_dropdown, user2_dropdown],
519
- outputs=[interests_output, recommendation_type, questions_output]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
520
  )
521
 
522
- iface.launch()
 
 
4
  from typing import List, Dict, Tuple
5
  import pandas as pd
6
  from datetime import datetime
 
7
 
8
+ # Set up logging with more detailed format for debugging
9
+ logging.basicConfig(
10
+ level=logging.INFO,
11
+ format='%(asctime)s - %(levelname)s - %(message)s',
12
+ datefmt='%Y-%m-%d %H:%M:%S'
13
+ )
14
  logger = logging.getLogger(__name__)
15
 
16
  # Get Neo4j credentials from Hugging Face secrets
 
21
  def format_neo4j_datetime(dt) -> str:
22
  """Convert Neo4j datetime to string format."""
23
  if dt is None:
24
+ logger.info("Received None datetime")
25
  return 'Unknown date'
26
  try:
27
+ logger.info(f"Formatting datetime: {dt} of type {type(dt)}")
28
  if hasattr(dt, 'to_native'):
29
  dt = dt.to_native()
30
+ logger.info(f"Converted to native: {dt} of type {type(dt)}")
31
  return dt.strftime('%Y-%m-%d')
32
  except Exception as e:
33
  logger.warning(f"Error formatting datetime: {e}")
 
44
 
45
  class QuestionRecommender:
46
  def __init__(self):
47
+ try:
48
+ self.driver = GraphDatabase.driver(
49
+ NEO4J_URL,
50
+ auth=(NEO4J_USER, NEO4J_PASSWORD)
51
+ )
52
+ logger.info("Initializing QuestionRecommender with debug database")
53
+ # Test connection immediately
54
+ self.driver.verify_connectivity()
55
+ logger.info("Successfully connected to Neo4j database")
56
+ self.verify_connection()
57
+ # Inspect question types on initialization
58
+ self.inspect_question_types()
59
+ except Exception as e:
60
+ logger.error(f"Failed to initialize database connection: {str(e)}")
61
+ raise
62
+
63
+ def verify_connection(self):
64
+ """Verify database connection and log basic statistics."""
65
+ try:
66
+ with self.driver.session() as session:
67
+ # First try a simple query to verify connection
68
+ test_result = session.run("MATCH (n) RETURN count(n) as count").single()
69
+ if not test_result:
70
+ raise Exception("Could not execute test query")
71
+ logger.info(f"Database contains {test_result['count']} total nodes")
72
+
73
+ # Get database statistics with relationship counts
74
+ stats = session.run("""
75
+ // Count nodes
76
+ MATCH (u:User)
77
+ WITH COUNT(u) as user_count
78
+ MATCH (k:Keyword)
79
+ WITH user_count, COUNT(k) as keyword_count
80
+ MATCH (q:Question)
81
+ WITH user_count, keyword_count, COUNT(q) as question_count
82
+ MATCH (t:Topic)
83
+ WITH user_count, keyword_count, question_count, COUNT(t) as topic_count
84
+
85
+ // Count relationships
86
+ OPTIONAL MATCH ()-[r:INTERESTED_IN_KEYWORD]->()
87
+ WITH user_count, keyword_count, question_count, topic_count, COUNT(r) as keyword_rel_count
88
+ OPTIONAL MATCH ()-[r:INTERESTED_IN_TOPIC]->()
89
+ WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, COUNT(r) as topic_rel_count
90
+ OPTIONAL MATCH ()-[r:HAS_KEYWORD]->()
91
+ WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, topic_rel_count, COUNT(r) as question_keyword_count
92
+ OPTIONAL MATCH ()-[r:HAS_TOPIC]->()
93
+ RETURN
94
+ user_count, keyword_count, question_count, topic_count,
95
+ keyword_rel_count, topic_rel_count,
96
+ question_keyword_count, COUNT(r) as question_topic_count
97
+ """).single()
98
+
99
+ if not stats:
100
+ raise Exception("Could not retrieve database statistics")
101
+
102
+ logger.info("=== Database Statistics ===")
103
+ logger.info(f"Nodes:")
104
+ logger.info(f" Users: {stats['user_count']}")
105
+ logger.info(f" Keywords: {stats['keyword_count']}")
106
+ logger.info(f" Questions: {stats['question_count']}")
107
+ logger.info(f" Topics: {stats['topic_count']}")
108
+ logger.info(f"\nRelationships:")
109
+ logger.info(f" User->Keyword (INTERESTED_IN_KEYWORD): {stats['keyword_rel_count']}")
110
+ logger.info(f" User->Topic (INTERESTED_IN_TOPIC): {stats['topic_rel_count']}")
111
+ logger.info(f" Question->Keyword (HAS_KEYWORD): {stats['question_keyword_count']}")
112
+ logger.info(f" Question->Topic (HAS_TOPIC): {stats['question_topic_count']}")
113
+
114
+ except Exception as e:
115
+ logger.error(f"Database verification failed: {str(e)}")
116
+ logger.error(f"URL: {NEO4J_URL}")
117
+ logger.error(f"User: {NEO4J_USER}")
118
+ raise Exception(f"Failed to verify database connection: {str(e)}")
119
+
120
+ def inspect_question_types(self):
121
+ """Inspect different types of questions and their attributes in the database."""
122
+ with self.driver.session() as session:
123
+ try:
124
+ # Get all distinct question types and their properties
125
+ result = session.run("""
126
+ MATCH (q:Question)
127
+ WITH DISTINCT keys(q) as props, labels(q) as types
128
+ RETURN types, props, count(*) as count
129
+ ORDER BY count DESC
130
+ """)
131
+
132
+ logger.info("\n=== Question Types and Properties ===")
133
+ for record in result:
134
+ types = record["types"]
135
+ props = record["props"]
136
+ count = record["count"]
137
+ logger.info(f"\nType: {types}")
138
+ logger.info(f"Count: {count}")
139
+ logger.info("Properties:")
140
+ for prop in props:
141
+ # Get a sample value for this property
142
+ sample = session.run("""
143
+ MATCH (q:Question)
144
+ WHERE $prop in keys(q)
145
+ RETURN q[$prop] as value
146
+ LIMIT 1
147
+ """, prop=prop).single()
148
+
149
+ value = sample["value"] if sample else None
150
+ value_type = type(value).__name__ if value is not None else "None"
151
+ logger.info(f" - {prop}: {value_type} (example: {str(value)[:100]}{'...' if str(value)[100:] else ''})")
152
+
153
+ # Get relationships specific to different question types
154
+ result = session.run("""
155
+ MATCH (q:Question)-[r]->(target)
156
+ WITH DISTINCT type(r) as rel_type, labels(target) as target_labels, count(*) as count
157
+ RETURN rel_type, target_labels, count
158
+ ORDER BY count DESC
159
+ """)
160
+
161
+ logger.info("\n=== Question Relationships ===")
162
+ for record in result:
163
+ rel_type = record["rel_type"]
164
+ target_labels = record["target_labels"]
165
+ count = record["count"]
166
+ logger.info(f"Relationship: {rel_type} -> {target_labels} (Count: {count})")
167
+
168
+ except Exception as e:
169
+ logger.error(f"Error inspecting question types: {str(e)}")
170
+ raise
171
 
172
  def close(self):
173
  self.driver.close()
174
 
175
  def get_all_users(self) -> List[str]:
176
+ """Get list of all users with interest counts."""
177
  with self.driver.session() as session:
178
+ try:
179
+ # Get users with their interest counts using proper relationship patterns
180
+ result = session.run("""
181
+ MATCH (u:User)
182
+ OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
183
+ WITH u,
184
+ COUNT(DISTINCT CASE WHEN type(r) = 'INTERESTED_IN_KEYWORD' THEN interest END) as keyword_count,
185
+ COUNT(DISTINCT CASE WHEN type(r) = 'INTERESTED_IN_TOPIC' THEN interest END) as topic_count
186
+ WHERE keyword_count > 0 OR topic_count > 0
187
+ RETURN
188
+ u.name as username,
189
+ keyword_count,
190
+ topic_count,
191
+ keyword_count + topic_count as total_interests
192
+ ORDER BY total_interests DESC, username
193
+ """)
194
+
195
+ users_with_counts = [(
196
+ record["username"],
197
+ record["keyword_count"],
198
+ record["topic_count"]
199
+ ) for record in result if record["username"]]
200
+
201
+ if not users_with_counts:
202
+ logger.warning("No users found with interests")
203
+ return []
204
+
205
+ logger.info(f"Retrieved {len(users_with_counts)} users with interests")
206
+ logger.info("Top 5 users by interest count:")
207
+ for username, kw_count, topic_count in users_with_counts[:5]:
208
+ logger.info(f" - {username}: {kw_count} keywords, {topic_count} topics")
209
+
210
+ # Format usernames with their counts
211
+ return [
212
+ f"{username} ({kw_count} keywords, {topic_count} topics)"
213
+ for username, kw_count, topic_count in users_with_counts
214
+ ]
215
+ except Exception as e:
216
+ logger.error(f"Error fetching users: {str(e)}")
217
+ return []
218
 
219
  def get_user_interests(self, username: str) -> Dict[str, set]:
220
  """Get keywords and topics a user is interested in."""
 
225
  RETURN DISTINCT k.keyword as keyword
226
  """, username=username)
227
  keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
228
+
229
+ # Log keyword count for debugging
230
+ logger.debug(f"Found {len(keywords)} keywords for user {username}")
231
 
232
  # Get topics the user is interested in
233
  topic_result = session.run("""
 
235
  RETURN DISTINCT t.topic as topic
236
  """, username=username)
237
  topics = {str(record["topic"]) for record in topic_result if record["topic"]}
238
+
239
+ # Log topic count for debugging
240
+ logger.debug(f"Found {len(topics)} topics for user {username}")
241
 
242
  return {"keywords": keywords or set(), "topics": topics or set()}
243
 
244
  def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
245
+ """Find questions to recommend based on common interests using advanced Neo4j features."""
246
  with self.driver.session() as session:
247
+ # Debug: Check if users exist and have interests
248
+ user_check = session.run("""
249
+ MATCH (u1:User {name: $user1})
250
+ MATCH (u2:User {name: $user2})
251
+ OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
252
+ OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
253
+ RETURN
254
+ COUNT(DISTINCT u1) as user1_exists,
255
+ COUNT(DISTINCT u2) as user2_exists,
256
+ COUNT(DISTINCT interest1) as user1_interests,
257
+ COUNT(DISTINCT interest2) as user2_interests
258
+ """, user1=user1, user2=user2).single()
259
+
260
+ if not (user_check and user_check['user1_exists'] and user_check['user2_exists']):
261
+ logger.error(f"One or both users not found: {user1}, {user2}")
262
+ return []
 
 
 
 
 
 
263
 
264
+ logger.info(f"User {user1} has {user_check['user1_interests']} total interests")
265
+ logger.info(f"User {user2} has {user_check['user2_interests']} total interests")
 
 
 
 
 
 
 
 
 
266
 
267
+ # Advanced question recommendation query using Neo4j path finding and scoring
268
+ questions_query = """
269
+ // Find all interests (both keywords and topics) for both users
270
+ MATCH (u1:User {name: $user1})
271
+ MATCH (u2:User {name: $user2})
272
 
273
+ // Get all interests for both users
274
+ OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
275
+ OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
276
+ WITH u1, u2,
277
+ COLLECT(DISTINCT interest1) as u1_interests,
278
+ COLLECT(DISTINCT interest2) as u2_interests
279
+
280
+ // Find questions related to either user's interests for each source
281
+ CALL {
282
+ WITH u1, u2, u1_interests, u2_interests
283
+ UNWIND u1_interests + u2_interests as interest
284
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
285
+ WHERE
286
+ q.author <> $user1 AND
287
+ q.author <> $user2 AND
288
+ q.source = 'stack_exchange' AND
289
+ (
290
+ (interest IN u1_interests AND interest IN u2_interests) OR
291
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
292
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
293
+ )
294
+ WITH q, interest, type(r) as rel_type,
295
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
296
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
297
+ sum(interest_weight) as base_score
298
+ RETURN q, interests, base_score
299
+ ORDER BY base_score * rand() DESC
300
+ LIMIT 15 // Increased from 10 to get more variety
301
+
302
+ UNION
303
 
304
+ WITH u1, u2, u1_interests, u2_interests
305
+ UNWIND u1_interests + u2_interests as interest
306
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
307
+ WHERE
308
+ q.source = 'trivia' AND
309
+ (
310
+ (interest IN u1_interests AND interest IN u2_interests) OR
311
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
312
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
313
+ )
314
+ WITH q, interest, type(r) as rel_type,
315
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
316
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
317
+ sum(interest_weight) as base_score
318
+ RETURN q, interests, base_score
319
+ ORDER BY base_score * rand() DESC
320
+ LIMIT 15 // Increased from 10 to get more variety
321
 
322
+ UNION
 
 
323
 
324
+ WITH u1, u2, u1_interests, u2_interests
325
+ UNWIND u1_interests + u2_interests as interest
326
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
327
+ WHERE
328
+ q.source = 'wikipedia' AND
329
+ (
330
+ (interest IN u1_interests AND interest IN u2_interests) OR
331
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
332
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
333
+ )
334
+ WITH q, interest, type(r) as rel_type,
335
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
336
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
337
+ sum(interest_weight) as base_score
338
+ RETURN q, interests, base_score
339
+ ORDER BY base_score * rand() DESC
340
+ LIMIT 15 // Increased from 10 to get more variety
341
 
342
+ UNION
343
+
344
+ WITH u1, u2, u1_interests, u2_interests
345
+ UNWIND u1_interests + u2_interests as interest
346
+ MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
347
+ WHERE
348
+ q.source = 'reddit' AND
349
+ (
350
+ (interest IN u1_interests AND interest IN u2_interests) OR
351
+ (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
352
+ (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
353
+ )
354
+ WITH q, interest, type(r) as rel_type,
355
+ CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
356
+ WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
357
+ sum(interest_weight) as base_score
358
+ RETURN q, interests, base_score
359
+ ORDER BY base_score * rand() DESC
360
+ LIMIT 15 // Increased from 10 to get more variety
361
+ }
362
+
363
+ // Calculate temporal relevance for the combined results
364
+ WITH q, interests, base_score,
365
+ CASE
366
+ WHEN q.created_utc_ts IS NOT NULL
367
+ THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
368
+ ELSE base_score
369
+ END as temporal_score,
370
+ // Add source-specific random boost to ensure better mixing
371
+ CASE q.source
372
+ WHEN 'stack_exchange' THEN rand() * 0.4
373
+ WHEN 'trivia' THEN rand() * 0.4
374
+ WHEN 'wikipedia' THEN rand() * 0.4
375
+ WHEN 'reddit' THEN rand() * 0.4
376
+ ELSE rand() * 0.4
377
+ END as source_random_boost
378
+
379
+ // Return results with all metadata
380
+ WITH q, interests, temporal_score, source_random_boost,
381
+ temporal_score * (0.6 + 0.8 * rand()) + source_random_boost as final_score
382
+ RETURN DISTINCT
383
+ q.title as title,
384
+ q.body as body,
385
+ q.created_utc_ts as created_utc_ts,
386
+ q.author as author,
387
+ q.source as source,
388
+ q.correct_answer as correct_answer,
389
+ q.incorrect_answers as incorrect_answers,
390
+ q.upvotes as upvotes,
391
+ q.num_comments as num_comments,
392
+ q.subreddit as subreddit,
393
+ [i in interests | CASE
394
+ WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
395
+ ELSE i.interest.topic
396
+ END] as matching_interests,
397
+ [i in interests | CASE
398
+ WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
399
+ ELSE 'topic'
400
+ END] as interest_types,
401
+ final_score as relevance_score
402
+ ORDER BY final_score DESC
403
+ LIMIT $max_questions
404
+ """
405
+
406
+ questions = [dict(record) for record in session.run(questions_query,
407
+ user1=user1,
408
+ user2=user2,
409
+ max_questions=max_questions)]
410
+
411
+ if questions:
412
+ first_q = questions[0]
413
+ logger.info(f"Sample question:")
414
+ logger.info(f"Title: {first_q.get('title', 'No title')}")
415
+ logger.info(f"Author: {first_q.get('author', 'No author')}")
416
+ logger.info(f"Score: {first_q.get('relevance_score', 0)}")
417
+ logger.info(f"Interests: {first_q.get('matching_interests', [])}")
418
+
419
+ logger.info(f"Found {len(questions)} questions with common interests")
420
+ return questions
421
 
422
+ def process_body(text, title):
423
+ """Process question body to handle images and HTML."""
424
+ if not text:
425
+ logger.warning(f"Empty body for question: {title}")
426
+ return ""
427
 
428
+ try:
429
+ from bs4 import BeautifulSoup
 
430
 
431
+ # Parse the HTML content
432
+ soup = BeautifulSoup(str(text), 'html.parser')
433
+
434
+ # Function to fix Stack Exchange URLs
435
+ def fix_stack_exchange_url(url):
436
+ if not url:
437
+ return url
438
+ if url.startswith(('http://', 'https://')):
439
+ return url
440
+ if url.startswith('//'):
441
+ return 'https:' + url
442
+ if url.startswith('/'):
443
+ return 'https://i.stack.imgur.com' + url
444
+ return 'https://i.stack.imgur.com/' + url
445
+
446
+ # Find all img tags and replace with preview cards
447
+ for img in soup.find_all('img'):
448
+ src = img.get('src', '')
449
+ if not src:
450
+ continue
451
+
452
+ fixed_src = fix_stack_exchange_url(src)
453
+ alt_text = img.get('alt', '').strip()
454
+ if not alt_text or alt_text.lower() == 'enter image description here':
455
+ alt_text = 'Question image'
456
+
457
+ # Create an image preview card
458
+ preview_html = f"""
459
+ <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
460
+ <div style="display: flex; align-items: center; margin-bottom: 8px;">
461
+ <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
462
+ <span style="color: #93c5fd;">{alt_text}</span>
463
+ </div>
464
+ <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
465
+ style="color: #60a5fa; text-decoration: none;">View image</a>
466
+ </div>
467
+ """
468
 
469
+ new_soup = BeautifulSoup(preview_html, 'html.parser')
470
+ img.replace_with(new_soup)
471
+
472
+ # Style other elements
473
+ for link in soup.find_all('a'):
474
+ if 'View Image' not in (link.get_text() or ''):
475
+ href = link.get('href', '')
476
+ if href and not href.startswith(('http://', 'https://')):
477
+ link['href'] = fix_stack_exchange_url(href)
478
+ link['target'] = '_blank'
479
+ link['rel'] = 'noopener noreferrer'
480
+ link['style'] = 'color: #60a5fa; text-decoration: none;'
481
+
482
+ # Add paragraph styling
483
+ for p in soup.find_all(['p', 'div']):
484
+ if not any(cls in (p.get('class', []) or []) for cls in ['image-preview', 'question-card']):
485
+ current_style = p.get('style', '')
486
+ p['style'] = f"{current_style}; margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
487
+
488
+ # Add list styling
489
+ for ul in soup.find_all(['ul', 'ol']):
490
+ ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
491
+
492
+ for li in soup.find_all('li'):
493
+ li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
494
+
495
+ # Add code block styling
496
+ for code in soup.find_all(['code', 'pre']):
497
+ code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
498
+
499
+ return str(soup)
500
+
501
+ except Exception as e:
502
+ logger.error(f"Error processing question body: {str(e)}")
503
+ return str(text) if text else ""
504
 
505
def format_question(q: Dict) -> str:
    """Format a question for display as an HTML card, based on its source.

    The card contains a source badge, the title, an optional relevance
    score, optional author/date/vote metadata, the interests shared with
    the question, and source-specific content (body for Stack Exchange and
    Reddit, answer options for trivia, the answer for Wikipedia).

    Args:
        q: Question record. Keys read here: ``title``, ``source``,
            ``author``, ``created_utc_ts``, ``upvotes``, ``num_comments``,
            ``body``, ``correct_answer``, ``incorrect_answers``,
            ``subreddit``, ``matching_interests``, ``interest_types`` and
            ``relevance_score``. All are optional and may be ``None``.

    Returns:
        HTML for the question card. If anything goes wrong while rendering,
        an HTML error box describing the failure is returned instead; this
        function does not raise.
    """
    import html  # local import keeps this block self-contained

    def esc(value) -> str:
        """Escape a value for embedding in HTML.

        Existing entities are decoded first so that text which was already
        HTML-encoded upstream is not double-encoded.
        """
        return html.escape(html.unescape(str(value)))

    try:
        # Extract and validate basic question data (tolerating None values)
        title = q.get('title') or 'Untitled'
        source = str(q.get('source') or '').lower()  # lowercase for consistent comparison

        # Log available fields for debugging
        logger.info("Question fields: %s", list(q.keys()))
        if 'created_utc_ts' in q:
            logger.info("Raw created_utc_ts value: %s", q['created_utc_ts'])

        metadata_html = ""
        content_html = ""

        # Metadata line for questions that carry an author and/or a date
        if 'author' in q or 'created_utc_ts' in q:
            author = q.get('author') or 'Unknown author'
            created_date = format_neo4j_datetime(q.get('created_utc_ts'))
            logger.info("Question %s: author=%s, date=%s", title, author, created_date)
            upvotes = q.get('upvotes') or 0
            num_comments = q.get('num_comments') or 0

            verb = 'asked' if source == 'stack_exchange' else 'posted'
            # The subreddit is built into the line directly instead of being
            # patched in afterwards with str.replace on the rendered HTML,
            # which could also rewrite text inside the author name.
            where_html = ""
            if source == 'reddit' and q.get('subreddit'):
                where_html = (
                    ' in <span style="color: #60a5fa; font-weight: 500;">'
                    f'r/{esc(q["subreddit"])}</span>'
                )

            metadata_html = f"""
            <div class="question-meta" style="font-size: 0.9em; color: #cbd5e1; margin-bottom: 15px;">
                <span style="color: #93c5fd; font-weight: 500;">{esc(author)}</span>
                {verb}{where_html} on
                <span style="color: #94a3b8;">{esc(created_date)}</span>
                <div class="stats" style="margin-top: 5px;">
                    <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {esc(upvotes)}</span>
                    <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {esc(num_comments)}</span>
                </div>
            </div>
            """

        # Source-specific content
        if source == "stack_exchange":
            body = q.get('body', '')
            if body:
                content_html = f"""
                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
                    {process_body(body, title)}
                </div>
                """

        elif source == "trivia":
            correct_answer = q.get('correct_answer', '')
            # list(...) tolerates None and non-list sequences
            answers = [correct_answer] + list(q.get('incorrect_answers') or [])

            def render_answer(answer) -> str:
                """Render one answer option, highlighting the correct one."""
                is_correct = answer == correct_answer
                border = '#10b981' if is_correct else '#475569'
                colour = '#34d399' if is_correct else '#94a3b8'
                return f"""
                <div class="answer-option" style="margin: 8px 0; padding: 10px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid {border};">
                    <span style="color: {colour};">
                        {esc(answer)}
                    </span>
                </div>
                """

            answers_html = "".join(render_answer(answer) for answer in answers)

            content_html = f"""
            <div class="answers-container" style="margin-top: 15px;">
                <div style="color: #94a3b8; margin-bottom: 10px;">Answer options:</div>
                {answers_html}
            </div>
            """

        elif source == "wikipedia":
            correct_answer = q.get('correct_answer', '')
            if correct_answer:
                content_html = f"""
                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid #10b981;">
                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
                    <div style="color: #34d399;">{esc(correct_answer)}</div>
                </div>
                """

        # If no specific content was produced, fall back to any content field
        if not content_html:
            if 'body' in q:
                content_html = f"""
                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
                    {process_body(q['body'], title)}
                </div>
                """
            elif 'correct_answer' in q:
                content_html = f"""
                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px;">
                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
                    <div style="color: #e2e8f0;">{esc(q['correct_answer'])}</div>
                </div>
                """

        # Source-specific icon and colour
        source_icon = {
            'stack_exchange': '⚡',  # Lightning bolt for Stack Exchange
            'reddit': '🔸',          # Orange diamond for Reddit
            'wikipedia': '📚',       # Books for Wikipedia
            'trivia': '🎯',          # Target/bullseye for Trivia
        }.get(source, '❔')          # Question mark as fallback

        source_color = {
            'stack_exchange': '#60a5fa',  # Blue
            'reddit': '#f97316',          # Orange
            'wikipedia': '#22c55e',       # Green
            'trivia': '#eab308',          # Yellow
        }.get(source, '#60a5fa')          # Default blue

        # Source badge; underscores become spaces so that 'stack_exchange'
        # reads "Stack Exchange" rather than "Stack_Exchange".
        source_display = source.replace('_', ' ').title() if source else "Unknown"
        source_badge = f"""
        <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
            <span style="margin-right: 6px; font-size: 1.1em;">{source_icon}</span>
            <span style="color: {source_color}; font-size: 0.9em; font-weight: 500;">{esc(source_display)}</span>
        </div>
        """

        # Pair each matching interest with its type, dropping blanks
        interest_pairs = [
            (name, kind)
            for name, kind in zip(q.get('matching_interests') or [], q.get('interest_types') or [])
            if name and kind
        ]
        keywords = [name for name, kind in interest_pairs if kind == 'keyword']
        topics = [name for name, kind in interest_pairs if kind == 'topic']

        # NOTE(review): format_interest_list's output is embedded as-is,
        # matching its other call sites in this file; confirm it returns
        # plain text or safe markup.
        interests_display = []
        if keywords:
            interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
        if topics:
            interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
        interests_str = " | ".join(interests_display) if interests_display else "No common interests found"

        # Relevance score badge, shown only for positive scores. A missing,
        # None or non-numeric score is treated as 0.
        try:
            relevance_score = float(q.get('relevance_score') or 0)
        except (TypeError, ValueError):
            relevance_score = 0.0
        score_display = ""
        if relevance_score > 0:
            score_display = f"""
            <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
                <span style="color: #93c5fd; font-size: 0.9em;">Relevance: {relevance_score:.2f}</span>
            </div>
            """

        # Assemble the question card
        return f"""
        <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
            <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
                <div style="flex: 1; display: flex; align-items: center;">
                    {source_badge}
                    <h3 style="color: #60a5fa; margin: 0; font-size: 1.4em; display: inline;">{esc(title)}</h3>
                </div>
                {score_display}
            </div>

            {metadata_html}

            <div class="interests-bar" style="margin: 15px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px; border-left: 3px solid #3b82f6;">
                <div style="color: #94a3b8; font-size: 0.9em;">Common Interests:</div>
                <div style="color: #93c5fd; font-weight: 500; margin-top: 5px;">{interests_str}</div>
            </div>

            {content_html}
        </div>
        """

    except Exception as e:
        # Rendering boundary: one malformed record must not break the whole
        # list, so show an error card in its place.
        logger.error("Error formatting question: %s", e)
        return f"""
        <div style="background: rgba(239, 68, 68, 0.2); padding: 15px; border-radius: 8px; margin: 10px 0; border: 1px solid rgba(239, 68, 68, 0.3);">
            <div style="color: #fca5a5;">Error displaying question: {html.escape(str(e))}</div>
        </div>
        """
696
 
697
  def loading_message() -> Tuple[str, str, str]:
698
  """Return loading message in proper HTML format."""
 
706
  """
707
  return loading_html, loading_html, loading_html
708
 
709
+ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
710
  """Main function to get recommendations and user interests."""
711
+ # Extract actual usernames from the formatted strings
712
+ user1 = user1.split(" (")[0] if " (" in user1 else user1
713
+ user2 = user2.split(" (")[0] if " (" in user2 else user2
714
+
715
  recommender = QuestionRecommender()
716
  try:
717
  # Get interests for both users
 
724
 
725
  # Format interests summary
726
  interests_summary = f"""
727
+ <div class="interests-summary">
728
+ <div class="user-interests">
729
+ <h3>{user1}'s Interests</h3>
730
+ <div class="interest-section">
731
+ <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
732
+ </div>
733
+ <div class="interest-section">
734
+ <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
735
+ </div>
736
+ </div>
737
+
738
+ <div class="user-interests">
739
+ <h3>{user2}'s Interests</h3>
740
+ <div class="interest-section">
741
+ <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
742
+ </div>
743
+ <div class="interest-section">
744
+ <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
745
+ </div>
746
+ </div>
747
+
748
+ <div class="common-interests">
749
+ <h3>Common Interests</h3>
750
+ <div class="interest-section">
751
+ <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
752
+ </div>
753
+ <div class="interest-section">
754
+ <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
755
+ </div>
756
+ </div>
757
  </div>
758
+ """
 
 
759
 
760
+ # Get all recommended questions
761
+ questions = recommender.find_common_questions(user1, user2, max_questions=50)
762
 
763
  if questions:
764
+ questions_text = '<div class="questions-container">\n' + \
765
+ '\n'.join(format_question(q) for q in questions) + \
766
+ '\n</div>'
767
+
768
+ recommendation_type = '<h2 class="recommendation-header">Recommendations Based on Common Interests</h2>'
 
 
769
  else:
770
+ questions_text = '<div class="no-questions">No questions found based on common interests.</div>'
771
+ recommendation_type = '<h2 class="recommendation-header">No Recommendations Available</h2>'
772
 
773
+ return interests_summary, recommendation_type, questions_text, questions
774
 
775
  except Exception as e:
776
  logger.error(f"Error in recommend_questions: {str(e)}")
777
  return (
778
+ '<div class="error">Error fetching user interests. Please try again.</div>',
779
+ '<h2 class="error-header">Error</h2>',
780
+ f'<div class="error-message">An error occurred: {str(e)}</div>',
781
+ []
782
  )
783
  finally:
784
  recommender.close()
 
960
  }
961
  """
962
 
963
def main():
    """Build the Gradio interface and launch it on 127.0.0.1:7860.

    The user list for both dropdowns is fetched once, up front, through a
    short-lived ``QuestionRecommender``; each recommendation request then
    goes through ``recommend_questions``.
    """
    # Fetch the dropdown choices; always release the recommender's driver,
    # even if the query fails.
    recommender = QuestionRecommender()
    try:
        users = recommender.get_all_users()
    finally:
        recommender.close()

    with gr.Blocks(title="Question Recommender (Local Debug)", theme=gr.themes.Soft(), css=custom_css) as iface:
        gr.Markdown("""
        # 🤝 Question Recommender (Local Debug Version)
        Find questions that two users might be interested in discussing together based on their common interests.

        > This is the local debug version using the test database.
        """)

        with gr.Row(equal_height=True):
            with gr.Column(scale=1):
                user1_dropdown = gr.Dropdown(
                    choices=users,
                    label="👤 First User",
                    interactive=True,
                    elem_id="user1-input"
                )
            with gr.Column(scale=1):
                user2_dropdown = gr.Dropdown(
                    choices=users,
                    label="👤 Second User",
                    interactive=True,
                    elem_id="user2-input"
                )

        recommend_btn = gr.Button(
            "🔍 Get Recommendations",
            variant="primary",
            size="lg"
        )

        with gr.Row():
            interests_output = gr.HTML(label="Common Interests")

        recommendation_type = gr.HTML()
        questions_output = gr.HTML()

        def recommend_and_store(user1, user2):
            """Get recommendations and return only the three HTML outputs.

            The raw question records returned by ``recommend_questions``
            are not bound to any UI component, so they are dropped here.
            """
            interests, rec_type, questions_html, _ = recommend_questions(user1, user2)
            return interests, rec_type, questions_html

        # Wire up the components: show the loading placeholders first, then
        # replace them with the actual results.
        recommend_btn.click(
            fn=loading_message,
            outputs=[interests_output, recommendation_type, questions_output],
            queue=False
        ).then(
            fn=recommend_and_store,
            inputs=[user1_dropdown, user2_dropdown],
            outputs=[interests_output, recommendation_type, questions_output]
        )

    # Launch with additional debug info
    logger.info("Starting local debug version of Question Recommender")
    iface.launch(
        show_error=True,
        server_name="127.0.0.1",
        server_port=7860
    )


if __name__ == "__main__":
    main()