Spaces:

NimaKL
/

LetsTalk

Runtime error

App Files Community

NimaKL committed on May 27

Commit

7c2dc3d

verified ·

1 Parent(s): ef3d758

Update app.py

Browse files

Files changed (1) hide show

app.py +504 -188

app.py CHANGED Viewed

@@ -5,14 +5,12 @@ from typing import List, Dict, Tuple
 import pandas as pd
 from datetime import datetime
 import os
-import re
-from html import escape
-from bs4 import BeautifulSoup
-# Set up basic logging
 logging.basicConfig(
-    level=logging.WARNING,
-    format='%(levelname)s: %(message)s'
 )
 logger = logging.getLogger(__name__)
@@ -21,16 +19,16 @@ NEO4J_URL = os.getenv("NEO4J_URL")
 NEO4J_USER = os.getenv("NEO4J_USER")
 NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
-if not all([NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD]):
-    raise ValueError("Missing required environment variables for database connection")
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
         return 'Unknown date'
     try:
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
@@ -45,85 +43,6 @@ def format_interest_list(interests: set, max_items: int = 10) -> str:
         return ', '.join(sorted_interests)
     return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
-def process_body(text, title):
-    """Process question body to handle images and HTML."""
-    if not text:
-        return ""
-    try:
-        # Parse the HTML content
-        soup = BeautifulSoup(str(text), 'html.parser')
-        # Function to fix Stack Exchange URLs
-        def fix_stack_exchange_url(url):
-            if not url:
-                return url
-            if url.startswith(('http://', 'https://')):
-                return url
-            if url.startswith('//'):
-                return 'https:' + url
-            if url.startswith('/'):
-                return 'https://i.stack.imgur.com' + url
-            return 'https://i.stack.imgur.com/' + url
-        # Find all img tags and replace with preview cards
-        for img in soup.find_all('img'):
-            src = img.get('src', '')
-            if not src:
-                continue
-            fixed_src = fix_stack_exchange_url(src)
-            alt_text = img.get('alt', '').strip()
-            if not alt_text or alt_text.lower() == 'enter image description here':
-                alt_text = 'Question image'
-            preview_html = f"""
-            <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
-                <div style="display: flex; align-items: center; margin-bottom: 8px;">
-                    <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
-                    <span style="color: #93c5fd;">{alt_text}</span>
-                </div>
-                <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
-                   style="color: #60a5fa; text-decoration: none;">View image</a>
-            </div>
-            """
-            new_soup = BeautifulSoup(preview_html, 'html.parser')
-            img.replace_with(new_soup)
-        # Style other elements
-        for link in soup.find_all('a'):
-            if 'View Image' not in (link.get_text() or ''):
-                href = link.get('href', '')
-                if href and not href.startswith(('http://', 'https://')):
-                    link['href'] = fix_stack_exchange_url(href)
-                link['target'] = '_blank'
-                link['rel'] = 'noopener noreferrer'
-                link['style'] = 'color: #60a5fa; text-decoration: none;'
-        # Add paragraph styling
-        for p in soup.find_all(['p', 'div']):
-            if not any(cls in (p.get('class', []) or []) for cls in ['image-preview', 'question-card']):
-                current_style = p.get('style', '')
-                p['style'] = f"{current_style}; margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
-        # Add list styling
-        for ul in soup.find_all(['ul', 'ol']):
-            ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
-        for li in soup.find_all('li'):
-            li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
-        # Add code block styling
-        for code in soup.find_all(['code', 'pre']):
-            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
-        return str(soup)
-    except Exception as e:
-        logger.warning(f"Error processing question body: {str(e)}")
-        return str(text) if text else ""
 class QuestionRecommender:
     def __init__(self):
         try:
@@ -131,12 +50,126 @@ class QuestionRecommender:
                 NEO4J_URL,
                 auth=(NEO4J_USER, NEO4J_PASSWORD)
             )
             # Test connection immediately
             self.driver.verify_connectivity()
         except Exception as e:
             logger.error(f"Failed to initialize database connection: {str(e)}")
             raise
     def close(self):
         self.driver.close()
@@ -144,6 +177,7 @@ class QuestionRecommender:
         """Get list of all users with interest counts."""
         with self.driver.session() as session:
             try:
                 result = session.run("""
                     MATCH (u:User)
                     OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
@@ -166,8 +200,15 @@ class QuestionRecommender:
                 ) for record in result if record["username"]]
                 if not users_with_counts:
                     return []
                 return [
                     f"{username} ({kw_count} keywords, {topic_count} topics)"
                     for username, kw_count, topic_count in users_with_counts
@@ -179,48 +220,133 @@ class QuestionRecommender:
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Get keywords and topics a user is interested in."""
         with self.driver.session() as session:
             keyword_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
             topic_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             topics = {str(record["topic"]) for record in topic_result if record["topic"]}
             return {"keywords": keywords or set(), "topics": topics or set()}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
-        """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
-            # First, get questions per source
-            questions_per_source = max_questions // 4  # Divide evenly among 4 sources
-            remaining_slots = max_questions % 4  # Handle any remainder
-            # Query for each source separately to ensure balanced representation
-            sources = ['stack_exchange', 'reddit', 'wikipedia', 'trivia']
-            all_questions = []
-            for source in sources:
-                source_questions = session.run("""
                 MATCH (u1:User {name: $user1})
                 MATCH (u2:User {name: $user2})
                 OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
                 OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
-                WITH u1, u2,
-                     COLLECT(DISTINCT interest1) as u1_interests,
-                     COLLECT(DISTINCT interest2) as u2_interests
                 UNWIND u1_interests + u2_interests as interest
                 MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
                 WHERE
                     q.author <> $user1 AND
                     q.author <> $user2 AND
-                    q.source = $source AND
                     (
                         (interest IN u1_interests AND interest IN u2_interests) OR
                         (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
@@ -230,61 +356,174 @@ class QuestionRecommender:
                      CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
                 WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
                      sum(interest_weight) as base_score
-                WITH q, interests, base_score,
-                     CASE
-                         WHEN q.created_utc_ts IS NOT NULL
-                         THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
-                         ELSE base_score
-                     END as temporal_score
                 RETURN DISTINCT
                     q.title as title,
                     q.body as body,
-                    q.created_utc_ts as created_utc_ts,
                     q.author as author,
-                    q.source as source,
-                    q.correct_answer as correct_answer,
-                    q.incorrect_answers as incorrect_answers,
-                    q.upvotes as upvotes,
-                    q.num_comments as num_comments,
-                    q.subreddit as subreddit,
-                    [i in interests | CASE
-                        WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
-                        ELSE i.interest.topic
-                    END] as matching_interests,
-                    [i in interests | CASE
-                        WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
-                        ELSE 'topic'
-                    END] as interest_types,
-                    temporal_score as relevance_score
-                ORDER BY temporal_score DESC
-                LIMIT $limit
-                """,
-                user1=user1,
-                user2=user2,
-                source=source,
-                limit=questions_per_source + (1 if remaining_slots > 0 else 0)
-                )
-                source_results = [dict(record) for record in source_questions]
-                all_questions.extend(source_results)
-                remaining_slots = max(0, remaining_slots - 1)
-            # Sort all questions by relevance score for final ordering
-            all_questions.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
-            return all_questions
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""
     try:
         title = q.get('title', 'Untitled')
-        source = q.get('source', '').lower()
         metadata_html = ""
         content_html = ""
         if 'author' in q or 'created_utc_ts' in q:
             author = q.get('author', 'Unknown author')
             created_date = format_neo4j_datetime(q.get('created_utc_ts'))
             upvotes = q.get('upvotes', 0)
             num_comments = q.get('num_comments', 0)
@@ -296,10 +535,11 @@ def format_question(q: Dict) -> str:
                 <div class="stats" style="margin-top: 5px;">
                     <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {upvotes}</span>
                     <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {num_comments}</span>
-                </div>
             </div>
             """
         if source == "stack_exchange":
             body = q.get('body', '')
             if body:
@@ -308,10 +548,12 @@ def format_question(q: Dict) -> str:
                     {process_body(body, title)}
                 </div>
                 """
         elif source == "trivia":
             correct_answer = q.get('correct_answer', '')
             incorrect_answers = q.get('incorrect_answers', [])
             answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
             answers_html = "".join([
                 f"""
@@ -330,21 +572,58 @@ def format_question(q: Dict) -> str:
                 {answers_html}
             </div>
             """
         source_icon = {
-            'stack_exchange': '⚡',
-            'reddit': '🔸',
-            'wikipedia': '📚',
-            'trivia': '🎯',
-        }.get(source, '❔')
         source_color = {
-            'stack_exchange': '#60a5fa',
-            'reddit': '#f97316',
-            'wikipedia': '#22c55e',
-            'trivia': '#eab308',
-        }.get(source, '#60a5fa')
         source_display = source.title() if source else "Unknown"
         source_badge = f"""
         <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
@@ -353,6 +632,7 @@ def format_question(q: Dict) -> str:
         </div>
         """
         matching_interests = q.get('matching_interests', [])
         interest_types = q.get('interest_types', [])
         interests_with_types = []
@@ -363,9 +643,11 @@ def format_question(q: Dict) -> str:
                     'type': type_
                 })
         keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
         topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
         interests_display = []
         if keywords:
             interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
@@ -373,6 +655,7 @@ def format_question(q: Dict) -> str:
             interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
         interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
         relevance_score = q.get('relevance_score', 0)
         score_display = f"""
         <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
@@ -380,7 +663,8 @@ def format_question(q: Dict) -> str:
         </div>
         """ if relevance_score > 0 else ""
-        return f"""
         <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
             <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
                 <div style="flex: 1; display: flex; align-items: center;">
@@ -401,6 +685,8 @@ def format_question(q: Dict) -> str:
         </div>
         """
     except Exception as e:
         logger.error(f"Error formatting question: {str(e)}")
         return f"""
@@ -409,53 +695,70 @@ def format_question(q: Dict) -> str:
         </div>
         """
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
     """Main function to get recommendations and user interests."""
     user1 = user1.split(" (")[0] if " (" in user1 else user1
     user2 = user2.split(" (")[0] if " (" in user2 else user2
     recommender = QuestionRecommender()
     try:
         user1_interests = recommender.get_user_interests(user1)
         user2_interests = recommender.get_user_interests(user2)
         common_keywords = user1_interests['keywords'] & user2_interests['keywords']
         common_topics = user1_interests['topics'] & user2_interests['topics']
         interests_summary = f"""
-        <div class="interests-summary">
-            <div class="user-interests">
-                <h3>{user1}'s Interests</h3>
-                <div class="interest-section">
-                    <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
-                </div>
-                <div class="interest-section">
-                    <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
-                </div>
-            </div>
-            <div class="user-interests">
-                <h3>{user2}'s Interests</h3>
-                <div class="interest-section">
-                    <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
-                </div>
-                <div class="interest-section">
-                    <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
-                </div>
-            </div>
-            <div class="common-interests">
-                <h3>Common Interests</h3>
-                <div class="interest-section">
-                    <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
-                </div>
-                <div class="interest-section">
-                    <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
-                </div>
-            </div>
         </div>
-        """
         questions = recommender.find_common_questions(user1, user2, max_questions=50)
         if questions:
@@ -481,7 +784,7 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dic
     finally:
         recommender.close()
-# Custom CSS for styling
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -659,14 +962,17 @@ strong {
 """
 def main():
     recommender = QuestionRecommender()
     users = recommender.get_all_users()
     recommender.close()
-    with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_css) as iface:
         gr.Markdown("""
-        # 🤝 Question Recommender
         Find questions that two users might be interested in discussing together based on their common interests.
         """)
         with gr.Row(equal_height=True):
@@ -697,8 +1003,18 @@ def main():
         recommendation_type = gr.HTML()
         questions_output = gr.HTML()
         recommend_btn.click(
-            fn=recommend_questions,
             inputs=[user1_dropdown, user2_dropdown],
             outputs=[interests_output, recommendation_type, questions_output]
         )

 import pandas as pd
 from datetime import datetime
 import os
+# Set up logging with more detailed format for debugging
 logging.basicConfig(
+    level=logging.INFO,
+    format='%(asctime)s - %(levelname)s - %(message)s',
+    datefmt='%Y-%m-%d %H:%M:%S'
 )
 logger = logging.getLogger(__name__)
 NEO4J_USER = os.getenv("NEO4J_USER")
 NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
+        logger.info("Received None datetime")
         return 'Unknown date'
     try:
+        logger.info(f"Formatting datetime: {dt} of type {type(dt)}")
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
+            logger.info(f"Converted to native: {dt} of type {type(dt)}")
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
         return ', '.join(sorted_interests)
     return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
 class QuestionRecommender:
     def __init__(self):
         try:
                 NEO4J_URL,
                 auth=(NEO4J_USER, NEO4J_PASSWORD)
             )
+            logger.info("Initializing QuestionRecommender with debug database")
             # Test connection immediately
             self.driver.verify_connectivity()
+            logger.info("Successfully connected to Neo4j database")
+            self.verify_connection()
+            # Inspect question types on initialization
+            self.inspect_question_types()
         except Exception as e:
             logger.error(f"Failed to initialize database connection: {str(e)}")
             raise
+    def verify_connection(self):
+        """Verify database connection and log basic statistics."""
+        try:
+            with self.driver.session() as session:
+                # First try a simple query to verify connection
+                test_result = session.run("MATCH (n) RETURN count(n) as count").single()
+                if not test_result:
+                    raise Exception("Could not execute test query")
+                logger.info(f"Database contains {test_result['count']} total nodes")
+                # Get database statistics with relationship counts
+                stats = session.run("""
+                    // Count nodes
+                    MATCH (u:User)
+                    WITH COUNT(u) as user_count
+                    MATCH (k:Keyword)
+                    WITH user_count, COUNT(k) as keyword_count
+                    MATCH (q:Question)
+                    WITH user_count, keyword_count, COUNT(q) as question_count
+                    MATCH (t:Topic)
+                    WITH user_count, keyword_count, question_count, COUNT(t) as topic_count
+                    // Count relationships
+                    OPTIONAL MATCH ()-[r:INTERESTED_IN_KEYWORD]->()
+                    WITH user_count, keyword_count, question_count, topic_count, COUNT(r) as keyword_rel_count
+                    OPTIONAL MATCH ()-[r:INTERESTED_IN_TOPIC]->()
+                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, COUNT(r) as topic_rel_count
+                    OPTIONAL MATCH ()-[r:HAS_KEYWORD]->()
+                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, topic_rel_count, COUNT(r) as question_keyword_count
+                    OPTIONAL MATCH ()-[r:HAS_TOPIC]->()
+                    RETURN
+                        user_count, keyword_count, question_count, topic_count,
+                        keyword_rel_count, topic_rel_count,
+                        question_keyword_count, COUNT(r) as question_topic_count
+                """).single()
+                if not stats:
+                    raise Exception("Could not retrieve database statistics")
+                logger.info("=== Database Statistics ===")
+                logger.info(f"Nodes:")
+                logger.info(f"  Users: {stats['user_count']}")
+                logger.info(f"  Keywords: {stats['keyword_count']}")
+                logger.info(f"  Questions: {stats['question_count']}")
+                logger.info(f"  Topics: {stats['topic_count']}")
+                logger.info(f"\nRelationships:")
+                logger.info(f"  User->Keyword (INTERESTED_IN_KEYWORD): {stats['keyword_rel_count']}")
+                logger.info(f"  User->Topic (INTERESTED_IN_TOPIC): {stats['topic_rel_count']}")
+                logger.info(f"  Question->Keyword (HAS_KEYWORD): {stats['question_keyword_count']}")
+                logger.info(f"  Question->Topic (HAS_TOPIC): {stats['question_topic_count']}")
+        except Exception as e:
+            logger.error(f"Database verification failed: {str(e)}")
+            logger.error(f"URL: {NEO4J_URL}")
+            logger.error(f"User: {NEO4J_USER}")
+            raise Exception(f"Failed to verify database connection: {str(e)}")
+    def inspect_question_types(self):
+        """Inspect different types of questions and their attributes in the database."""
+        with self.driver.session() as session:
+            try:
+                # Get all distinct question types and their properties
+                result = session.run("""
+                    MATCH (q:Question)
+                    WITH DISTINCT keys(q) as props, labels(q) as types
+                    RETURN types, props, count(*) as count
+                    ORDER BY count DESC
+                """)
+                logger.info("\n=== Question Types and Properties ===")
+                for record in result:
+                    types = record["types"]
+                    props = record["props"]
+                    count = record["count"]
+                    logger.info(f"\nType: {types}")
+                    logger.info(f"Count: {count}")
+                    logger.info("Properties:")
+                    for prop in props:
+                        # Get a sample value for this property
+                        sample = session.run("""
+                            MATCH (q:Question)
+                            WHERE $prop in keys(q)
+                            RETURN q[$prop] as value
+                            LIMIT 1
+                        """, prop=prop).single()
+                        value = sample["value"] if sample else None
+                        value_type = type(value).__name__ if value is not None else "None"
+                        logger.info(f"  - {prop}: {value_type} (example: {str(value)[:100]}{'...' if str(value)[100:] else ''})")
+                # Get relationships specific to different question types
+                result = session.run("""
+                    MATCH (q:Question)-[r]->(target)
+                    WITH DISTINCT type(r) as rel_type, labels(target) as target_labels, count(*) as count
+                    RETURN rel_type, target_labels, count
+                    ORDER BY count DESC
+                """)
+                logger.info("\n=== Question Relationships ===")
+                for record in result:
+                    rel_type = record["rel_type"]
+                    target_labels = record["target_labels"]
+                    count = record["count"]
+                    logger.info(f"Relationship: {rel_type} -> {target_labels} (Count: {count})")
+            except Exception as e:
+                logger.error(f"Error inspecting question types: {str(e)}")
+                raise
     def close(self):
         self.driver.close()
         """Get list of all users with interest counts."""
         with self.driver.session() as session:
             try:
+                # Get users with their interest counts using proper relationship patterns
                 result = session.run("""
                     MATCH (u:User)
                     OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
                 ) for record in result if record["username"]]
                 if not users_with_counts:
+                    logger.warning("No users found with interests")
                     return []
+                logger.info(f"Retrieved {len(users_with_counts)} users with interests")
+                logger.info("Top 5 users by interest count:")
+                for username, kw_count, topic_count in users_with_counts[:5]:
+                    logger.info(f"  - {username}: {kw_count} keywords, {topic_count} topics")
+                # Format usernames with their counts
                 return [
                     f"{username} ({kw_count} keywords, {topic_count} topics)"
                     for username, kw_count, topic_count in users_with_counts
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Get keywords and topics a user is interested in."""
         with self.driver.session() as session:
+            # Get keywords the user is interested in
             keyword_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
+            # Log keyword count for debugging
+            logger.debug(f"Found {len(keywords)} keywords for user {username}")
+            # Get topics the user is interested in
             topic_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             topics = {str(record["topic"]) for record in topic_result if record["topic"]}
+            # Log topic count for debugging
+            logger.debug(f"Found {len(topics)} topics for user {username}")
             return {"keywords": keywords or set(), "topics": topics or set()}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
+        """Find questions to recommend based on common interests using advanced Neo4j features."""
         with self.driver.session() as session:
+            # Debug: Check if users exist and have interests
+            user_check = session.run("""
                 MATCH (u1:User {name: $user1})
                 MATCH (u2:User {name: $user2})
                 OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
                 OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
+                RETURN
+                    COUNT(DISTINCT u1) as user1_exists,
+                    COUNT(DISTINCT u2) as user2_exists,
+                    COUNT(DISTINCT interest1) as user1_interests,
+                    COUNT(DISTINCT interest2) as user2_interests
+            """, user1=user1, user2=user2).single()
+            if not (user_check and user_check['user1_exists'] and user_check['user2_exists']):
+                logger.error(f"One or both users not found: {user1}, {user2}")
+                return []
+            logger.info(f"User {user1} has {user_check['user1_interests']} total interests")
+            logger.info(f"User {user2} has {user_check['user2_interests']} total interests")
+            # Advanced question recommendation query using Neo4j path finding and scoring
+            questions_query = """
+            // Find all interests (both keywords and topics) for both users
+            MATCH (u1:User {name: $user1})
+            MATCH (u2:User {name: $user2})
+            // Get all interests for both users
+            OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
+            OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
+            WITH u1, u2,
+                 COLLECT(DISTINCT interest1) as u1_interests,
+                 COLLECT(DISTINCT interest2) as u2_interests
+            // Find questions related to either user's interests for each source
+            CALL {
+                WITH u1, u2, u1_interests, u2_interests
                 UNWIND u1_interests + u2_interests as interest
                 MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
                 WHERE
                     q.author <> $user1 AND
                     q.author <> $user2 AND
+                    q.source = 'stack_exchange' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'trivia' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'wikipedia' AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+                UNION
+                WITH u1, u2, u1_interests, u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.source = 'reddit' AND
                     (
                         (interest IN u1_interests AND interest IN u2_interests) OR
                         (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
                      CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
                 WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
                      sum(interest_weight) as base_score
+                RETURN q, interests, base_score
+                ORDER BY base_score * rand() DESC
+                LIMIT 15 // Increased from 10 to get more variety
+            }
+            // Calculate temporal relevance for the combined results
+            WITH q, interests, base_score,
+                 CASE
+                     WHEN q.created_utc_ts IS NOT NULL
+                     THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
+                     ELSE base_score
+                 END as temporal_score,
+                 // Add source-specific random boost to ensure better mixing
+                 CASE q.source
+                     WHEN 'stack_exchange' THEN rand() * 0.4
+                     WHEN 'trivia' THEN rand() * 0.4
+                     WHEN 'wikipedia' THEN rand() * 0.4
+                     WHEN 'reddit' THEN rand() * 0.4
+                     ELSE rand() * 0.4
+                 END as source_random_boost
+            // Return results with all metadata
+            WITH q, interests, temporal_score, source_random_boost,
+                 temporal_score * (0.6 + 0.8 * rand()) + source_random_boost as final_score
                 RETURN DISTINCT
                     q.title as title,
                     q.body as body,
+                q.created_utc_ts as created_utc_ts,
                     q.author as author,
+                q.source as source,
+                q.correct_answer as correct_answer,
+                q.incorrect_answers as incorrect_answers,
+                q.upvotes as upvotes,
+                q.num_comments as num_comments,
+                q.subreddit as subreddit,
+                [i in interests | CASE
+                    WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
+                    ELSE i.interest.topic
+                END] as matching_interests,
+                [i in interests | CASE
+                    WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
+                    ELSE 'topic'
+                END] as interest_types,
+                final_score as relevance_score
+            ORDER BY final_score DESC
+            LIMIT $max_questions
+            """
+            questions = [dict(record) for record in session.run(questions_query,
+                                                              user1=user1,
+                                                              user2=user2,
+                                                              max_questions=max_questions)]
+            if questions:
+                first_q = questions[0]
+                logger.info(f"Sample question:")
+                logger.info(f"Title: {first_q.get('title', 'No title')}")
+                logger.info(f"Author: {first_q.get('author', 'No author')}")
+                logger.info(f"Score: {first_q.get('relevance_score', 0)}")
+                logger.info(f"Interests: {first_q.get('matching_interests', [])}")
+            logger.info(f"Found {len(questions)} questions with common interests")
+            return questions
def process_body(text, title):
    """Render a question body as styled HTML, replacing images with link cards.

    The body is parsed with BeautifulSoup. Every ``<img>`` that has a ``src``
    is swapped for a small preview card linking to the image; links,
    paragraphs/divs, lists and code elements then receive inline styles.

    Args:
        text: Raw question body (HTML). Non-string values are converted
            with ``str`` before parsing.
        title: Question title; only used in the log message emitted for an
            empty body.

    Returns:
        The processed HTML string; ``""`` when ``text`` is falsy; or
        ``str(text)`` unchanged when parsing or processing raises.
    """
    if not text:
        logger.warning(f"Empty body for question: {title}")
        return ""
    try:
        # Imported here so a missing dependency falls into the handler below
        # instead of failing at module import time.
        from html import escape

        from bs4 import BeautifulSoup

        # Parse the HTML content
        soup = BeautifulSoup(str(text), 'html.parser')

        def fix_stack_exchange_url(url):
            """Turn protocol-relative or relative URLs into absolute https URLs."""
            if not url:
                return url
            if url.startswith(('http://', 'https://')):
                return url
            if url.startswith('//'):
                return 'https:' + url
            if url.startswith('/'):
                return 'https://i.stack.imgur.com' + url
            return 'https://i.stack.imgur.com/' + url

        # Find all img tags and replace with preview cards
        for img in soup.find_all('img'):
            src = img.get('src', '')
            if not src:
                continue
            fixed_src = fix_stack_exchange_url(src)
            alt_text = (img.get('alt', '') or '').strip()
            if not alt_text or alt_text.lower() == 'enter image description here':
                alt_text = 'Question image'
            # BeautifulSoup hands attribute values back entity-decoded, so
            # they must be re-escaped before being spliced into markup;
            # otherwise an alt/src value could inject tags or break out of
            # the href attribute.
            safe_alt = escape(alt_text)
            safe_src = escape(fixed_src, quote=True)
            # Create an image preview card
            preview_html = f"""
    <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
        <div style="display: flex; align-items: center; margin-bottom: 8px;">
            <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
            <span style="color: #93c5fd;">{safe_alt}</span>
                    </div>
                    <a href="{safe_src}" target="_blank" rel="noopener noreferrer"
            style="color: #60a5fa; text-decoration: none;">View image</a>
            </div>
            """
            img.replace_with(BeautifulSoup(preview_html, 'html.parser'))

        # Style other elements
        for link in soup.find_all('a'):
            # Links inside a generated preview card already carry their final
            # attributes. They are identified by the card's class rather than
            # by link text, which is fragile (the label is 'View image').
            if link.find_parent(class_='image-preview') is not None:
                continue
            href = link.get('href', '')
            if href and not href.startswith(('http://', 'https://')):
                # NOTE(review): every non-absolute href is resolved against
                # i.stack.imgur.com, including fragments and mailto: links;
                # confirm this is intended for non-image links.
                link['href'] = fix_stack_exchange_url(href)
            link['target'] = '_blank'
            link['rel'] = 'noopener noreferrer'
            link['style'] = 'color: #60a5fa; text-decoration: none;'

        # Add paragraph styling (cards keep their own container style)
        block_style = "margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
        skipped_classes = ('image-preview', 'question-card')
        for p in soup.find_all(['p', 'div']):
            classes = p.get('class', []) or []
            if any(cls in classes for cls in skipped_classes):
                continue
            # Normalise the existing declaration list so the result never
            # starts with "; " or contains ";;".
            current_style = (p.get('style') or '').strip().rstrip(';').rstrip()
            p['style'] = f"{current_style}; {block_style}" if current_style else block_style

        # Add list styling
        for ul in soup.find_all(['ul', 'ol']):
            ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
        for li in soup.find_all('li'):
            li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'

        # Add code block styling
        for code in soup.find_all(['code', 'pre']):
            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'

        return str(soup)
    except Exception as e:
        # Best effort: on any failure fall back to the unprocessed body.
        logger.error(f"Error processing question body: {str(e)}")
        return str(text) if text else ""
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""
     try:
+        # Extract and validate basic question data
         title = q.get('title', 'Untitled')
+        source = q.get('source', '').lower()  # Convert to lowercase for consistent comparison
+        # Log available fields for debugging
+        logger.info(f"Question fields: {list(q.keys())}")
+        if 'created_utc_ts' in q:
+            logger.info(f"Raw created_utc_ts value: {q['created_utc_ts']}")
+        # Format metadata section based on source
         metadata_html = ""
         content_html = ""
+        # Default metadata for questions with author/date
         if 'author' in q or 'created_utc_ts' in q:
             author = q.get('author', 'Unknown author')
             created_date = format_neo4j_datetime(q.get('created_utc_ts'))
+            logger.info(f"Question {title}: author={author}, date={created_date}")
             upvotes = q.get('upvotes', 0)
             num_comments = q.get('num_comments', 0)
                 <div class="stats" style="margin-top: 5px;">
                     <span title="Upvotes"><span style="color: #93c5fd;">▲</span> {upvotes}</span>
                     <span style="margin-left: 15px;" title="Comments"><span style="color: #93c5fd;">💬</span> {num_comments}</span>
+            </div>
             </div>
             """
+        # Handle content based on source and available fields
         if source == "stack_exchange":
             body = q.get('body', '')
             if body:
                     {process_body(body, title)}
                 </div>
                 """
         elif source == "trivia":
             correct_answer = q.get('correct_answer', '')
             incorrect_answers = q.get('incorrect_answers', [])
+            # Create answer options HTML
             answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
             answers_html = "".join([
                 f"""
                 {answers_html}
             </div>
             """
+        elif source == "wikipedia":
+            correct_answer = q.get('correct_answer', '')
+            if correct_answer:
+                content_html = f"""
+                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid #10b981;">
+                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
+                    <div style="color: #34d399;">{correct_answer}</div>
+                </div>
+                """
+        elif source == "reddit":
+            # Add subreddit to metadata if available
+            if 'subreddit' in q:
+                subreddit = q.get('subreddit', '')
+                metadata_html = metadata_html.replace(
+                    'posted on',
+                    f'posted in <span style="color: #60a5fa; font-weight: 500;">r/{subreddit}</span> on'
+                )
+        # If no specific content is set, try to use any available content fields
+        if not content_html:
+            if 'body' in q:
+                content_html = f"""
+                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
+                    {process_body(q['body'], title)}
+                </div>
+                """
+            elif 'correct_answer' in q:
+                content_html = f"""
+                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px;">
+                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
+                    <div style="color: #e2e8f0;">{q['correct_answer']}</div>
+                </div>
+                """
+        # Get source-specific icon and color
         source_icon = {
+            'stack_exchange': '⚡', # Lightning bolt for Stack Exchange
+            'reddit': '🔸',  # Orange diamond for Reddit
+            'wikipedia': '📚',  # Books for Wikipedia
+            'trivia': '🎯',  # Target/bullseye for Trivia
+        }.get(source, '❔')  # Question mark as fallback
         source_color = {
+            'stack_exchange': '#60a5fa',  # Blue
+            'reddit': '#f97316',  # Orange
+            'wikipedia': '#22c55e',  # Green
+            'trivia': '#eab308',  # Yellow
+        }.get(source, '#60a5fa')  # Default blue
+        # Create the source badge with icon
         source_display = source.title() if source else "Unknown"
         source_badge = f"""
         <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
         </div>
         """
+        # Handle matching interests display
         matching_interests = q.get('matching_interests', [])
         interest_types = q.get('interest_types', [])
         interests_with_types = []
                     'type': type_
                 })
+        # Format interests by type
         keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
         topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
+        # Create interests display string
         interests_display = []
         if keywords:
             interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
             interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
         interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
+        # Calculate relevance score display
         relevance_score = q.get('relevance_score', 0)
         score_display = f"""
         <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
         </div>
         """ if relevance_score > 0 else ""
+        # Create the question card HTML
+        question_html = f"""
         <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
             <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
                 <div style="flex: 1; display: flex; align-items: center;">
         </div>
         """
+        return question_html
     except Exception as e:
         logger.error(f"Error formatting question: {str(e)}")
         return f"""
         </div>
         """
def loading_message() -> Tuple[str, str, str]:
    """Build the spinner placeholder displayed while recommendations load.

    Returns:
        A 3-tuple containing the same loading HTML fragment three times,
        one copy per output component.
    """
    spinner_markup = """
    <div class="loading-spinner">
        <div style="text-align: center;">
            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
        </div>
    </div>
    """
    # The same fragment fills every output slot.
    return (spinner_markup,) * 3
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
     """Main function to get recommendations and user interests."""
+    # Extract actual usernames from the formatted strings
     user1 = user1.split(" (")[0] if " (" in user1 else user1
     user2 = user2.split(" (")[0] if " (" in user2 else user2
     recommender = QuestionRecommender()
     try:
+        # Get interests for both users
         user1_interests = recommender.get_user_interests(user1)
         user2_interests = recommender.get_user_interests(user2)
+        # Find common interests
         common_keywords = user1_interests['keywords'] & user2_interests['keywords']
         common_topics = user1_interests['topics'] & user2_interests['topics']
+        # Format interests summary
         interests_summary = f"""
+<div class="interests-summary">
+    <div class="user-interests">
+        <h3>{user1}'s Interests</h3>
+        <div class="interest-section">
+            <strong>Keywords:</strong> {format_interest_list(user1_interests['keywords'], max_items=8)}
         </div>
+        <div class="interest-section">
+            <strong>Topics:</strong> {format_interest_list(user1_interests['topics'], max_items=5)}
+        </div>
+    </div>
+    <div class="user-interests">
+        <h3>{user2}'s Interests</h3>
+        <div class="interest-section">
+            <strong>Keywords:</strong> {format_interest_list(user2_interests['keywords'], max_items=8)}
+        </div>
+        <div class="interest-section">
+            <strong>Topics:</strong> {format_interest_list(user2_interests['topics'], max_items=5)}
+        </div>
+    </div>
+    <div class="common-interests">
+        <h3>Common Interests</h3>
+        <div class="interest-section">
+            <strong>Keywords:</strong> {format_interest_list(common_keywords, max_items=8)}
+        </div>
+        <div class="interest-section">
+            <strong>Topics:</strong> {format_interest_list(common_topics, max_items=5)}
+        </div>
+    </div>
+</div>
+"""
+        # Get all recommended questions
         questions = recommender.find_common_questions(user1, user2, max_questions=50)
         if questions:
     finally:
         recommender.close()
+# Custom CSS for better styling
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
 """
 def main():
+    # Create Gradio interface
     recommender = QuestionRecommender()
     users = recommender.get_all_users()
     recommender.close()
+    with gr.Blocks(title="Question Recommender (Local Debug)", theme=gr.themes.Soft(), css=custom_css) as iface:
         gr.Markdown("""
+        # 🤝 Question Recommender (Local Debug Version)
         Find questions that two users might be interested in discussing together based on their common interests.
+        > This is the local debug version using the test database.
         """)
         with gr.Row(equal_height=True):
         recommendation_type = gr.HTML()
         questions_output = gr.HTML()
+        def recommend_and_store(user1, user2):
+            """Get recommendations and store questions."""
+            interests, rec_type, questions_html, questions_data = recommend_questions(user1, user2)
+            return interests, rec_type, questions_html
+        # Wire up the components
         recommend_btn.click(
+            fn=loading_message,
+            outputs=[interests_output, recommendation_type, questions_output],
+            queue=False
+        ).then(
+            fn=recommend_and_store,
             inputs=[user1_dropdown, user2_dropdown],
             outputs=[interests_output, recommendation_type, questions_output]
         )