Spaces:

NimaKL
/

LetsTalk

Runtime error

App Files Community

NimaKL committed on May 27

Commit

c08b5c8

verified ·

1 Parent(s): 25ea748

Update app.py

Browse files

Files changed (1) hide show

app.py +131 -474

app.py CHANGED Viewed

@@ -5,30 +5,32 @@ from typing import List, Dict, Tuple
 import pandas as pd
 from datetime import datetime
 import os
-# Set up logging with more detailed format for debugging
 logging.basicConfig(
-    level=logging.INFO,
-    format='%(asctime)s - %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
 )
 logger = logging.getLogger(__name__)
-# Get Neo4j credentials from Hugging Face secrets
-NEO4J_URL = os.environ['NEO4J_URL']
-NEO4J_USER = os.environ['NEO4J_USER']
-NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
-        logger.info("Received None datetime")
         return 'Unknown date'
     try:
-        logger.info(f"Formatting datetime: {dt} of type {type(dt)}")
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
-            logger.info(f"Converted to native: {dt} of type {type(dt)}")
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
@@ -43,6 +45,85 @@ def format_interest_list(interests: set, max_items: int = 10) -> str:
         return ', '.join(sorted_interests)
     return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
 class QuestionRecommender:
     def __init__(self):
         try:
@@ -50,126 +131,12 @@ class QuestionRecommender:
                 NEO4J_URL,
                 auth=(NEO4J_USER, NEO4J_PASSWORD)
             )
-            logger.info("Initializing QuestionRecommender with debug database")
             # Test connection immediately
             self.driver.verify_connectivity()
-            logger.info("Successfully connected to Neo4j database")
-            self.verify_connection()
-            # Inspect question types on initialization
-            self.inspect_question_types()
         except Exception as e:
             logger.error(f"Failed to initialize database connection: {str(e)}")
             raise
-    def verify_connection(self):
-        """Verify database connection and log basic statistics."""
-        try:
-            with self.driver.session() as session:
-                # First try a simple query to verify connection
-                test_result = session.run("MATCH (n) RETURN count(n) as count").single()
-                if not test_result:
-                    raise Exception("Could not execute test query")
-                logger.info(f"Database contains {test_result['count']} total nodes")
-                # Get database statistics with relationship counts
-                stats = session.run("""
-                    // Count nodes
-                    MATCH (u:User)
-                    WITH COUNT(u) as user_count
-                    MATCH (k:Keyword)
-                    WITH user_count, COUNT(k) as keyword_count
-                    MATCH (q:Question)
-                    WITH user_count, keyword_count, COUNT(q) as question_count
-                    MATCH (t:Topic)
-                    WITH user_count, keyword_count, question_count, COUNT(t) as topic_count
-                    // Count relationships
-                    OPTIONAL MATCH ()-[r:INTERESTED_IN_KEYWORD]->()
-                    WITH user_count, keyword_count, question_count, topic_count, COUNT(r) as keyword_rel_count
-                    OPTIONAL MATCH ()-[r:INTERESTED_IN_TOPIC]->()
-                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, COUNT(r) as topic_rel_count
-                    OPTIONAL MATCH ()-[r:HAS_KEYWORD]->()
-                    WITH user_count, keyword_count, question_count, topic_count, keyword_rel_count, topic_rel_count, COUNT(r) as question_keyword_count
-                    OPTIONAL MATCH ()-[r:HAS_TOPIC]->()
-                    RETURN
-                        user_count, keyword_count, question_count, topic_count,
-                        keyword_rel_count, topic_rel_count,
-                        question_keyword_count, COUNT(r) as question_topic_count
-                """).single()
-                if not stats:
-                    raise Exception("Could not retrieve database statistics")
-                logger.info("=== Database Statistics ===")
-                logger.info(f"Nodes:")
-                logger.info(f"  Users: {stats['user_count']}")
-                logger.info(f"  Keywords: {stats['keyword_count']}")
-                logger.info(f"  Questions: {stats['question_count']}")
-                logger.info(f"  Topics: {stats['topic_count']}")
-                logger.info(f"\nRelationships:")
-                logger.info(f"  User->Keyword (INTERESTED_IN_KEYWORD): {stats['keyword_rel_count']}")
-                logger.info(f"  User->Topic (INTERESTED_IN_TOPIC): {stats['topic_rel_count']}")
-                logger.info(f"  Question->Keyword (HAS_KEYWORD): {stats['question_keyword_count']}")
-                logger.info(f"  Question->Topic (HAS_TOPIC): {stats['question_topic_count']}")
-        except Exception as e:
-            logger.error(f"Database verification failed: {str(e)}")
-            logger.error(f"URL: {NEO4J_URL}")
-            logger.error(f"User: {NEO4J_USER}")
-            raise Exception(f"Failed to verify database connection: {str(e)}")
-    def inspect_question_types(self):
-        """Inspect different types of questions and their attributes in the database."""
-        with self.driver.session() as session:
-            try:
-                # Get all distinct question types and their properties
-                result = session.run("""
-                    MATCH (q:Question)
-                    WITH DISTINCT keys(q) as props, labels(q) as types
-                    RETURN types, props, count(*) as count
-                    ORDER BY count DESC
-                """)
-                logger.info("\n=== Question Types and Properties ===")
-                for record in result:
-                    types = record["types"]
-                    props = record["props"]
-                    count = record["count"]
-                    logger.info(f"\nType: {types}")
-                    logger.info(f"Count: {count}")
-                    logger.info("Properties:")
-                    for prop in props:
-                        # Get a sample value for this property
-                        sample = session.run("""
-                            MATCH (q:Question)
-                            WHERE $prop in keys(q)
-                            RETURN q[$prop] as value
-                            LIMIT 1
-                        """, prop=prop).single()
-                        value = sample["value"] if sample else None
-                        value_type = type(value).__name__ if value is not None else "None"
-                        logger.info(f"  - {prop}: {value_type} (example: {str(value)[:100]}{'...' if str(value)[100:] else ''})")
-                # Get relationships specific to different question types
-                result = session.run("""
-                    MATCH (q:Question)-[r]->(target)
-                    WITH DISTINCT type(r) as rel_type, labels(target) as target_labels, count(*) as count
-                    RETURN rel_type, target_labels, count
-                    ORDER BY count DESC
-                """)
-                logger.info("\n=== Question Relationships ===")
-                for record in result:
-                    rel_type = record["rel_type"]
-                    target_labels = record["target_labels"]
-                    count = record["count"]
-                    logger.info(f"Relationship: {rel_type} -> {target_labels} (Count: {count})")
-            except Exception as e:
-                logger.error(f"Error inspecting question types: {str(e)}")
-                raise
     def close(self):
         self.driver.close()
@@ -177,7 +144,6 @@ class QuestionRecommender:
         """Get list of all users with interest counts."""
         with self.driver.session() as session:
             try:
-                # Get users with their interest counts using proper relationship patterns
                 result = session.run("""
                     MATCH (u:User)
                     OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
@@ -200,15 +166,8 @@ class QuestionRecommender:
                 ) for record in result if record["username"]]
                 if not users_with_counts:
-                    logger.warning("No users found with interests")
                     return []
-                logger.info(f"Retrieved {len(users_with_counts)} users with interests")
-                logger.info("Top 5 users by interest count:")
-                for username, kw_count, topic_count in users_with_counts[:5]:
-                    logger.info(f"  - {username}: {kw_count} keywords, {topic_count} topics")
-                # Format usernames with their counts
                 return [
                     f"{username} ({kw_count} keywords, {topic_count} topics)"
                     for username, kw_count, topic_count in users_with_counts
@@ -220,166 +179,53 @@ class QuestionRecommender:
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Get keywords and topics a user is interested in."""
         with self.driver.session() as session:
-            # Get keywords the user is interested in
             keyword_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             keywords = {str(record["keyword"]) for record in keyword_result if record["keyword"]}
-            # Log keyword count for debugging
-            logger.debug(f"Found {len(keywords)} keywords for user {username}")
-            # Get topics the user is interested in
             topic_result = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             topics = {str(record["topic"]) for record in topic_result if record["topic"]}
-            # Log topic count for debugging
-            logger.debug(f"Found {len(topics)} topics for user {username}")
             return {"keywords": keywords or set(), "topics": topics or set()}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
-        """Find questions to recommend based on common interests using advanced Neo4j features."""
         with self.driver.session() as session:
-            # Debug: Check if users exist and have interests
-            user_check = session.run("""
-                MATCH (u1:User {name: $user1})
-                MATCH (u2:User {name: $user2})
-                OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
-                OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
-                RETURN
-                    COUNT(DISTINCT u1) as user1_exists,
-                    COUNT(DISTINCT u2) as user2_exists,
-                    COUNT(DISTINCT interest1) as user1_interests,
-                    COUNT(DISTINCT interest2) as user2_interests
-            """, user1=user1, user2=user2).single()
-            if not (user_check and user_check['user1_exists'] and user_check['user2_exists']):
-                logger.error(f"One or both users not found: {user1}, {user2}")
-                return []
-            logger.info(f"User {user1} has {user_check['user1_interests']} total interests")
-            logger.info(f"User {user2} has {user_check['user2_interests']} total interests")
-            # Advanced question recommendation query using Neo4j path finding and scoring
-            questions_query = """
-            // Find all interests (both keywords and topics) for both users
             MATCH (u1:User {name: $user1})
             MATCH (u2:User {name: $user2})
-            // Get all interests for both users
             OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
             OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
             WITH u1, u2,
                  COLLECT(DISTINCT interest1) as u1_interests,
                  COLLECT(DISTINCT interest2) as u2_interests
-            // Find questions related to either user's interests for each source
-            CALL {
-                WITH u1, u2, u1_interests, u2_interests
-                UNWIND u1_interests + u2_interests as interest
-                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
-                WHERE
-                    q.author <> $user1 AND
-                    q.author <> $user2 AND
-                    q.source = 'stack_exchange' AND
-                    (
-                        (interest IN u1_interests AND interest IN u2_interests) OR
-                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
-                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
-                    )
-                WITH q, interest, type(r) as rel_type,
-                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
-                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
-                     sum(interest_weight) as base_score
-                RETURN q, interests, base_score
-                ORDER BY base_score * rand() DESC
-                LIMIT 15 // Increased from 10 to get more variety
-                UNION
-                WITH u1, u2, u1_interests, u2_interests
-                UNWIND u1_interests + u2_interests as interest
-                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
-                WHERE
-                    q.source = 'trivia' AND
-                    (
-                        (interest IN u1_interests AND interest IN u2_interests) OR
-                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
-                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
-                    )
-                WITH q, interest, type(r) as rel_type,
-                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
-                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
-                     sum(interest_weight) as base_score
-                RETURN q, interests, base_score
-                ORDER BY base_score * rand() DESC
-                LIMIT 15 // Increased from 10 to get more variety
-                UNION
-                WITH u1, u2, u1_interests, u2_interests
-                UNWIND u1_interests + u2_interests as interest
-                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
-                WHERE
-                    q.source = 'wikipedia' AND
-                    (
-                        (interest IN u1_interests AND interest IN u2_interests) OR
-                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
-                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
-                    )
-                WITH q, interest, type(r) as rel_type,
-                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
-                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
-                     sum(interest_weight) as base_score
-                RETURN q, interests, base_score
-                ORDER BY base_score * rand() DESC
-                LIMIT 15 // Increased from 10 to get more variety
-                UNION
-                WITH u1, u2, u1_interests, u2_interests
-                UNWIND u1_interests + u2_interests as interest
-                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
-                WHERE
-                    q.source = 'reddit' AND
-                    (
-                        (interest IN u1_interests AND interest IN u2_interests) OR
-                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
-                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
-                    )
-                WITH q, interest, type(r) as rel_type,
-                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
-                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
-                     sum(interest_weight) as base_score
-                RETURN q, interests, base_score
-                ORDER BY base_score * rand() DESC
-                LIMIT 15 // Increased from 10 to get more variety
-            }
-            // Calculate temporal relevance for the combined results
             WITH q, interests, base_score,
                  CASE
                      WHEN q.created_utc_ts IS NOT NULL
                      THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
                      ELSE base_score
-                 END as temporal_score,
-                 // Add source-specific random boost to ensure better mixing
-                 CASE q.source
-                     WHEN 'stack_exchange' THEN rand() * 0.4
-                     WHEN 'trivia' THEN rand() * 0.4
-                     WHEN 'wikipedia' THEN rand() * 0.4
-                     WHEN 'reddit' THEN rand() * 0.4
-                     ELSE rand() * 0.4
-                 END as source_random_boost
-            // Return results with all metadata
-            WITH q, interests, temporal_score, source_random_boost,
-                 temporal_score * (0.6 + 0.8 * rand()) + source_random_boost as final_score
             RETURN DISTINCT
                 q.title as title,
                 q.body as body,
@@ -399,131 +245,25 @@ class QuestionRecommender:
                     WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
                     ELSE 'topic'
                 END] as interest_types,
-                final_score as relevance_score
-            ORDER BY final_score DESC
             LIMIT $max_questions
-            """
-            questions = [dict(record) for record in session.run(questions_query,
-                                                              user1=user1,
-                                                              user2=user2,
-                                                              max_questions=max_questions)]
-            if questions:
-                first_q = questions[0]
-                logger.info(f"Sample question:")
-                logger.info(f"Title: {first_q.get('title', 'No title')}")
-                logger.info(f"Author: {first_q.get('author', 'No author')}")
-                logger.info(f"Score: {first_q.get('relevance_score', 0)}")
-                logger.info(f"Interests: {first_q.get('matching_interests', [])}")
-            logger.info(f"Found {len(questions)} questions with common interests")
-            return questions
-def process_body(text, title):
-    """Process question body to handle images and HTML."""
-    if not text:
-        logger.warning(f"Empty body for question: {title}")
-        return ""
-    try:
-        from bs4 import BeautifulSoup
-        # Parse the HTML content
-        soup = BeautifulSoup(str(text), 'html.parser')
-        # Function to fix Stack Exchange URLs
-        def fix_stack_exchange_url(url):
-            if not url:
-                return url
-            if url.startswith(('http://', 'https://')):
-                return url
-            if url.startswith('//'):
-                return 'https:' + url
-            if url.startswith('/'):
-                return 'https://i.stack.imgur.com' + url
-            return 'https://i.stack.imgur.com/' + url
-        # Find all img tags and replace with preview cards
-        for img in soup.find_all('img'):
-            src = img.get('src', '')
-            if not src:
-                continue
-            fixed_src = fix_stack_exchange_url(src)
-            alt_text = img.get('alt', '').strip()
-            if not alt_text or alt_text.lower() == 'enter image description here':
-                alt_text = 'Question image'
-            # Create an image preview card
-            preview_html = f"""
-            <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
-                <div style="display: flex; align-items: center; margin-bottom: 8px;">
-                    <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
-                    <span style="color: #93c5fd;">{alt_text}</span>
-                </div>
-                <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
-                   style="color: #60a5fa; text-decoration: none;">View image</a>
-            </div>
-            """
-            new_soup = BeautifulSoup(preview_html, 'html.parser')
-            img.replace_with(new_soup)
-        # Style other elements
-        for link in soup.find_all('a'):
-            if 'View Image' not in (link.get_text() or ''):
-                href = link.get('href', '')
-                if href and not href.startswith(('http://', 'https://')):
-                    link['href'] = fix_stack_exchange_url(href)
-                link['target'] = '_blank'
-                link['rel'] = 'noopener noreferrer'
-                link['style'] = 'color: #60a5fa; text-decoration: none;'
-        # Add paragraph styling
-        for p in soup.find_all(['p', 'div']):
-            if not any(cls in (p.get('class', []) or []) for cls in ['image-preview', 'question-card']):
-                current_style = p.get('style', '')
-                p['style'] = f"{current_style}; margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;"
-        # Add list styling
-        for ul in soup.find_all(['ul', 'ol']):
-            ul['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
-        for li in soup.find_all('li'):
-            li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
-        # Add code block styling
-        for code in soup.find_all(['code', 'pre']):
-            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
-        return str(soup)
-    except Exception as e:
-        logger.error(f"Error processing question body: {str(e)}")
-        return str(text) if text else ""
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""
     try:
-        # Extract and validate basic question data
         title = q.get('title', 'Untitled')
-        source = q.get('source', '').lower()  # Convert to lowercase for consistent comparison
-        # Log available fields for debugging
-        logger.info(f"Question fields: {list(q.keys())}")
-        if 'created_utc_ts' in q:
-            logger.info(f"Raw created_utc_ts value: {q['created_utc_ts']}")
-        # Format metadata section based on source
         metadata_html = ""
         content_html = ""
-        # Default metadata for questions with author/date
         if 'author' in q or 'created_utc_ts' in q:
             author = q.get('author', 'Unknown author')
             created_date = format_neo4j_datetime(q.get('created_utc_ts'))
-            logger.info(f"Question {title}: author={author}, date={created_date}")
             upvotes = q.get('upvotes', 0)
             num_comments = q.get('num_comments', 0)
@@ -539,7 +279,6 @@ def format_question(q: Dict) -> str:
             </div>
             """
-        # Handle content based on source and available fields
         if source == "stack_exchange":
             body = q.get('body', '')
             if body:
@@ -548,12 +287,10 @@ def format_question(q: Dict) -> str:
                     {process_body(body, title)}
                 </div>
                 """
         elif source == "trivia":
             correct_answer = q.get('correct_answer', '')
             incorrect_answers = q.get('incorrect_answers', [])
-            # Create answer options HTML
             answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
             answers_html = "".join([
                 f"""
@@ -572,58 +309,21 @@ def format_question(q: Dict) -> str:
                 {answers_html}
             </div>
             """
-        elif source == "wikipedia":
-            correct_answer = q.get('correct_answer', '')
-            if correct_answer:
-                content_html = f"""
-                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px; border-left: 3px solid #10b981;">
-                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
-                    <div style="color: #34d399;">{correct_answer}</div>
-                </div>
-                """
-        elif source == "reddit":
-            # Add subreddit to metadata if available
-            if 'subreddit' in q:
-                subreddit = q.get('subreddit', '')
-                metadata_html = metadata_html.replace(
-                    'posted on',
-                    f'posted in <span style="color: #60a5fa; font-weight: 500;">r/{subreddit}</span> on'
-                )
-        # If no specific content is set, try to use any available content fields
-        if not content_html:
-            if 'body' in q:
-                content_html = f"""
-                <div class="question-content" style="margin-top: 20px; font-family: 'Segoe UI', system-ui, -apple-system, sans-serif; color: #e2e8f0; line-height: 1.6;">
-                    {process_body(q['body'], title)}
-                </div>
-                """
-            elif 'correct_answer' in q:
-                content_html = f"""
-                <div class="answer" style="margin-top: 15px; padding: 15px; background: rgba(51, 65, 85, 0.4); border-radius: 6px;">
-                    <div style="color: #94a3b8; margin-bottom: 10px;">Answer:</div>
-                    <div style="color: #e2e8f0;">{q['correct_answer']}</div>
-                </div>
-                """
-        # Get source-specific icon and color
         source_icon = {
-            'stack_exchange': '⚡', # Lightning bolt for Stack Exchange
-            'reddit': '🔸',  # Orange diamond for Reddit
-            'wikipedia': '📚',  # Books for Wikipedia
-            'trivia': '🎯',  # Target/bullseye for Trivia
-        }.get(source, '❔')  # Question mark as fallback
         source_color = {
-            'stack_exchange': '#60a5fa',  # Blue
-            'reddit': '#f97316',  # Orange
-            'wikipedia': '#22c55e',  # Green
-            'trivia': '#eab308',  # Yellow
-        }.get(source, '#60a5fa')  # Default blue
-        # Create the source badge with icon
         source_display = source.title() if source else "Unknown"
         source_badge = f"""
         <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
@@ -632,7 +332,6 @@ def format_question(q: Dict) -> str:
         </div>
         """
-        # Handle matching interests display
         matching_interests = q.get('matching_interests', [])
         interest_types = q.get('interest_types', [])
         interests_with_types = []
@@ -643,11 +342,9 @@ def format_question(q: Dict) -> str:
                     'type': type_
                 })
-        # Format interests by type
         keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
         topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
-        # Create interests display string
         interests_display = []
         if keywords:
             interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
@@ -655,7 +352,6 @@ def format_question(q: Dict) -> str:
             interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
         interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
-        # Calculate relevance score display
         relevance_score = q.get('relevance_score', 0)
         score_display = f"""
         <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
@@ -663,8 +359,7 @@ def format_question(q: Dict) -> str:
         </div>
         """ if relevance_score > 0 else ""
-        # Create the question card HTML
-        question_html = f"""
         <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
             <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
                 <div style="flex: 1; display: flex; align-items: center;">
@@ -685,8 +380,6 @@ def format_question(q: Dict) -> str:
         </div>
         """
-        return question_html
     except Exception as e:
         logger.error(f"Error formatting question: {str(e)}")
         return f"""
@@ -695,35 +388,19 @@ def format_question(q: Dict) -> str:
         </div>
         """
-def loading_message() -> Tuple[str, str, str]:
-    """Return loading message in proper HTML format."""
-    loading_html = """
-    <div class="loading-spinner">
-        <div style="text-align: center;">
-            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
-            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
-        </div>
-    </div>
-    """
-    return loading_html, loading_html, loading_html
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
     """Main function to get recommendations and user interests."""
-    # Extract actual usernames from the formatted strings
     user1 = user1.split(" (")[0] if " (" in user1 else user1
     user2 = user2.split(" (")[0] if " (" in user2 else user2
     recommender = QuestionRecommender()
     try:
-        # Get interests for both users
         user1_interests = recommender.get_user_interests(user1)
         user2_interests = recommender.get_user_interests(user2)
-        # Find common interests
         common_keywords = user1_interests['keywords'] & user2_interests['keywords']
         common_topics = user1_interests['topics'] & user2_interests['topics']
-        # Format interests summary
         interests_summary = f"""
         <div class="interests-summary">
             <div class="user-interests">
@@ -758,7 +435,6 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dic
         </div>
         """
-        # Get all recommended questions
         questions = recommender.find_common_questions(user1, user2, max_questions=50)
         if questions:
@@ -784,7 +460,7 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dic
     finally:
         recommender.close()
-# Custom CSS for better styling
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -962,17 +638,14 @@ strong {
 """
 def main():
-    # Create Gradio interface
     recommender = QuestionRecommender()
     users = recommender.get_all_users()
     recommender.close()
-    with gr.Blocks(title="Question Recommender (Local Debug)", theme=gr.themes.Soft(), css=custom_css) as iface:
         gr.Markdown("""
-        # 🤝 Question Recommender (Local Debug Version)
         Find questions that two users might be interested in discussing together based on their common interests.
-        > This is the local debug version using the test database.
         """)
         with gr.Row(equal_height=True):
@@ -1003,29 +676,13 @@ def main():
         recommendation_type = gr.HTML()
         questions_output = gr.HTML()
-        def recommend_and_store(user1, user2):
-            """Get recommendations and store questions."""
-            interests, rec_type, questions_html, questions_data = recommend_questions(user1, user2)
-            return interests, rec_type, questions_html
-        # Wire up the components
         recommend_btn.click(
-            fn=loading_message,
-            outputs=[interests_output, recommendation_type, questions_output],
-            queue=False
-        ).then(
-            fn=recommend_and_store,
             inputs=[user1_dropdown, user2_dropdown],
             outputs=[interests_output, recommendation_type, questions_output]
         )
-    # Launch with additional debug info
-    logger.info("Starting local debug version of Question Recommender")
-    iface.launch(
-        show_error=True,
-        server_name="127.0.0.1",
-        server_port=7860
-    )
 if __name__ == "__main__":
-    main()

 import pandas as pd
 from datetime import datetime
 import os
+import re
+from html import escape
+from bs4 import BeautifulSoup
+# Set up basic logging
 logging.basicConfig(
+    level=logging.WARNING,
+    format='%(levelname)s: %(message)s'
 )
 logger = logging.getLogger(__name__)
+# Get database credentials from environment variables
+NEO4J_URL = os.getenv("NEO4J_URL")
+NEO4J_USER = os.getenv("NEO4J_USER")
+NEO4J_PASSWORD = os.getenv("NEO4J_PASSWORD")
+if not all([NEO4J_URL, NEO4J_USER, NEO4J_PASSWORD]):
+    raise ValueError("Missing required environment variables for database connection")
 def format_neo4j_datetime(dt) -> str:
     """Convert Neo4j datetime to string format."""
     if dt is None:
         return 'Unknown date'
     try:
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
         return dt.strftime('%Y-%m-%d')
     except Exception as e:
         logger.warning(f"Error formatting datetime: {e}")
         return ', '.join(sorted_interests)
     return f"{', '.join(sorted_interests[:max_items])} (+{len(sorted_interests) - max_items} more)"
+def process_body(text, title):
+    """Render a question body as styled HTML, turning images into link cards.
+
+    Each ``<img>`` that has a ``src`` is replaced by an "image-preview" card
+    that links to the image instead of embedding it. Anchors are rewritten to
+    absolute URLs and opened in a new tab, and paragraphs, divs, lists and
+    code blocks receive inline styles.
+
+    Args:
+        text: Raw HTML of the question body. Falsy values yield ``""``.
+        title: Question title. Not used by this function; kept so existing
+            callers keep working.
+
+    Returns:
+        The rewritten HTML as a string. If processing raises, a warning is
+        logged and ``str(text)`` is returned unmodified (best effort).
+    """
+    if not text:
+        return ""
+    try:
+        # Parse the HTML content
+        soup = BeautifulSoup(str(text), 'html.parser')
+        def fix_stack_exchange_url(url):
+            """Return *url* as an absolute URL; relative ones resolve against i.stack.imgur.com."""
+            if not url or url.startswith(('http://', 'https://')):
+                return url
+            if url.startswith('//'):
+                return 'https:' + url
+            if url.startswith('/'):
+                return 'https://i.stack.imgur.com' + url
+            return 'https://i.stack.imgur.com/' + url
+        # Find all img tags and replace with preview cards
+        for img in soup.find_all('img'):
+            src = img.get('src', '')
+            if not src:
+                continue
+            # src/alt come straight from the question body. Escape them before
+            # building the card markup: the card is re-parsed below, so a quote,
+            # '<' or '&' in either value would otherwise inject or break markup.
+            fixed_src = escape(fix_stack_exchange_url(src), quote=True)
+            alt_text = img.get('alt', '').strip()
+            if not alt_text or alt_text.lower() == 'enter image description here':
+                alt_text = 'Question image'
+            alt_text = escape(alt_text)
+            preview_html = f"""
+            <div class="image-preview" style="margin: 10px 0; padding: 10px; background: rgba(30, 41, 59, 0.4); border-radius: 6px;">
+                <div style="display: flex; align-items: center; margin-bottom: 8px;">
+                    <span style="font-size: 20px; margin-right: 8px;">🖼️</span>
+                    <span style="color: #93c5fd;">{alt_text}</span>
+                </div>
+                <a href="{fixed_src}" target="_blank" rel="noopener noreferrer"
+                   style="color: #60a5fa; text-decoration: none;">View image</a>
+            </div>
+            """
+            new_soup = BeautifulSoup(preview_html, 'html.parser')
+            img.replace_with(new_soup)
+        # Style every anchor. The previous 'View Image' text check never matched
+        # the preview cards (their label is 'View image') and only exempted
+        # unrelated user links from the target/rel hardening. The cards already
+        # carry exactly these attributes, so re-applying them changes nothing.
+        for link in soup.find_all('a'):
+            href = link.get('href', '')
+            if href and not href.startswith(('http://', 'https://')):
+                link['href'] = fix_stack_exchange_url(href)
+            link['target'] = '_blank'
+            link['rel'] = 'noopener noreferrer'
+            link['style'] = 'color: #60a5fa; text-decoration: none;'
+        # Add paragraph styling, leaving the card containers untouched
+        block_style = 'margin: 0.8em 0; line-height: 1.6; color: #e2e8f0;'
+        for block in soup.find_all(['p', 'div']):
+            classes = block.get('class') or []
+            if 'image-preview' in classes or 'question-card' in classes:
+                continue
+            # Join with a single separator so the result has neither a leading
+            # '; ' (no prior style) nor ';;' (prior style ending in ';').
+            current_style = (block.get('style') or '').strip().rstrip(';').rstrip()
+            block['style'] = f"{current_style}; {block_style}" if current_style else block_style
+        # Add list styling
+        for list_tag in soup.find_all(['ul', 'ol']):
+            list_tag['style'] = 'margin: 0.8em 0; padding-left: 1.5em; color: #e2e8f0;'
+        for li in soup.find_all('li'):
+            li['style'] = 'margin: 0.4em 0; line-height: 1.6; color: #e2e8f0;'
+        # Add code block styling
+        for code in soup.find_all(['code', 'pre']):
+            code['style'] = 'background: rgba(30, 41, 59, 0.5); padding: 0.2em 0.4em; border-radius: 4px; font-family: monospace; color: #e2e8f0;'
+        return str(soup)
+    except Exception as e:
+        # Deliberate best effort: a body that cannot be processed is shown as-is.
+        logger.warning(f"Error processing question body: {str(e)}")
+        return str(text) if text else ""
 class QuestionRecommender:
     def __init__(self):
         try:
                 NEO4J_URL,
                 auth=(NEO4J_USER, NEO4J_PASSWORD)
             )
             # Test connection immediately
             self.driver.verify_connectivity()
         except Exception as e:
             logger.error(f"Failed to initialize database connection: {str(e)}")
             raise
     def close(self):
         """Close the driver held in ``self.driver``."""
         self.driver.close()
         """Get list of all users with interest counts."""
         with self.driver.session() as session:
             try:
                 result = session.run("""
                     MATCH (u:User)
                     OPTIONAL MATCH (u)-[r:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest)
                 ) for record in result if record["username"]]
                 if not users_with_counts:
                     return []
                 return [
                     f"{username} ({kw_count} keywords, {topic_count} topics)"
                     for username, kw_count, topic_count in users_with_counts
     def get_user_interests(self, username: str) -> Dict[str, set]:
         """Collect the keywords and topics linked to *username*.

         Two queries run in a single session: the first follows
         INTERESTED_IN_KEYWORD relationships, the second INTERESTED_IN_TOPIC.
         Falsy values are skipped and the rest are converted with ``str``.

         Returns:
             A dict with the keys ``"keywords"`` and ``"topics"``, each mapping
             to a (possibly empty) set of strings.
         """
         found_keywords = set()
         found_topics = set()
         with self.driver.session() as session:
             keyword_rows = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)
                 RETURN DISTINCT k.keyword as keyword
             """, username=username)
             # Drain the keyword result before issuing the next query.
             for row in keyword_rows:
                 if row["keyword"]:
                     found_keywords.add(str(row["keyword"]))
             topic_rows = session.run("""
                 MATCH (u:User {name: $username})-[:INTERESTED_IN_TOPIC]->(t:Topic)
                 RETURN DISTINCT t.topic as topic
             """, username=username)
             for row in topic_rows:
                 if row["topic"]:
                     found_topics.add(str(row["topic"]))
             return {"keywords": found_keywords, "topics": found_topics}
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
+        """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
+            questions = session.run("""
             MATCH (u1:User {name: $user1})
             MATCH (u2:User {name: $user2})
             OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
             OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
             WITH u1, u2,
                  COLLECT(DISTINCT interest1) as u1_interests,
                  COLLECT(DISTINCT interest2) as u2_interests
+            UNWIND u1_interests + u2_interests as interest
+            MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+            WHERE
+                q.author <> $user1 AND
+                q.author <> $user2 AND
+                (
+                    (interest IN u1_interests AND interest IN u2_interests) OR
+                    (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                    (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                )
+            WITH q, interest, type(r) as rel_type,
+                 CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+            WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                 sum(interest_weight) as base_score
             WITH q, interests, base_score,
                  CASE
                      WHEN q.created_utc_ts IS NOT NULL
                      THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
                      ELSE base_score
+                 END as temporal_score
             RETURN DISTINCT
                 q.title as title,
                 q.body as body,
                     WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
                     ELSE 'topic'
                 END] as interest_types,
+                temporal_score as relevance_score
+            ORDER BY temporal_score DESC
             LIMIT $max_questions
+            """, user1=user1, user2=user2, max_questions=max_questions)
+            return [dict(record) for record in questions]
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""
     try:
         title = q.get('title', 'Untitled')
+        source = q.get('source', '').lower()
         metadata_html = ""
         content_html = ""
         if 'author' in q or 'created_utc_ts' in q:
             author = q.get('author', 'Unknown author')
             created_date = format_neo4j_datetime(q.get('created_utc_ts'))
             upvotes = q.get('upvotes', 0)
             num_comments = q.get('num_comments', 0)
             </div>
             """
         if source == "stack_exchange":
             body = q.get('body', '')
             if body:
                     {process_body(body, title)}
                 </div>
                 """
         elif source == "trivia":
             correct_answer = q.get('correct_answer', '')
             incorrect_answers = q.get('incorrect_answers', [])
             answers = [correct_answer] + incorrect_answers if incorrect_answers else [correct_answer]
             answers_html = "".join([
                 f"""
                 {answers_html}
             </div>
             """
         source_icon = {
+            'stack_exchange': '⚡',
+            'reddit': '🔸',
+            'wikipedia': '📚',
+            'trivia': '🎯',
+        }.get(source, '❔')
         source_color = {
+            'stack_exchange': '#60a5fa',
+            'reddit': '#f97316',
+            'wikipedia': '#22c55e',
+            'trivia': '#eab308',
+        }.get(source, '#60a5fa')
         source_display = source.title() if source else "Unknown"
         source_badge = f"""
         <div class="source-badge" style="display: inline-flex; align-items: center; padding: 4px 8px; background: rgba(51, 65, 85, 0.5); border-radius: 4px; margin-right: 10px; border: 1px solid {source_color}25;">
         </div>
         """
         matching_interests = q.get('matching_interests', [])
         interest_types = q.get('interest_types', [])
         interests_with_types = []
                     'type': type_
                 })
         keywords = [i['name'] for i in interests_with_types if i['type'] == 'keyword']
         topics = [i['name'] for i in interests_with_types if i['type'] == 'topic']
         interests_display = []
         if keywords:
             interests_display.append(f"Keywords: {format_interest_list(set(keywords), max_items=3)}")
             interests_display.append(f"Topics: {format_interest_list(set(topics), max_items=3)}")
         interests_str = " | ".join(interests_display) if interests_display else "No common interests found"
         relevance_score = q.get('relevance_score', 0)
         score_display = f"""
         <div class="relevance-score" style="display: inline-block; padding: 4px 8px; background: rgba(59, 130, 246, 0.2); border-radius: 4px; margin-left: 10px;">
         </div>
         """ if relevance_score > 0 else ""
+        return f"""
         <div class="question-card" style="background: rgba(51, 65, 85, 0.5); padding: 20px; border-radius: 8px; margin: 15px 0; border: 1px solid rgba(148, 163, 184, 0.2);">
             <div class="question-header" style="display: flex; justify-content: space-between; align-items: flex-start; margin-bottom: 15px;">
                 <div style="flex: 1; display: flex; align-items: center;">
         </div>
         """
     except Exception as e:
         logger.error(f"Error formatting question: {str(e)}")
         return f"""
         </div>
         """
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str, List[Dict]]:
     """Main function to get recommendations and user interests."""
     user1 = user1.split(" (")[0] if " (" in user1 else user1
     user2 = user2.split(" (")[0] if " (" in user2 else user2
     recommender = QuestionRecommender()
     try:
         user1_interests = recommender.get_user_interests(user1)
         user2_interests = recommender.get_user_interests(user2)
         common_keywords = user1_interests['keywords'] & user2_interests['keywords']
         common_topics = user1_interests['topics'] & user2_interests['topics']
         interests_summary = f"""
         <div class="interests-summary">
             <div class="user-interests">
         </div>
         """
         questions = recommender.find_common_questions(user1, user2, max_questions=50)
         if questions:
     finally:
         recommender.close()
+# Custom CSS for styling
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
 """
 def main():
     recommender = QuestionRecommender()
     users = recommender.get_all_users()
     recommender.close()
+    with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_css) as iface:
         gr.Markdown("""
+        # 🤝 Question Recommender
         Find questions that two users might be interested in discussing together based on their common interests.
         """)
         with gr.Row(equal_height=True):
         recommendation_type = gr.HTML()
         questions_output = gr.HTML()
         recommend_btn.click(
+            fn=recommend_questions,
             inputs=[user1_dropdown, user2_dropdown],
             outputs=[interests_output, recommendation_type, questions_output]
         )
+    iface.launch()
 if __name__ == "__main__":
+    main()