Spaces:

NimaKL
/

LetsTalk

Runtime error

App Files Community

NimaKL committed on May 27

Commit

ef3d758

verified ·

1 Parent(s): c08b5c8

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -51

app.py CHANGED Viewed

@@ -196,61 +196,82 @@ class QuestionRecommender:
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
         """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
-            questions = session.run("""
-            MATCH (u1:User {name: $user1})
-            MATCH (u2:User {name: $user2})
-            OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
-            OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
-            WITH u1, u2,
-                 COLLECT(DISTINCT interest1) as u1_interests,
-                 COLLECT(DISTINCT interest2) as u2_interests
-            UNWIND u1_interests + u2_interests as interest
-            MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
-            WHERE
-                q.author <> $user1 AND
-                q.author <> $user2 AND
-                (
-                    (interest IN u1_interests AND interest IN u2_interests) OR
-                    (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
-                    (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
                 )
-            WITH q, interest, type(r) as rel_type,
-                 CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
-            WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
-                 sum(interest_weight) as base_score
-            WITH q, interests, base_score,
-                 CASE
-                     WHEN q.created_utc_ts IS NOT NULL
-                     THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
-                     ELSE base_score
-                 END as temporal_score
-            RETURN DISTINCT
-                q.title as title,
-                q.body as body,
-                q.created_utc_ts as created_utc_ts,
-                q.author as author,
-                q.source as source,
-                q.correct_answer as correct_answer,
-                q.incorrect_answers as incorrect_answers,
-                q.upvotes as upvotes,
-                q.num_comments as num_comments,
-                q.subreddit as subreddit,
-                [i in interests | CASE
-                    WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
-                    ELSE i.interest.topic
-                END] as matching_interests,
-                [i in interests | CASE
-                    WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
-                    ELSE 'topic'
-                END] as interest_types,
-                temporal_score as relevance_score
-            ORDER BY temporal_score DESC
-            LIMIT $max_questions
-            """, user1=user1, user2=user2, max_questions=max_questions)
-            return [dict(record) for record in questions]
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""

     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
         """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
+            # First, get questions per source
+            questions_per_source = max_questions // 4  # Divide evenly among 4 sources
+            remaining_slots = max_questions % 4  # Handle any remainder
+            # Query for each source separately to ensure balanced representation
+            sources = ['stack_exchange', 'reddit', 'wikipedia', 'trivia']
+            all_questions = []
+            for source in sources:
+                source_questions = session.run("""
+                MATCH (u1:User {name: $user1})
+                MATCH (u2:User {name: $user2})
+                OPTIONAL MATCH (u1)-[r1:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest1)
+                OPTIONAL MATCH (u2)-[r2:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]->(interest2)
+                WITH u1, u2,
+                     COLLECT(DISTINCT interest1) as u1_interests,
+                     COLLECT(DISTINCT interest2) as u2_interests
+                UNWIND u1_interests + u2_interests as interest
+                MATCH (q:Question)-[r:HAS_KEYWORD|HAS_TOPIC]->(interest)
+                WHERE
+                    q.author <> $user1 AND
+                    q.author <> $user2 AND
+                    q.source = $source AND
+                    (
+                        (interest IN u1_interests AND interest IN u2_interests) OR
+                        (interest IN u1_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u2))) OR
+                        (interest IN u2_interests AND EXISTS((q)-[:HAS_KEYWORD|HAS_TOPIC]->()<-[:INTERESTED_IN_KEYWORD|INTERESTED_IN_TOPIC]-(u1)))
+                    )
+                WITH q, interest, type(r) as rel_type,
+                     CASE WHEN interest IN u1_interests AND interest IN u2_interests THEN 2.0 ELSE 1.0 END as interest_weight
+                WITH q, collect({interest: interest, weight: interest_weight, type: rel_type}) as interests,
+                     sum(interest_weight) as base_score
+                WITH q, interests, base_score,
+                     CASE
+                         WHEN q.created_utc_ts IS NOT NULL
+                         THEN base_score * (1.0 + 0.1 * (1.0 - duration.between(q.created_utc_ts, datetime()).days / 365.0))
+                         ELSE base_score
+                     END as temporal_score
+                RETURN DISTINCT
+                    q.title as title,
+                    q.body as body,
+                    q.created_utc_ts as created_utc_ts,
+                    q.author as author,
+                    q.source as source,
+                    q.correct_answer as correct_answer,
+                    q.incorrect_answers as incorrect_answers,
+                    q.upvotes as upvotes,
+                    q.num_comments as num_comments,
+                    q.subreddit as subreddit,
+                    [i in interests | CASE
+                        WHEN i.type = 'HAS_KEYWORD' THEN i.interest.keyword
+                        ELSE i.interest.topic
+                    END] as matching_interests,
+                    [i in interests | CASE
+                        WHEN i.type = 'HAS_KEYWORD' THEN 'keyword'
+                        ELSE 'topic'
+                    END] as interest_types,
+                    temporal_score as relevance_score
+                ORDER BY temporal_score DESC
+                LIMIT $limit
+                """,
+                user1=user1,
+                user2=user2,
+                source=source,
+                limit=questions_per_source + (1 if remaining_slots > 0 else 0)
                 )
+                source_results = [dict(record) for record in source_questions]
+                all_questions.extend(source_results)
+                remaining_slots = max(0, remaining_slots - 1)
+            # Sort all questions by relevance score for final ordering
+            all_questions.sort(key=lambda x: x.get('relevance_score', 0), reverse=True)
+            return all_questions
 def format_question(q: Dict) -> str:
     """Format a question for display based on its source."""