Spaces:

NimaKL
/

LetsTalk

Runtime error

App Files Community

NimaKL committed on May 26

Commit

07b7648

verified ·

1 Parent(s): 21b2cd1

Update app.py

Browse files

Files changed (1) hide show

app.py +72 -28

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import os
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
-# Get Neo4j credentials from environment variables
 NEO4J_URL = os.environ['NEO4J_URL']
 NEO4J_USER = os.environ['NEO4J_USER']
 NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
@@ -20,7 +20,6 @@ def format_neo4j_datetime(dt) -> str:
     if dt is None:
         return 'Unknown date'
     try:
-        # Convert Neo4j datetime to Python datetime
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
         return dt.strftime('%Y-%m-%d')
@@ -79,19 +78,28 @@ class QuestionRecommender:
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
         """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
-            # First try to find questions with common keywords
             keyword_questions = session.run("""
                 // Find keywords that both users are interested in
                 MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
-                WITH DISTINCT k
-                // Find questions with these common keywords
                 MATCH (q:Question)-[:HAS_KEYWORD]->(k)
                 WHERE q.author <> $user1 AND q.author <> $user2
-                // Count how many common keywords each question matches
-                WITH q, k, COLLECT(k.keyword) as matching_keywords
-                WITH q, matching_keywords, SIZE(matching_keywords) as relevance_score
                 // Return questions with their details
                 RETURN DISTINCT
@@ -102,8 +110,8 @@ class QuestionRecommender:
                     matching_keywords as keywords,
                     relevance_score
                 ORDER BY relevance_score DESC, q.created_utc_ts DESC
-                LIMIT $limit
-            """, user1=user1, user2=user2, limit=max_questions)
             questions = [dict(record) for record in keyword_questions]
@@ -118,8 +126,8 @@ class QuestionRecommender:
                     MATCH (q:Question)-[:HAS_TOPIC]->(t)
                     WHERE q.author <> $user1 AND q.author <> $user2
-                    // Count how many common topics each question matches
-                    WITH q, t, COLLECT(t.topic) as matching_topics
                     WITH q, matching_topics, SIZE(matching_topics) as relevance_score
                     // Return questions with their details
@@ -136,7 +144,39 @@ class QuestionRecommender:
                 questions = [dict(record) for record in topic_questions]
-            return questions
 def format_question(q: Dict) -> str:
     """Format a question for display."""
@@ -166,6 +206,18 @@ def format_question(q: Dict) -> str:
 </div>
 """
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
     """Main function to get recommendations and user interests."""
     recommender = QuestionRecommender()
@@ -240,19 +292,7 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
     finally:
         recommender.close()
-def loading_message() -> Tuple[str, str, str]:
-    """Return loading message in proper HTML format."""
-    loading_html = """
-    <div class="loading-spinner">
-        <div style="text-align: center;">
-            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
-            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
-        </div>
-    </div>
-    """
-    return loading_html, loading_html, loading_html
-# Update the custom CSS with dark theme compatible colors
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
@@ -397,6 +437,11 @@ strong {
     margin-bottom: 15px;
     font-size: 1.2rem;
 }
 """
 # Create Gradio interface
@@ -447,5 +492,4 @@ with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_
         outputs=[interests_output, recommendation_type, questions_output]
     )
-if __name__ == "__main__":
-    iface.launch(share=True)

 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Get Neo4j credentials from Hugging Face secrets
 NEO4J_URL = os.environ['NEO4J_URL']
 NEO4J_USER = os.environ['NEO4J_USER']
 NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
     if dt is None:
         return 'Unknown date'
     try:
         if hasattr(dt, 'to_native'):
             dt = dt.to_native()
         return dt.strftime('%Y-%m-%d')
     def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
         """Find questions to recommend based on common interests."""
         with self.driver.session() as session:
+            # First try to find questions with common keywords, but be more selective
             keyword_questions = session.run("""
                 // Find keywords that both users are interested in
                 MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
+                // Calculate how specific each keyword is based on total user interest
+                MATCH (anyUser:User)-[:INTERESTED_IN_KEYWORD]->(k)
+                WITH k, COUNT(anyUser) as keyword_popularity
+                WHERE keyword_popularity < 1000  // Filter out extremely common keywords
+                // Find questions with these more specific common keywords
                 MATCH (q:Question)-[:HAS_KEYWORD]->(k)
                 WHERE q.author <> $user1 AND q.author <> $user2
+                // Group questions and calculate relevance
+                WITH q, k, 1.0/keyword_popularity as keyword_specificity
+                WITH q,
+                     COLLECT(DISTINCT k.keyword) as matching_keywords,
+                     SUM(keyword_specificity) as relevance_score
+                // Ensure we have enough matching keywords but not too many
+                WHERE SIZE(matching_keywords) >= 2 AND SIZE(matching_keywords) <= 5
                 // Return questions with their details
                 RETURN DISTINCT
                     matching_keywords as keywords,
                     relevance_score
                 ORDER BY relevance_score DESC, q.created_utc_ts DESC
+                LIMIT 25
+            """, user1=user1, user2=user2)
             questions = [dict(record) for record in keyword_questions]
                     MATCH (q:Question)-[:HAS_TOPIC]->(t)
                     WHERE q.author <> $user1 AND q.author <> $user2
+                    // Group questions and calculate relevance
+                    WITH q, COLLECT(DISTINCT t.topic) as matching_topics
                     WITH q, matching_topics, SIZE(matching_topics) as relevance_score
                     // Return questions with their details
                 questions = [dict(record) for record in topic_questions]
+            # Post-process to remove duplicate/similar questions and ensure diversity
+            seen_titles = set()
+            filtered_questions = []
+            for q in questions:
+                # Create a simplified version of the title for comparison
+                simple_title = q['title'].lower().strip()
+                # Skip if we've seen a very similar title
+                if any(self._titles_are_similar(simple_title, seen) for seen in seen_titles):
+                    continue
+                seen_titles.add(simple_title)
+                filtered_questions.append(q)
+                if len(filtered_questions) >= max_questions:
+                    break
+            return filtered_questions
+    def _titles_are_similar(self, title1: str, title2: str, similarity_threshold: float = 0.8) -> bool:
+        """Check if two titles are very similar to avoid recommending duplicate questions."""
+        # Remove common punctuation and convert to set of words
+        words1 = set(title1.replace('?', '').replace('!', '').replace('.', '').split())
+        words2 = set(title2.replace('?', '').replace('!', '').replace('.', '').split())
+        # Calculate Jaccard similarity
+        intersection = len(words1 & words2)
+        union = len(words1 | words2)
+        if union == 0:
+            return False
+        return intersection / union >= similarity_threshold
 def format_question(q: Dict) -> str:
     """Format a question for display."""
 </div>
 """
+def loading_message() -> Tuple[str, str, str]:
+    """Return loading message in proper HTML format."""
+    loading_html = """
+    <div class="loading-spinner">
+        <div style="text-align: center;">
+            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
+            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
+        </div>
+    </div>
+    """
+    return loading_html, loading_html, loading_html
 def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
     """Main function to get recommendations and user interests."""
     recommender = QuestionRecommender()
     finally:
         recommender.close()
+# Custom CSS for better styling
 custom_css = """
 .gradio-container {
     max-width: 1200px !important;
     margin-bottom: 15px;
     font-size: 1.2rem;
 }
+@keyframes spin {
+    0% { transform: rotate(0deg); }
+    100% { transform: rotate(360deg); }
+}
 """
 # Create Gradio interface
         outputs=[interests_output, recommendation_type, questions_output]
     )
+iface.launch()