Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ import os
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
-
# Get Neo4j credentials from
|
14 |
NEO4J_URL = os.environ['NEO4J_URL']
|
15 |
NEO4J_USER = os.environ['NEO4J_USER']
|
16 |
NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
|
@@ -20,7 +20,6 @@ def format_neo4j_datetime(dt) -> str:
|
|
20 |
if dt is None:
|
21 |
return 'Unknown date'
|
22 |
try:
|
23 |
-
# Convert Neo4j datetime to Python datetime
|
24 |
if hasattr(dt, 'to_native'):
|
25 |
dt = dt.to_native()
|
26 |
return dt.strftime('%Y-%m-%d')
|
@@ -79,19 +78,28 @@ class QuestionRecommender:
|
|
79 |
def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
|
80 |
"""Find questions to recommend based on common interests."""
|
81 |
with self.driver.session() as session:
|
82 |
-
# First try to find questions with common keywords
|
83 |
keyword_questions = session.run("""
|
84 |
// Find keywords that both users are interested in
|
85 |
MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
|
86 |
-
WITH DISTINCT k
|
87 |
|
88 |
-
//
|
|
|
|
|
|
|
|
|
|
|
89 |
MATCH (q:Question)-[:HAS_KEYWORD]->(k)
|
90 |
WHERE q.author <> $user1 AND q.author <> $user2
|
91 |
|
92 |
-
//
|
93 |
-
WITH q, k,
|
94 |
-
WITH q,
|
|
|
|
|
|
|
|
|
|
|
95 |
|
96 |
// Return questions with their details
|
97 |
RETURN DISTINCT
|
@@ -102,8 +110,8 @@ class QuestionRecommender:
|
|
102 |
matching_keywords as keywords,
|
103 |
relevance_score
|
104 |
ORDER BY relevance_score DESC, q.created_utc_ts DESC
|
105 |
-
LIMIT
|
106 |
-
""", user1=user1, user2=user2
|
107 |
|
108 |
questions = [dict(record) for record in keyword_questions]
|
109 |
|
@@ -118,8 +126,8 @@ class QuestionRecommender:
|
|
118 |
MATCH (q:Question)-[:HAS_TOPIC]->(t)
|
119 |
WHERE q.author <> $user1 AND q.author <> $user2
|
120 |
|
121 |
-
//
|
122 |
-
WITH q,
|
123 |
WITH q, matching_topics, SIZE(matching_topics) as relevance_score
|
124 |
|
125 |
// Return questions with their details
|
@@ -136,7 +144,39 @@ class QuestionRecommender:
|
|
136 |
|
137 |
questions = [dict(record) for record in topic_questions]
|
138 |
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
140 |
|
141 |
def format_question(q: Dict) -> str:
|
142 |
"""Format a question for display."""
|
@@ -166,6 +206,18 @@ def format_question(q: Dict) -> str:
|
|
166 |
</div>
|
167 |
"""
|
168 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
169 |
def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
|
170 |
"""Main function to get recommendations and user interests."""
|
171 |
recommender = QuestionRecommender()
|
@@ -240,19 +292,7 @@ def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
|
|
240 |
finally:
|
241 |
recommender.close()
|
242 |
|
243 |
-
|
244 |
-
"""Return loading message in proper HTML format."""
|
245 |
-
loading_html = """
|
246 |
-
<div class="loading-spinner">
|
247 |
-
<div style="text-align: center;">
|
248 |
-
<div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
|
249 |
-
<div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
|
250 |
-
</div>
|
251 |
-
</div>
|
252 |
-
"""
|
253 |
-
return loading_html, loading_html, loading_html
|
254 |
-
|
255 |
-
# Update the custom CSS with dark theme compatible colors
|
256 |
custom_css = """
|
257 |
.gradio-container {
|
258 |
max-width: 1200px !important;
|
@@ -397,6 +437,11 @@ strong {
|
|
397 |
margin-bottom: 15px;
|
398 |
font-size: 1.2rem;
|
399 |
}
|
|
|
|
|
|
|
|
|
|
|
400 |
"""
|
401 |
|
402 |
# Create Gradio interface
|
@@ -447,5 +492,4 @@ with gr.Blocks(title="Question Recommender", theme=gr.themes.Soft(), css=custom_
|
|
447 |
outputs=[interests_output, recommendation_type, questions_output]
|
448 |
)
|
449 |
|
450 |
-
|
451 |
-
iface.launch(share=True)
|
|
|
10 |
logging.basicConfig(level=logging.INFO)
|
11 |
logger = logging.getLogger(__name__)
|
12 |
|
13 |
+
# Get Neo4j credentials from Hugging Face secrets
|
14 |
NEO4J_URL = os.environ['NEO4J_URL']
|
15 |
NEO4J_USER = os.environ['NEO4J_USER']
|
16 |
NEO4J_PASSWORD = os.environ['NEO4J_PASSWORD']
|
|
|
20 |
if dt is None:
|
21 |
return 'Unknown date'
|
22 |
try:
|
|
|
23 |
if hasattr(dt, 'to_native'):
|
24 |
dt = dt.to_native()
|
25 |
return dt.strftime('%Y-%m-%d')
|
|
|
78 |
def find_common_questions(self, user1: str, user2: str, max_questions: int = 5) -> List[Dict]:
|
79 |
"""Find questions to recommend based on common interests."""
|
80 |
with self.driver.session() as session:
|
81 |
+
# First try to find questions with common keywords, but be more selective
|
82 |
keyword_questions = session.run("""
|
83 |
// Find keywords that both users are interested in
|
84 |
MATCH (u1:User {name: $user1})-[:INTERESTED_IN_KEYWORD]->(k:Keyword)<-[:INTERESTED_IN_KEYWORD]-(u2:User {name: $user2})
|
|
|
85 |
|
86 |
+
// Calculate how specific each keyword is based on total user interest
|
87 |
+
MATCH (anyUser:User)-[:INTERESTED_IN_KEYWORD]->(k)
|
88 |
+
WITH k, COUNT(anyUser) as keyword_popularity
|
89 |
+
WHERE keyword_popularity < 1000 // Filter out extremely common keywords
|
90 |
+
|
91 |
+
// Find questions with these more specific common keywords
|
92 |
MATCH (q:Question)-[:HAS_KEYWORD]->(k)
|
93 |
WHERE q.author <> $user1 AND q.author <> $user2
|
94 |
|
95 |
+
// Group questions and calculate relevance
|
96 |
+
WITH q, k, 1.0/keyword_popularity as keyword_specificity
|
97 |
+
WITH q,
|
98 |
+
COLLECT(DISTINCT k.keyword) as matching_keywords,
|
99 |
+
SUM(keyword_specificity) as relevance_score
|
100 |
+
|
101 |
+
// Ensure we have enough matching keywords but not too many
|
102 |
+
WHERE SIZE(matching_keywords) >= 2 AND SIZE(matching_keywords) <= 5
|
103 |
|
104 |
// Return questions with their details
|
105 |
RETURN DISTINCT
|
|
|
110 |
matching_keywords as keywords,
|
111 |
relevance_score
|
112 |
ORDER BY relevance_score DESC, q.created_utc_ts DESC
|
113 |
+
LIMIT 25
|
114 |
+
""", user1=user1, user2=user2)
|
115 |
|
116 |
questions = [dict(record) for record in keyword_questions]
|
117 |
|
|
|
126 |
MATCH (q:Question)-[:HAS_TOPIC]->(t)
|
127 |
WHERE q.author <> $user1 AND q.author <> $user2
|
128 |
|
129 |
+
// Group questions and calculate relevance
|
130 |
+
WITH q, COLLECT(DISTINCT t.topic) as matching_topics
|
131 |
WITH q, matching_topics, SIZE(matching_topics) as relevance_score
|
132 |
|
133 |
// Return questions with their details
|
|
|
144 |
|
145 |
questions = [dict(record) for record in topic_questions]
|
146 |
|
147 |
+
# Post-process to remove duplicate/similar questions and ensure diversity
|
148 |
+
seen_titles = set()
|
149 |
+
filtered_questions = []
|
150 |
+
for q in questions:
|
151 |
+
# Create a simplified version of the title for comparison
|
152 |
+
simple_title = q['title'].lower().strip()
|
153 |
+
|
154 |
+
# Skip if we've seen a very similar title
|
155 |
+
if any(self._titles_are_similar(simple_title, seen) for seen in seen_titles):
|
156 |
+
continue
|
157 |
+
|
158 |
+
seen_titles.add(simple_title)
|
159 |
+
filtered_questions.append(q)
|
160 |
+
|
161 |
+
if len(filtered_questions) >= max_questions:
|
162 |
+
break
|
163 |
+
|
164 |
+
return filtered_questions
|
165 |
+
|
166 |
+
def _titles_are_similar(self, title1: str, title2: str, similarity_threshold: float = 0.8) -> bool:
    """Return True when two titles share enough words to count as duplicates.

    Each title is reduced to a set of whitespace-separated words and the two
    sets are compared with Jaccard similarity (|A & B| / |A | B|).

    Args:
        title1: First title. The comparison is case-sensitive, so pass an
            already lower-cased string if case should be ignored.
        title2: Second title, normalized the same way as ``title1``.
        similarity_threshold: Minimum Jaccard score (inclusive) for the
            titles to be treated as similar.

    Returns:
        True if the score is at least ``similarity_threshold``; False
        otherwise, including when neither title contains any word.
    """
    def _word_set(title: str) -> set:
        # '?', '!' and '.' are dropped wherever they occur, so "node.js"
        # and "nodejs" compare equal.
        cleaned = title.translate(str.maketrans('', '', '?!.'))
        # Other punctuation is only trimmed from word edges, so that
        # "python," matches "python" while "c++" and "c#" stay distinct
        # from "c".
        trimmed = (word.strip(',;:"\'()[]{}') for word in cleaned.split())
        return {word for word in trimmed if word}

    words1 = _word_set(title1)
    words2 = _word_set(title2)

    union = len(words1 | words2)
    if union == 0:
        # No words on either side: nothing to compare, and avoids 0/0.
        return False

    return len(words1 & words2) / union >= similarity_threshold
|
180 |
|
181 |
def format_question(q: Dict) -> str:
|
182 |
"""Format a question for display."""
|
|
|
206 |
</div>
|
207 |
"""
|
208 |
|
209 |
+
def loading_message() -> Tuple[str, str, str]:
    """Return the loading-spinner HTML snippet repeated three times.

    The spinner uses the CSS animation named ``spin``, so the page
    stylesheet must define matching ``@keyframes``.

    Returns:
        A 3-tuple in which every element is the same HTML string.
    """
    spinner_markup = """
    <div class="loading-spinner">
        <div style="text-align: center;">
            <div style="border: 4px solid #60a5fa; border-top: 4px solid transparent; border-radius: 50%; width: 40px; height: 40px; animation: spin 1s linear infinite; margin: 20px auto;"></div>
            <div style="color: #60a5fa; margin-top: 10px;">Analyzing interests and finding recommendations...</div>
        </div>
    </div>
    """
    return (spinner_markup,) * 3
|
220 |
+
|
221 |
def recommend_questions(user1: str, user2: str) -> Tuple[str, str, str]:
|
222 |
"""Main function to get recommendations and user interests."""
|
223 |
recommender = QuestionRecommender()
|
|
|
292 |
finally:
|
293 |
recommender.close()
|
294 |
|
295 |
+
# Custom CSS for better styling
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
296 |
custom_css = """
|
297 |
.gradio-container {
|
298 |
max-width: 1200px !important;
|
|
|
437 |
margin-bottom: 15px;
|
438 |
font-size: 1.2rem;
|
439 |
}
|
440 |
+
|
441 |
+
@keyframes spin {
|
442 |
+
0% { transform: rotate(0deg); }
|
443 |
+
100% { transform: rotate(360deg); }
|
444 |
+
}
|
445 |
"""
|
446 |
|
447 |
# Create Gradio interface
|
|
|
492 |
outputs=[interests_output, recommendation_type, questions_output]
|
493 |
)
|
494 |
|
495 |
+
iface.launch()
|
|