Spaces:
Running
Running
Gianpaolo Macario
commited on
Commit
·
988d01d
1
Parent(s):
103a972
refactor: enhance webhook processing and add health check endpoint
Browse files
app.py
CHANGED
@@ -70,10 +70,10 @@ RECOGNIZED_TAGS = {
|
|
70 |
|
71 |
|
72 |
class WebhookEvent(BaseModel):
|
73 |
-
event: Dict[str, str]
|
74 |
-
comment: Dict[str, Any]
|
75 |
-
discussion: Dict[str, Any]
|
76 |
-
repo: Dict[str, str]
|
77 |
|
78 |
|
79 |
app = FastAPI(title="HF Tagging Bot")
|
@@ -160,304 +160,457 @@ def extract_tags_from_text(text: str) -> List[str]:
|
|
160 |
return valid_tags
|
161 |
|
162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
163 |
async def process_webhook_comment(webhook_data: Dict[str, Any]):
|
164 |
-
"""
|
|
|
|
|
|
|
165 |
print("🏷️ Starting process_webhook_comment...")
|
166 |
-
|
167 |
try:
|
|
|
168 |
comment_content = webhook_data["comment"]["content"]
|
169 |
discussion_title = webhook_data["discussion"]["title"]
|
170 |
repo_name = webhook_data["repo"]["name"]
|
171 |
discussion_num = webhook_data["discussion"]["num"]
|
172 |
-
# Author is an object with "id" field
|
173 |
comment_author = webhook_data["comment"]["author"].get("id", "unknown")
|
174 |
-
|
175 |
-
print(f"📝 Comment
|
176 |
-
print(f"📰 Discussion
|
177 |
print(f"📦 Repository: {repo_name}")
|
|
|
|
|
|
|
178 |
|
179 |
-
|
180 |
-
|
181 |
-
|
182 |
-
|
183 |
-
|
184 |
-
|
185 |
-
|
186 |
-
|
187 |
-
|
188 |
-
|
189 |
-
|
190 |
-
|
191 |
-
|
192 |
-
|
193 |
-
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
199 |
-
|
200 |
-
|
201 |
-
|
202 |
-
|
203 |
-
|
204 |
-
|
205 |
-
|
206 |
-
|
207 |
-
|
208 |
-
|
209 |
-
|
210 |
-
|
211 |
-
|
212 |
-
|
213 |
-
|
214 |
-
|
215 |
-
|
216 |
-
""
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
236 |
-
|
237 |
-
|
238 |
-
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
250 |
-
):
|
251 |
-
msg = f"Tag '{tag}': Already exists"
|
252 |
-
elif (
|
253 |
-
"pr" in full_response.lower()
|
254 |
-
or "pull request" in full_response.lower()
|
255 |
-
):
|
256 |
-
if tag_mentioned:
|
257 |
-
msg = f"Tag '{tag}': PR created successfully"
|
258 |
-
else:
|
259 |
-
msg = (
|
260 |
-
f"Tag '{tag}': Processed "
|
261 |
-
"(PR may have been created)"
|
262 |
-
)
|
263 |
-
elif "success" in full_response.lower() and tag_mentioned:
|
264 |
-
msg = f"Tag '{tag}': Successfully processed"
|
265 |
-
elif "error" in full_response.lower() and tag_mentioned:
|
266 |
-
msg = f"Tag '{tag}': Error during processing"
|
267 |
-
else:
|
268 |
-
msg = f"Tag '{tag}': Processed by agent"
|
269 |
-
|
270 |
-
print(f"✅ Result for tag '{tag}': {msg}")
|
271 |
-
result_messages.append(msg)
|
272 |
-
|
273 |
-
except Exception as agent_error:
|
274 |
-
print(f"⚠️ Agent streaming failed: {str(agent_error)}")
|
275 |
-
print("🔄 Falling back to direct MCP tool calls...")
|
276 |
-
|
277 |
-
# Import the MCP server functions directly as fallback
|
278 |
-
try:
|
279 |
-
import sys
|
280 |
-
import importlib.util
|
281 |
-
|
282 |
-
# Load the MCP server module
|
283 |
-
spec = importlib.util.spec_from_file_location(
|
284 |
-
"mcp_server", "./mcp_server.py"
|
285 |
-
)
|
286 |
-
mcp_module = importlib.util.module_from_spec(spec) # type: ignore
|
287 |
-
spec.loader.exec_module(mcp_module) # type: ignore
|
288 |
-
|
289 |
-
# Use the MCP tools directly for each tag
|
290 |
-
for tag in all_tags:
|
291 |
-
try:
|
292 |
-
print(
|
293 |
-
f"🔧 Directly calling get_current_tags for '{tag}'"
|
294 |
-
)
|
295 |
-
current_tags_result = mcp_module.get_current_tags(
|
296 |
-
repo_name
|
297 |
-
)
|
298 |
-
print(
|
299 |
-
f"📄 Current tags result: {current_tags_result}"
|
300 |
-
)
|
301 |
-
|
302 |
-
# Parse the JSON result
|
303 |
-
import json
|
304 |
-
|
305 |
-
tags_data = json.loads(current_tags_result)
|
306 |
-
|
307 |
-
if tags_data.get("status") == "success":
|
308 |
-
current_tags = tags_data.get("current_tags", [])
|
309 |
-
if tag in current_tags:
|
310 |
-
msg = f"Tag '{tag}': Already exists"
|
311 |
-
print(f"✅ {msg}")
|
312 |
-
else:
|
313 |
-
print(
|
314 |
-
f"🔧 Directly calling add_new_tag for '{tag}'"
|
315 |
-
)
|
316 |
-
add_result = mcp_module.add_new_tag(
|
317 |
-
repo_name, tag
|
318 |
-
)
|
319 |
-
print(f"📄 Add tag result: {add_result}")
|
320 |
-
|
321 |
-
add_data = json.loads(add_result)
|
322 |
-
if add_data.get("status") == "success":
|
323 |
-
pr_url = add_data.get("pr_url", "")
|
324 |
-
msg = f"Tag '{tag}': PR created - {pr_url}"
|
325 |
-
elif (
|
326 |
-
add_data.get("status")
|
327 |
-
== "already_exists"
|
328 |
-
):
|
329 |
-
msg = f"Tag '{tag}': Already exists"
|
330 |
-
else:
|
331 |
-
msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
|
332 |
-
print(f"✅ {msg}")
|
333 |
-
else:
|
334 |
-
error_msg = tags_data.get(
|
335 |
-
"error", "Unknown error"
|
336 |
-
)
|
337 |
-
msg = f"Tag '{tag}': Error - {error_msg}"
|
338 |
-
print(f"❌ {msg}")
|
339 |
-
|
340 |
-
result_messages.append(msg)
|
341 |
-
|
342 |
-
except Exception as direct_error:
|
343 |
-
error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
|
344 |
-
print(f"❌ {error_msg}")
|
345 |
-
result_messages.append(error_msg)
|
346 |
-
|
347 |
-
except Exception as fallback_error:
|
348 |
-
error_msg = (
|
349 |
-
f"Fallback approach failed: {str(fallback_error)}"
|
350 |
-
)
|
351 |
-
print(f"❌ {error_msg}")
|
352 |
-
result_messages.append(error_msg)
|
353 |
-
|
354 |
-
except Exception as e:
|
355 |
-
error_msg = f"Error during agent processing: {str(e)}"
|
356 |
-
print(f"❌ {error_msg}")
|
357 |
-
result_messages.append(error_msg)
|
358 |
-
|
359 |
-
# Store the interaction
|
360 |
-
base_url = "https://huggingface.co"
|
361 |
-
discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
|
362 |
-
|
363 |
-
interaction = {
|
364 |
-
"timestamp": datetime.now().isoformat(),
|
365 |
-
"repo": repo_name,
|
366 |
-
"discussion_title": discussion_title,
|
367 |
-
"discussion_num": discussion_num,
|
368 |
-
"discussion_url": discussion_url,
|
369 |
-
"original_comment": comment_content,
|
370 |
-
"comment_author": comment_author,
|
371 |
-
"detected_tags": all_tags,
|
372 |
-
"results": result_messages,
|
373 |
-
}
|
374 |
-
|
375 |
-
tag_operations_store.append(interaction)
|
376 |
-
final_result = " | ".join(result_messages)
|
377 |
-
print(f"💾 Stored interaction and returning result: {final_result}")
|
378 |
-
return final_result
|
379 |
|
380 |
-
|
381 |
-
|
382 |
-
print(error_msg)
|
383 |
-
return error_msg
|
384 |
|
385 |
|
386 |
-
@app.
|
387 |
-
async def
|
388 |
-
"""
|
389 |
-
|
390 |
-
|
391 |
-
|
392 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
393 |
|
394 |
-
payload = await request.json()
|
395 |
-
print(f"📥 Received webhook payload: {json.dumps(payload, indent=2)}")
|
396 |
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
400 |
|
401 |
-
print(f"🔍 Event details - scope: {scope}, action: {action}")
|
402 |
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
if scope_check and action_check:
|
413 |
-
# Verify we have the required fields
|
414 |
-
required_fields = ["comment", "discussion", "repo"]
|
415 |
-
missing_fields = [field for field in required_fields if field not in payload]
|
416 |
-
|
417 |
-
if missing_fields:
|
418 |
-
error_msg = f"Missing required fields: {missing_fields}"
|
419 |
-
print(f"❌ {error_msg}")
|
420 |
-
return {"error": error_msg}
|
421 |
-
|
422 |
-
print(f"🚀 Processing webhook for repo: {payload['repo']['name']}")
|
423 |
-
background_tasks.add_task(process_webhook_comment, payload)
|
424 |
-
return {"status": "processing"}
|
425 |
-
|
426 |
-
print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
|
427 |
-
return {"status": "ignored"}
|
428 |
-
|
429 |
-
|
430 |
-
async def simulate_webhook(
|
431 |
-
repo_name: str, discussion_title: str, comment_content: str
|
432 |
-
) -> str:
|
433 |
-
"""Simulate webhook for testing"""
|
434 |
-
if not all([repo_name, discussion_title, comment_content]):
|
435 |
-
return "Please fill in all fields."
|
436 |
-
|
437 |
-
mock_payload = {
|
438 |
-
"event": {"action": "create", "scope": "discussion"},
|
439 |
-
"comment": {
|
440 |
-
"content": comment_content,
|
441 |
-
"author": {"id": "test-user-id"},
|
442 |
-
"id": "mock-comment-id",
|
443 |
-
"hidden": False,
|
444 |
-
},
|
445 |
-
"discussion": {
|
446 |
-
"title": discussion_title,
|
447 |
-
"num": len(tag_operations_store) + 1,
|
448 |
-
"id": "mock-discussion-id",
|
449 |
-
"status": "open",
|
450 |
-
"isPullRequest": False,
|
451 |
-
},
|
452 |
-
"repo": {
|
453 |
-
"name": repo_name,
|
454 |
-
"type": "model",
|
455 |
-
"private": False,
|
456 |
-
},
|
457 |
}
|
458 |
|
459 |
-
|
460 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
|
462 |
|
463 |
def create_gradio_app():
|
@@ -501,11 +654,11 @@ def create_gradio_app():
|
|
501 |
with gr.Column():
|
502 |
sim_result = gr.Textbox(label="Result", lines=8)
|
503 |
|
504 |
-
sim_btn.click(
|
505 |
-
|
506 |
-
|
507 |
-
|
508 |
-
)
|
509 |
|
510 |
gr.Markdown(f"""
|
511 |
## Recognized Tags:
|
|
|
70 |
|
71 |
|
72 |
class WebhookEvent(BaseModel):
|
73 |
+
event: Dict[str, str] # Contains action and scope information
|
74 |
+
comment: Dict[str, Any] # Comment content and metadata
|
75 |
+
discussion: Dict[str, Any] # Discussion information
|
76 |
+
repo: Dict[str, str] # Repository details
|
77 |
|
78 |
|
79 |
app = FastAPI(title="HF Tagging Bot")
|
|
|
160 |
return valid_tags
|
161 |
|
162 |
|
163 |
+
# async def process_webhook_comment(webhook_data: Dict[str, Any]):
|
164 |
+
# """Process webhook to detect and add tags"""
|
165 |
+
# print("🏷️ Starting process_webhook_comment...")
|
166 |
+
|
167 |
+
# try:
|
168 |
+
# comment_content = webhook_data["comment"]["content"]
|
169 |
+
# discussion_title = webhook_data["discussion"]["title"]
|
170 |
+
# repo_name = webhook_data["repo"]["name"]
|
171 |
+
# discussion_num = webhook_data["discussion"]["num"]
|
172 |
+
# # Author is an object with "id" field
|
173 |
+
# comment_author = webhook_data["comment"]["author"].get("id", "unknown")
|
174 |
+
|
175 |
+
# print(f"📝 Comment content: {comment_content}")
|
176 |
+
# print(f"📰 Discussion title: {discussion_title}")
|
177 |
+
# print(f"📦 Repository: {repo_name}")
|
178 |
+
|
179 |
+
# # Extract potential tags from the comment and discussion title
|
180 |
+
# comment_tags = extract_tags_from_text(comment_content)
|
181 |
+
# title_tags = extract_tags_from_text(discussion_title)
|
182 |
+
# all_tags = list(set(comment_tags + title_tags))
|
183 |
+
|
184 |
+
# print(f"🔍 Comment tags found: {comment_tags}")
|
185 |
+
# print(f"🔍 Title tags found: {title_tags}")
|
186 |
+
# print(f"🏷️ All unique tags: {all_tags}")
|
187 |
+
|
188 |
+
# result_messages = []
|
189 |
+
|
190 |
+
# if not all_tags:
|
191 |
+
# msg = "No recognizable tags found in the discussion."
|
192 |
+
# print(f"❌ {msg}")
|
193 |
+
# result_messages.append(msg)
|
194 |
+
# else:
|
195 |
+
# print("🤖 Getting agent instance...")
|
196 |
+
# agent = await get_agent()
|
197 |
+
# if not agent:
|
198 |
+
# msg = "Error: Agent not configured (missing HF_TOKEN)"
|
199 |
+
# print(f"❌ {msg}")
|
200 |
+
# result_messages.append(msg)
|
201 |
+
# else:
|
202 |
+
# print("✅ Agent instance obtained successfully")
|
203 |
+
|
204 |
+
# # Process all tags in a single conversation with the agent
|
205 |
+
# try:
|
206 |
+
# # Create a comprehensive prompt for the agent
|
207 |
+
# user_prompt = f"""
|
208 |
+
# I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
|
209 |
+
|
210 |
+
# For each tag, please:
|
211 |
+
# 1. Check if the tag already exists on the repository using get_current_tags
|
212 |
+
# 2. If the tag doesn't exist, add it using add_new_tag
|
213 |
+
# 3. Provide a summary of what was done for each tag
|
214 |
+
|
215 |
+
# Please process all {len(all_tags)} tags: {", ".join(all_tags)}
|
216 |
+
# """
|
217 |
+
|
218 |
+
# print("💬 Sending comprehensive prompt to agent...")
|
219 |
+
# print(f"📝 Prompt: {user_prompt}")
|
220 |
+
|
221 |
+
# # Let the agent handle the entire conversation
|
222 |
+
# conversation_result = []
|
223 |
+
|
224 |
+
# try:
|
225 |
+
# async for item in agent.run(user_prompt):
|
226 |
+
# # The agent yields different types of items
|
227 |
+
# item_str = str(item)
|
228 |
+
# conversation_result.append(item_str)
|
229 |
+
|
230 |
+
# # Log important events
|
231 |
+
# if (
|
232 |
+
# "tool_call" in item_str.lower()
|
233 |
+
# or "function" in item_str.lower()
|
234 |
+
# ):
|
235 |
+
# print(f"🔧 Agent using tools: {item_str[:200]}...")
|
236 |
+
# elif "content" in item_str and len(item_str) < 500:
|
237 |
+
# print(f"💭 Agent response: {item_str}")
|
238 |
+
|
239 |
+
# # Extract the final response from the conversation
|
240 |
+
# full_response = " ".join(conversation_result)
|
241 |
+
# print(f"📋 Agent conversation completed successfully")
|
242 |
+
|
243 |
+
# # Try to extract meaningful results for each tag
|
244 |
+
# for tag in all_tags:
|
245 |
+
# tag_mentioned = tag.lower() in full_response.lower()
|
246 |
+
|
247 |
+
# if (
|
248 |
+
# "already exists" in full_response.lower()
|
249 |
+
# and tag_mentioned
|
250 |
+
# ):
|
251 |
+
# msg = f"Tag '{tag}': Already exists"
|
252 |
+
# elif (
|
253 |
+
# "pr" in full_response.lower()
|
254 |
+
# or "pull request" in full_response.lower()
|
255 |
+
# ):
|
256 |
+
# if tag_mentioned:
|
257 |
+
# msg = f"Tag '{tag}': PR created successfully"
|
258 |
+
# else:
|
259 |
+
# msg = (
|
260 |
+
# f"Tag '{tag}': Processed "
|
261 |
+
# "(PR may have been created)"
|
262 |
+
# )
|
263 |
+
# elif "success" in full_response.lower() and tag_mentioned:
|
264 |
+
# msg = f"Tag '{tag}': Successfully processed"
|
265 |
+
# elif "error" in full_response.lower() and tag_mentioned:
|
266 |
+
# msg = f"Tag '{tag}': Error during processing"
|
267 |
+
# else:
|
268 |
+
# msg = f"Tag '{tag}': Processed by agent"
|
269 |
+
|
270 |
+
# print(f"✅ Result for tag '{tag}': {msg}")
|
271 |
+
# result_messages.append(msg)
|
272 |
+
|
273 |
+
# except Exception as agent_error:
|
274 |
+
# print(f"⚠️ Agent streaming failed: {str(agent_error)}")
|
275 |
+
# print("🔄 Falling back to direct MCP tool calls...")
|
276 |
+
|
277 |
+
# # Import the MCP server functions directly as fallback
|
278 |
+
# try:
|
279 |
+
# import sys
|
280 |
+
# import importlib.util
|
281 |
+
|
282 |
+
# # Load the MCP server module
|
283 |
+
# spec = importlib.util.spec_from_file_location(
|
284 |
+
# "mcp_server", "./mcp_server.py"
|
285 |
+
# )
|
286 |
+
# mcp_module = importlib.util.module_from_spec(spec) # type: ignore
|
287 |
+
# spec.loader.exec_module(mcp_module) # type: ignore
|
288 |
+
|
289 |
+
# # Use the MCP tools directly for each tag
|
290 |
+
# for tag in all_tags:
|
291 |
+
# try:
|
292 |
+
# print(
|
293 |
+
# f"🔧 Directly calling get_current_tags for '{tag}'"
|
294 |
+
# )
|
295 |
+
# current_tags_result = mcp_module.get_current_tags(
|
296 |
+
# repo_name
|
297 |
+
# )
|
298 |
+
# print(
|
299 |
+
# f"📄 Current tags result: {current_tags_result}"
|
300 |
+
# )
|
301 |
+
|
302 |
+
# # Parse the JSON result
|
303 |
+
# import json
|
304 |
+
|
305 |
+
# tags_data = json.loads(current_tags_result)
|
306 |
+
|
307 |
+
# if tags_data.get("status") == "success":
|
308 |
+
# current_tags = tags_data.get("current_tags", [])
|
309 |
+
# if tag in current_tags:
|
310 |
+
# msg = f"Tag '{tag}': Already exists"
|
311 |
+
# print(f"✅ {msg}")
|
312 |
+
# else:
|
313 |
+
# print(
|
314 |
+
# f"🔧 Directly calling add_new_tag for '{tag}'"
|
315 |
+
# )
|
316 |
+
# add_result = mcp_module.add_new_tag(
|
317 |
+
# repo_name, tag
|
318 |
+
# )
|
319 |
+
# print(f"📄 Add tag result: {add_result}")
|
320 |
+
|
321 |
+
# add_data = json.loads(add_result)
|
322 |
+
# if add_data.get("status") == "success":
|
323 |
+
# pr_url = add_data.get("pr_url", "")
|
324 |
+
# msg = f"Tag '{tag}': PR created - {pr_url}"
|
325 |
+
# elif (
|
326 |
+
# add_data.get("status")
|
327 |
+
# == "already_exists"
|
328 |
+
# ):
|
329 |
+
# msg = f"Tag '{tag}': Already exists"
|
330 |
+
# else:
|
331 |
+
# msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
|
332 |
+
# print(f"✅ {msg}")
|
333 |
+
# else:
|
334 |
+
# error_msg = tags_data.get(
|
335 |
+
# "error", "Unknown error"
|
336 |
+
# )
|
337 |
+
# msg = f"Tag '{tag}': Error - {error_msg}"
|
338 |
+
# print(f"❌ {msg}")
|
339 |
+
|
340 |
+
# result_messages.append(msg)
|
341 |
+
|
342 |
+
# except Exception as direct_error:
|
343 |
+
# error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
|
344 |
+
# print(f"❌ {error_msg}")
|
345 |
+
# result_messages.append(error_msg)
|
346 |
+
|
347 |
+
# except Exception as fallback_error:
|
348 |
+
# error_msg = (
|
349 |
+
# f"Fallback approach failed: {str(fallback_error)}"
|
350 |
+
# )
|
351 |
+
# print(f"❌ {error_msg}")
|
352 |
+
# result_messages.append(error_msg)
|
353 |
+
|
354 |
+
# except Exception as e:
|
355 |
+
# error_msg = f"Error during agent processing: {str(e)}"
|
356 |
+
# print(f"❌ {error_msg}")
|
357 |
+
# result_messages.append(error_msg)
|
358 |
+
|
359 |
+
# # Store the interaction
|
360 |
+
# base_url = "https://huggingface.co"
|
361 |
+
# discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
|
362 |
+
|
363 |
+
# interaction = {
|
364 |
+
# "timestamp": datetime.now().isoformat(),
|
365 |
+
# "repo": repo_name,
|
366 |
+
# "discussion_title": discussion_title,
|
367 |
+
# "discussion_num": discussion_num,
|
368 |
+
# "discussion_url": discussion_url,
|
369 |
+
# "original_comment": comment_content,
|
370 |
+
# "comment_author": comment_author,
|
371 |
+
# "detected_tags": all_tags,
|
372 |
+
# "results": result_messages,
|
373 |
+
# }
|
374 |
+
|
375 |
+
# tag_operations_store.append(interaction)
|
376 |
+
# final_result = " | ".join(result_messages)
|
377 |
+
# print(f"💾 Stored interaction and returning result: {final_result}")
|
378 |
+
# return final_result
|
379 |
+
|
380 |
+
# except Exception as e:
|
381 |
+
# error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
|
382 |
+
# print(error_msg)
|
383 |
+
# return error_msg
|
384 |
+
|
385 |
+
|
386 |
+
@app.post("/webhook")
|
387 |
+
async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
|
388 |
+
"""
|
389 |
+
Handle incoming webhooks from Hugging Face Hub
|
390 |
+
Following the pattern from: https://raw.githubusercontent.com/huggingface/hub-docs/refs/heads/main/docs/hub/webhooks-guide-discussion-bot.md
|
391 |
+
"""
|
392 |
+
print("🔔 Webhook received!")
|
393 |
+
|
394 |
+
# Step 1: Validate webhook secret (security)
|
395 |
+
webhook_secret = request.headers.get("X-Webhook-Secret")
|
396 |
+
if webhook_secret != WEBHOOK_SECRET:
|
397 |
+
print("❌ Invalid webhook secret")
|
398 |
+
return {"error": "Invalid webhook secret"}, 400
|
399 |
+
|
400 |
+
# Step 2: Parse webhook data
|
401 |
+
try:
|
402 |
+
webhook_data = await request.json()
|
403 |
+
print(f"📥 Webhook data: {json.dumps(webhook_data, indent=2)}")
|
404 |
+
except Exception as e:
|
405 |
+
print(f"❌ Error parsing webhook data: {str(e)}")
|
406 |
+
return {"error": "invalid JSON"}, 400
|
407 |
+
|
408 |
+
# Step 3: Validate event structure
|
409 |
+
event = webhook_data.get("event", {})
|
410 |
+
if not event:
|
411 |
+
print("❌ No event data in webhook")
|
412 |
+
return {"error": "missing event data"}, 400
|
413 |
+
|
414 |
+
scope = event.get("scope")
|
415 |
+
action = event.get("action")
|
416 |
+
|
417 |
+
print(f"🔍 Event details - scope: {scope}, action: {action}")
|
418 |
+
|
419 |
+
# Step 4: Check if this is a discussion comment creation
|
420 |
+
# Following the webhook guide pattern:
|
421 |
+
if (
|
422 |
+
action == "create" and
|
423 |
+
scope == "discussion.comment"
|
424 |
+
):
|
425 |
+
print("✅ Valid discussion comment creation event")
|
426 |
+
|
427 |
+
# Process in background to return quickly to Hub
|
428 |
+
background_tasks.add_task(process_webhook_comment, webhook_data)
|
429 |
+
|
430 |
+
return {
|
431 |
+
"status": "accepted",
|
432 |
+
"message": "Comment processing started",
|
433 |
+
"timestamp": datetime.now().isoformat()
|
434 |
+
}
|
435 |
+
else:
|
436 |
+
print(f"ℹ️ Ignoring event: action={event.get('action')}, scope={event.get('scope')}")
|
437 |
+
return {
|
438 |
+
"status": "ignored",
|
439 |
+
"reason": "Not a discussion comment creation"
|
440 |
+
}
|
441 |
+
|
442 |
+
|
443 |
async def process_webhook_comment(webhook_data: Dict[str, Any]):
|
444 |
+
"""
|
445 |
+
Process webhook comment to detect and add tags
|
446 |
+
Integrates with our MCP client for Hub interactions
|
447 |
+
"""
|
448 |
print("🏷️ Starting process_webhook_comment...")
|
449 |
+
|
450 |
try:
|
451 |
+
# Extract comment and repository information
|
452 |
comment_content = webhook_data["comment"]["content"]
|
453 |
discussion_title = webhook_data["discussion"]["title"]
|
454 |
repo_name = webhook_data["repo"]["name"]
|
455 |
discussion_num = webhook_data["discussion"]["num"]
|
|
|
456 |
comment_author = webhook_data["comment"]["author"].get("id", "unknown")
|
457 |
+
|
458 |
+
print(f"📝 Comment from {comment_author}: {comment_content}")
|
459 |
+
print(f"📰 Discussion: {discussion_title}")
|
460 |
print(f"📦 Repository: {repo_name}")
|
461 |
+
except Exception as e:
|
462 |
+
print(f"❌ Error parsing webhook data: {str(e)}")
|
463 |
+
return {"error": "invalid JSON"}, 400
|
464 |
|
465 |
+
# Extract potential tags from comment and title
|
466 |
+
comment_tags = extract_tags_from_text(comment_content)
|
467 |
+
title_tags = extract_tags_from_text(discussion_title)
|
468 |
+
all_tags = list(set(comment_tags + title_tags))
|
469 |
+
|
470 |
+
print(f"🔍 Found tags: {all_tags}")
|
471 |
+
|
472 |
+
# Store operation for monitoring
|
473 |
+
operation = {
|
474 |
+
"timestamp": datetime.now().isoformat(),
|
475 |
+
"repo_name": repo_name,
|
476 |
+
"discussion_num": discussion_num,
|
477 |
+
"comment_author": comment_author,
|
478 |
+
"extracted_tags": all_tags,
|
479 |
+
"comment_preview": comment_content[:100] + "..." if len(comment_content) > 100 else comment_content,
|
480 |
+
"status": "processing"
|
481 |
+
}
|
482 |
+
tag_operations_store.append(operation)
|
483 |
+
|
484 |
+
if not all_tags:
|
485 |
+
operation["status"] = "no_tags"
|
486 |
+
operation["message"] = "No recognizable tags found"
|
487 |
+
print("❌ No tags found to process")
|
488 |
+
return
|
489 |
+
|
490 |
+
# Get MCP agent for tag processing
|
491 |
+
agent = await get_agent()
|
492 |
+
if not agent:
|
493 |
+
operation["status"] = "error"
|
494 |
+
operation["message"] = "Agent not configured (missing HF_TOKEN)"
|
495 |
+
print("❌ No agent available")
|
496 |
+
return
|
497 |
+
|
498 |
+
# Process each extracted tag
|
499 |
+
operation["results"] = []
|
500 |
+
for tag in all_tags:
|
501 |
+
try:
|
502 |
+
print(f"🤖 Processing tag '{tag}' for repo '{repo_name}'")
|
503 |
+
|
504 |
+
# Create prompt for agent to handle tag processing
|
505 |
+
prompt = f"""
|
506 |
+
Analyze the repository '{repo_name}' and determine if the tag '{tag}' should be added.
|
507 |
+
|
508 |
+
First, check the current tags using get_current_tags.
|
509 |
+
If '{tag}' is not already present and it's a valid tag, add it using add_new_tag.
|
510 |
+
|
511 |
+
Repository: {repo_name}
|
512 |
+
Tag to process: {tag}
|
513 |
+
|
514 |
+
Provide a clear summary of what was done.
|
515 |
+
"""
|
516 |
+
|
517 |
+
response = await agent.run(prompt) # type: ignore
|
518 |
+
print(f"🤖 Agent response for '{tag}': {response}")
|
519 |
+
|
520 |
+
# Parse response and store result
|
521 |
+
tag_result = {
|
522 |
+
"tag": tag,
|
523 |
+
"response": response,
|
524 |
+
"timestamp": datetime.now().isoformat()
|
525 |
+
}
|
526 |
+
operation["results"].append(tag_result)
|
527 |
+
|
528 |
+
except Exception as e:
|
529 |
+
error_msg = f"❌ Error processing tag '{tag}': {str(e)}"
|
530 |
+
print(error_msg)
|
531 |
+
operation["results"].append({
|
532 |
+
"tag": tag,
|
533 |
+
"error": str(e),
|
534 |
+
"timestamp": datetime.now().isoformat()
|
535 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
536 |
|
537 |
+
operation["status"] = "completed"
|
538 |
+
print(f"✅ Completed processing {len(all_tags)} tags")
|
|
|
|
|
539 |
|
540 |
|
541 |
+
@app.get("/")
|
542 |
+
async def root():
|
543 |
+
"""Root endpoint with basic information"""
|
544 |
+
return {
|
545 |
+
"name": "HF Tagging Bot",
|
546 |
+
"status": "running",
|
547 |
+
"description": "Webhook listener for automatic model tagging",
|
548 |
+
"endpoints": {
|
549 |
+
"webhook": "/webhook",
|
550 |
+
"health": "/health",
|
551 |
+
"operations": "/operations"
|
552 |
+
}
|
553 |
+
}
|
554 |
|
|
|
|
|
555 |
|
556 |
+
@app.get("/health")
|
557 |
+
async def health_check():
|
558 |
+
"""Health check endpoint for monitoring"""
|
559 |
+
agent = await get_agent()
|
560 |
+
|
561 |
+
return {
|
562 |
+
"status": "healthy",
|
563 |
+
"timestamp": datetime.now().isoformat(),
|
564 |
+
"components": {
|
565 |
+
"webhook_secret": "configured" if WEBHOOK_SECRET else "missing",
|
566 |
+
"hf_token": "configured" if HF_TOKEN else "missing",
|
567 |
+
"mcp_agent": "ready" if agent else "not_ready"
|
568 |
+
}
|
569 |
+
}
|
570 |
|
|
|
571 |
|
572 |
+
@app.get("/operations")
|
573 |
+
async def get_operations():
|
574 |
+
"""Get recent tag operations for monitoring"""
|
575 |
+
# Return last 50 operations
|
576 |
+
recent_ops = tag_operations_store[-50:] if tag_operations_store else []
|
577 |
+
return {
|
578 |
+
"total_operations": len(tag_operations_store),
|
579 |
+
"recent_operations": recent_ops
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
580 |
}
|
581 |
|
582 |
+
|
583 |
+
# async def simulate_webhook(
|
584 |
+
# repo_name: str, discussion_title: str, comment_content: str
|
585 |
+
# ) -> str:
|
586 |
+
# """Simulate webhook for testing"""
|
587 |
+
# if not all([repo_name, discussion_title, comment_content]):
|
588 |
+
# return "Please fill in all fields."
|
589 |
+
#
|
590 |
+
# mock_payload = {
|
591 |
+
# "event": {"action": "create", "scope": "discussion"},
|
592 |
+
# "comment": {
|
593 |
+
# "content": comment_content,
|
594 |
+
# "author": {"id": "test-user-id"},
|
595 |
+
# "id": "mock-comment-id",
|
596 |
+
# "hidden": False,
|
597 |
+
# },
|
598 |
+
# "discussion": {
|
599 |
+
# "title": discussion_title,
|
600 |
+
# "num": len(tag_operations_store) + 1,
|
601 |
+
# "id": "mock-discussion-id",
|
602 |
+
# "status": "open",
|
603 |
+
# "isPullRequest": False,
|
604 |
+
# },
|
605 |
+
# "repo": {
|
606 |
+
# "name": repo_name,
|
607 |
+
# "type": "model",
|
608 |
+
# "private": False,
|
609 |
+
# },
|
610 |
+
# }
|
611 |
+
#
|
612 |
+
# response = await process_webhook_comment(mock_payload)
|
613 |
+
# return f"✅ Processed! Results: {response}"
|
614 |
|
615 |
|
616 |
def create_gradio_app():
|
|
|
654 |
with gr.Column():
|
655 |
sim_result = gr.Textbox(label="Result", lines=8)
|
656 |
|
657 |
+
# sim_btn.click(
|
658 |
+
# fn=simulate_webhook,
|
659 |
+
# inputs=[sim_repo, sim_title, sim_comment],
|
660 |
+
# outputs=sim_result,
|
661 |
+
# )
|
662 |
|
663 |
gr.Markdown(f"""
|
664 |
## Recognized Tags:
|