Gianpaolo Macario commited on
Commit
988d01d
·
1 Parent(s): 103a972

refactor: enhance webhook processing and add health check endpoint

Browse files
Files changed (1) hide show
  1. app.py +441 -288
app.py CHANGED
@@ -70,10 +70,10 @@ RECOGNIZED_TAGS = {
70
 
71
 
72
  class WebhookEvent(BaseModel):
73
- event: Dict[str, str]
74
- comment: Dict[str, Any]
75
- discussion: Dict[str, Any]
76
- repo: Dict[str, str]
77
 
78
 
79
  app = FastAPI(title="HF Tagging Bot")
@@ -160,304 +160,457 @@ def extract_tags_from_text(text: str) -> List[str]:
160
  return valid_tags
161
 
162
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  async def process_webhook_comment(webhook_data: Dict[str, Any]):
164
- """Process webhook to detect and add tags"""
 
 
 
165
  print("🏷️ Starting process_webhook_comment...")
166
-
167
  try:
 
168
  comment_content = webhook_data["comment"]["content"]
169
  discussion_title = webhook_data["discussion"]["title"]
170
  repo_name = webhook_data["repo"]["name"]
171
  discussion_num = webhook_data["discussion"]["num"]
172
- # Author is an object with "id" field
173
  comment_author = webhook_data["comment"]["author"].get("id", "unknown")
174
-
175
- print(f"📝 Comment content: {comment_content}")
176
- print(f"📰 Discussion title: {discussion_title}")
177
  print(f"📦 Repository: {repo_name}")
 
 
 
178
 
179
- # Extract potential tags from the comment and discussion title
180
- comment_tags = extract_tags_from_text(comment_content)
181
- title_tags = extract_tags_from_text(discussion_title)
182
- all_tags = list(set(comment_tags + title_tags))
183
-
184
- print(f"🔍 Comment tags found: {comment_tags}")
185
- print(f"🔍 Title tags found: {title_tags}")
186
- print(f"🏷️ All unique tags: {all_tags}")
187
-
188
- result_messages = []
189
-
190
- if not all_tags:
191
- msg = "No recognizable tags found in the discussion."
192
- print(f" {msg}")
193
- result_messages.append(msg)
194
- else:
195
- print("🤖 Getting agent instance...")
196
- agent = await get_agent()
197
- if not agent:
198
- msg = "Error: Agent not configured (missing HF_TOKEN)"
199
- print(f" {msg}")
200
- result_messages.append(msg)
201
- else:
202
- print("✅ Agent instance obtained successfully")
203
-
204
- # Process all tags in a single conversation with the agent
205
- try:
206
- # Create a comprehensive prompt for the agent
207
- user_prompt = f"""
208
- I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
209
-
210
- For each tag, please:
211
- 1. Check if the tag already exists on the repository using get_current_tags
212
- 2. If the tag doesn't exist, add it using add_new_tag
213
- 3. Provide a summary of what was done for each tag
214
-
215
- Please process all {len(all_tags)} tags: {", ".join(all_tags)}
216
- """
217
-
218
- print("💬 Sending comprehensive prompt to agent...")
219
- print(f"📝 Prompt: {user_prompt}")
220
-
221
- # Let the agent handle the entire conversation
222
- conversation_result = []
223
-
224
- try:
225
- async for item in agent.run(user_prompt):
226
- # The agent yields different types of items
227
- item_str = str(item)
228
- conversation_result.append(item_str)
229
-
230
- # Log important events
231
- if (
232
- "tool_call" in item_str.lower()
233
- or "function" in item_str.lower()
234
- ):
235
- print(f"🔧 Agent using tools: {item_str[:200]}...")
236
- elif "content" in item_str and len(item_str) < 500:
237
- print(f"💭 Agent response: {item_str}")
238
-
239
- # Extract the final response from the conversation
240
- full_response = " ".join(conversation_result)
241
- print(f"📋 Agent conversation completed successfully")
242
-
243
- # Try to extract meaningful results for each tag
244
- for tag in all_tags:
245
- tag_mentioned = tag.lower() in full_response.lower()
246
-
247
- if (
248
- "already exists" in full_response.lower()
249
- and tag_mentioned
250
- ):
251
- msg = f"Tag '{tag}': Already exists"
252
- elif (
253
- "pr" in full_response.lower()
254
- or "pull request" in full_response.lower()
255
- ):
256
- if tag_mentioned:
257
- msg = f"Tag '{tag}': PR created successfully"
258
- else:
259
- msg = (
260
- f"Tag '{tag}': Processed "
261
- "(PR may have been created)"
262
- )
263
- elif "success" in full_response.lower() and tag_mentioned:
264
- msg = f"Tag '{tag}': Successfully processed"
265
- elif "error" in full_response.lower() and tag_mentioned:
266
- msg = f"Tag '{tag}': Error during processing"
267
- else:
268
- msg = f"Tag '{tag}': Processed by agent"
269
-
270
- print(f"✅ Result for tag '{tag}': {msg}")
271
- result_messages.append(msg)
272
-
273
- except Exception as agent_error:
274
- print(f"⚠️ Agent streaming failed: {str(agent_error)}")
275
- print("🔄 Falling back to direct MCP tool calls...")
276
-
277
- # Import the MCP server functions directly as fallback
278
- try:
279
- import sys
280
- import importlib.util
281
-
282
- # Load the MCP server module
283
- spec = importlib.util.spec_from_file_location(
284
- "mcp_server", "./mcp_server.py"
285
- )
286
- mcp_module = importlib.util.module_from_spec(spec) # type: ignore
287
- spec.loader.exec_module(mcp_module) # type: ignore
288
-
289
- # Use the MCP tools directly for each tag
290
- for tag in all_tags:
291
- try:
292
- print(
293
- f"🔧 Directly calling get_current_tags for '{tag}'"
294
- )
295
- current_tags_result = mcp_module.get_current_tags(
296
- repo_name
297
- )
298
- print(
299
- f"📄 Current tags result: {current_tags_result}"
300
- )
301
-
302
- # Parse the JSON result
303
- import json
304
-
305
- tags_data = json.loads(current_tags_result)
306
-
307
- if tags_data.get("status") == "success":
308
- current_tags = tags_data.get("current_tags", [])
309
- if tag in current_tags:
310
- msg = f"Tag '{tag}': Already exists"
311
- print(f"✅ {msg}")
312
- else:
313
- print(
314
- f"🔧 Directly calling add_new_tag for '{tag}'"
315
- )
316
- add_result = mcp_module.add_new_tag(
317
- repo_name, tag
318
- )
319
- print(f"📄 Add tag result: {add_result}")
320
-
321
- add_data = json.loads(add_result)
322
- if add_data.get("status") == "success":
323
- pr_url = add_data.get("pr_url", "")
324
- msg = f"Tag '{tag}': PR created - {pr_url}"
325
- elif (
326
- add_data.get("status")
327
- == "already_exists"
328
- ):
329
- msg = f"Tag '{tag}': Already exists"
330
- else:
331
- msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
332
- print(f"✅ {msg}")
333
- else:
334
- error_msg = tags_data.get(
335
- "error", "Unknown error"
336
- )
337
- msg = f"Tag '{tag}': Error - {error_msg}"
338
- print(f"❌ {msg}")
339
-
340
- result_messages.append(msg)
341
-
342
- except Exception as direct_error:
343
- error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
344
- print(f"❌ {error_msg}")
345
- result_messages.append(error_msg)
346
-
347
- except Exception as fallback_error:
348
- error_msg = (
349
- f"Fallback approach failed: {str(fallback_error)}"
350
- )
351
- print(f"❌ {error_msg}")
352
- result_messages.append(error_msg)
353
-
354
- except Exception as e:
355
- error_msg = f"Error during agent processing: {str(e)}"
356
- print(f"❌ {error_msg}")
357
- result_messages.append(error_msg)
358
-
359
- # Store the interaction
360
- base_url = "https://huggingface.co"
361
- discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
362
-
363
- interaction = {
364
- "timestamp": datetime.now().isoformat(),
365
- "repo": repo_name,
366
- "discussion_title": discussion_title,
367
- "discussion_num": discussion_num,
368
- "discussion_url": discussion_url,
369
- "original_comment": comment_content,
370
- "comment_author": comment_author,
371
- "detected_tags": all_tags,
372
- "results": result_messages,
373
- }
374
-
375
- tag_operations_store.append(interaction)
376
- final_result = " | ".join(result_messages)
377
- print(f"💾 Stored interaction and returning result: {final_result}")
378
- return final_result
379
 
380
- except Exception as e:
381
- error_msg = f" Fatal error in process_webhook_comment: {str(e)}"
382
- print(error_msg)
383
- return error_msg
384
 
385
 
386
- @app.post("/webhook")
387
- async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
388
- """Handle HF Hub webhooks"""
389
- webhook_secret = request.headers.get("X-Webhook-Secret")
390
- if webhook_secret != WEBHOOK_SECRET:
391
- print(" Invalid webhook secret")
392
- return {"error": "Invalid webhook secret"}
 
 
 
 
 
 
393
 
394
- payload = await request.json()
395
- print(f"📥 Received webhook payload: {json.dumps(payload, indent=2)}")
396
 
397
- event = payload.get("event", {})
398
- scope = event.get("scope")
399
- action = event.get("action")
 
 
 
 
 
 
 
 
 
 
 
400
 
401
- print(f"🔍 Event details - scope: {scope}, action: {action}")
402
 
403
- # Check if this is a discussion comment creation
404
- scope_check = scope == "discussion"
405
- action_check = action == "create"
406
- not_pr = not payload["discussion"]["isPullRequest"]
407
- scope_check = scope_check and not_pr
408
- print(f"✅ not_pr: {not_pr}")
409
- print(f"✅ scope_check: {scope_check}")
410
- print(f"✅ action_check: {action_check}")
411
-
412
- if scope_check and action_check:
413
- # Verify we have the required fields
414
- required_fields = ["comment", "discussion", "repo"]
415
- missing_fields = [field for field in required_fields if field not in payload]
416
-
417
- if missing_fields:
418
- error_msg = f"Missing required fields: {missing_fields}"
419
- print(f"❌ {error_msg}")
420
- return {"error": error_msg}
421
-
422
- print(f"🚀 Processing webhook for repo: {payload['repo']['name']}")
423
- background_tasks.add_task(process_webhook_comment, payload)
424
- return {"status": "processing"}
425
-
426
- print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
427
- return {"status": "ignored"}
428
-
429
-
430
- async def simulate_webhook(
431
- repo_name: str, discussion_title: str, comment_content: str
432
- ) -> str:
433
- """Simulate webhook for testing"""
434
- if not all([repo_name, discussion_title, comment_content]):
435
- return "Please fill in all fields."
436
-
437
- mock_payload = {
438
- "event": {"action": "create", "scope": "discussion"},
439
- "comment": {
440
- "content": comment_content,
441
- "author": {"id": "test-user-id"},
442
- "id": "mock-comment-id",
443
- "hidden": False,
444
- },
445
- "discussion": {
446
- "title": discussion_title,
447
- "num": len(tag_operations_store) + 1,
448
- "id": "mock-discussion-id",
449
- "status": "open",
450
- "isPullRequest": False,
451
- },
452
- "repo": {
453
- "name": repo_name,
454
- "type": "model",
455
- "private": False,
456
- },
457
  }
458
 
459
- response = await process_webhook_comment(mock_payload)
460
- return f"✅ Processed! Results: {response}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
461
 
462
 
463
  def create_gradio_app():
@@ -501,11 +654,11 @@ def create_gradio_app():
501
  with gr.Column():
502
  sim_result = gr.Textbox(label="Result", lines=8)
503
 
504
- sim_btn.click(
505
- fn=simulate_webhook,
506
- inputs=[sim_repo, sim_title, sim_comment],
507
- outputs=sim_result,
508
- )
509
 
510
  gr.Markdown(f"""
511
  ## Recognized Tags:
 
70
 
71
 
72
  class WebhookEvent(BaseModel):
73
+ event: Dict[str, str] # Contains action and scope information
74
+ comment: Dict[str, Any] # Comment content and metadata
75
+ discussion: Dict[str, Any] # Discussion information
76
+ repo: Dict[str, str] # Repository details
77
 
78
 
79
  app = FastAPI(title="HF Tagging Bot")
 
160
  return valid_tags
161
 
162
 
163
+ # async def process_webhook_comment(webhook_data: Dict[str, Any]):
164
+ # """Process webhook to detect and add tags"""
165
+ # print("🏷️ Starting process_webhook_comment...")
166
+
167
+ # try:
168
+ # comment_content = webhook_data["comment"]["content"]
169
+ # discussion_title = webhook_data["discussion"]["title"]
170
+ # repo_name = webhook_data["repo"]["name"]
171
+ # discussion_num = webhook_data["discussion"]["num"]
172
+ # # Author is an object with "id" field
173
+ # comment_author = webhook_data["comment"]["author"].get("id", "unknown")
174
+
175
+ # print(f"📝 Comment content: {comment_content}")
176
+ # print(f"📰 Discussion title: {discussion_title}")
177
+ # print(f"📦 Repository: {repo_name}")
178
+
179
+ # # Extract potential tags from the comment and discussion title
180
+ # comment_tags = extract_tags_from_text(comment_content)
181
+ # title_tags = extract_tags_from_text(discussion_title)
182
+ # all_tags = list(set(comment_tags + title_tags))
183
+
184
+ # print(f"🔍 Comment tags found: {comment_tags}")
185
+ # print(f"🔍 Title tags found: {title_tags}")
186
+ # print(f"🏷️ All unique tags: {all_tags}")
187
+
188
+ # result_messages = []
189
+
190
+ # if not all_tags:
191
+ # msg = "No recognizable tags found in the discussion."
192
+ # print(f"❌ {msg}")
193
+ # result_messages.append(msg)
194
+ # else:
195
+ # print("🤖 Getting agent instance...")
196
+ # agent = await get_agent()
197
+ # if not agent:
198
+ # msg = "Error: Agent not configured (missing HF_TOKEN)"
199
+ # print(f"❌ {msg}")
200
+ # result_messages.append(msg)
201
+ # else:
202
+ # print("✅ Agent instance obtained successfully")
203
+
204
+ # # Process all tags in a single conversation with the agent
205
+ # try:
206
+ # # Create a comprehensive prompt for the agent
207
+ # user_prompt = f"""
208
+ # I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
209
+
210
+ # For each tag, please:
211
+ # 1. Check if the tag already exists on the repository using get_current_tags
212
+ # 2. If the tag doesn't exist, add it using add_new_tag
213
+ # 3. Provide a summary of what was done for each tag
214
+
215
+ # Please process all {len(all_tags)} tags: {", ".join(all_tags)}
216
+ # """
217
+
218
+ # print("💬 Sending comprehensive prompt to agent...")
219
+ # print(f"📝 Prompt: {user_prompt}")
220
+
221
+ # # Let the agent handle the entire conversation
222
+ # conversation_result = []
223
+
224
+ # try:
225
+ # async for item in agent.run(user_prompt):
226
+ # # The agent yields different types of items
227
+ # item_str = str(item)
228
+ # conversation_result.append(item_str)
229
+
230
+ # # Log important events
231
+ # if (
232
+ # "tool_call" in item_str.lower()
233
+ # or "function" in item_str.lower()
234
+ # ):
235
+ # print(f"🔧 Agent using tools: {item_str[:200]}...")
236
+ # elif "content" in item_str and len(item_str) < 500:
237
+ # print(f"💭 Agent response: {item_str}")
238
+
239
+ # # Extract the final response from the conversation
240
+ # full_response = " ".join(conversation_result)
241
+ # print(f"📋 Agent conversation completed successfully")
242
+
243
+ # # Try to extract meaningful results for each tag
244
+ # for tag in all_tags:
245
+ # tag_mentioned = tag.lower() in full_response.lower()
246
+
247
+ # if (
248
+ # "already exists" in full_response.lower()
249
+ # and tag_mentioned
250
+ # ):
251
+ # msg = f"Tag '{tag}': Already exists"
252
+ # elif (
253
+ # "pr" in full_response.lower()
254
+ # or "pull request" in full_response.lower()
255
+ # ):
256
+ # if tag_mentioned:
257
+ # msg = f"Tag '{tag}': PR created successfully"
258
+ # else:
259
+ # msg = (
260
+ # f"Tag '{tag}': Processed "
261
+ # "(PR may have been created)"
262
+ # )
263
+ # elif "success" in full_response.lower() and tag_mentioned:
264
+ # msg = f"Tag '{tag}': Successfully processed"
265
+ # elif "error" in full_response.lower() and tag_mentioned:
266
+ # msg = f"Tag '{tag}': Error during processing"
267
+ # else:
268
+ # msg = f"Tag '{tag}': Processed by agent"
269
+
270
+ # print(f"✅ Result for tag '{tag}': {msg}")
271
+ # result_messages.append(msg)
272
+
273
+ # except Exception as agent_error:
274
+ # print(f"⚠️ Agent streaming failed: {str(agent_error)}")
275
+ # print("🔄 Falling back to direct MCP tool calls...")
276
+
277
+ # # Import the MCP server functions directly as fallback
278
+ # try:
279
+ # import sys
280
+ # import importlib.util
281
+
282
+ # # Load the MCP server module
283
+ # spec = importlib.util.spec_from_file_location(
284
+ # "mcp_server", "./mcp_server.py"
285
+ # )
286
+ # mcp_module = importlib.util.module_from_spec(spec) # type: ignore
287
+ # spec.loader.exec_module(mcp_module) # type: ignore
288
+
289
+ # # Use the MCP tools directly for each tag
290
+ # for tag in all_tags:
291
+ # try:
292
+ # print(
293
+ # f"🔧 Directly calling get_current_tags for '{tag}'"
294
+ # )
295
+ # current_tags_result = mcp_module.get_current_tags(
296
+ # repo_name
297
+ # )
298
+ # print(
299
+ # f"📄 Current tags result: {current_tags_result}"
300
+ # )
301
+
302
+ # # Parse the JSON result
303
+ # import json
304
+
305
+ # tags_data = json.loads(current_tags_result)
306
+
307
+ # if tags_data.get("status") == "success":
308
+ # current_tags = tags_data.get("current_tags", [])
309
+ # if tag in current_tags:
310
+ # msg = f"Tag '{tag}': Already exists"
311
+ # print(f"✅ {msg}")
312
+ # else:
313
+ # print(
314
+ # f"🔧 Directly calling add_new_tag for '{tag}'"
315
+ # )
316
+ # add_result = mcp_module.add_new_tag(
317
+ # repo_name, tag
318
+ # )
319
+ # print(f"📄 Add tag result: {add_result}")
320
+
321
+ # add_data = json.loads(add_result)
322
+ # if add_data.get("status") == "success":
323
+ # pr_url = add_data.get("pr_url", "")
324
+ # msg = f"Tag '{tag}': PR created - {pr_url}"
325
+ # elif (
326
+ # add_data.get("status")
327
+ # == "already_exists"
328
+ # ):
329
+ # msg = f"Tag '{tag}': Already exists"
330
+ # else:
331
+ # msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
332
+ # print(f"✅ {msg}")
333
+ # else:
334
+ # error_msg = tags_data.get(
335
+ # "error", "Unknown error"
336
+ # )
337
+ # msg = f"Tag '{tag}': Error - {error_msg}"
338
+ # print(f"❌ {msg}")
339
+
340
+ # result_messages.append(msg)
341
+
342
+ # except Exception as direct_error:
343
+ # error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
344
+ # print(f"❌ {error_msg}")
345
+ # result_messages.append(error_msg)
346
+
347
+ # except Exception as fallback_error:
348
+ # error_msg = (
349
+ # f"Fallback approach failed: {str(fallback_error)}"
350
+ # )
351
+ # print(f"❌ {error_msg}")
352
+ # result_messages.append(error_msg)
353
+
354
+ # except Exception as e:
355
+ # error_msg = f"Error during agent processing: {str(e)}"
356
+ # print(f"❌ {error_msg}")
357
+ # result_messages.append(error_msg)
358
+
359
+ # # Store the interaction
360
+ # base_url = "https://huggingface.co"
361
+ # discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
362
+
363
+ # interaction = {
364
+ # "timestamp": datetime.now().isoformat(),
365
+ # "repo": repo_name,
366
+ # "discussion_title": discussion_title,
367
+ # "discussion_num": discussion_num,
368
+ # "discussion_url": discussion_url,
369
+ # "original_comment": comment_content,
370
+ # "comment_author": comment_author,
371
+ # "detected_tags": all_tags,
372
+ # "results": result_messages,
373
+ # }
374
+
375
+ # tag_operations_store.append(interaction)
376
+ # final_result = " | ".join(result_messages)
377
+ # print(f"💾 Stored interaction and returning result: {final_result}")
378
+ # return final_result
379
+
380
+ # except Exception as e:
381
+ # error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
382
+ # print(error_msg)
383
+ # return error_msg
384
+
385
+
386
+ @app.post("/webhook")
387
+ async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
388
+ """
389
+ Handle incoming webhooks from Hugging Face Hub
390
+ Following the pattern from: https://raw.githubusercontent.com/huggingface/hub-docs/refs/heads/main/docs/hub/webhooks-guide-discussion-bot.md
391
+ """
392
+ print("🔔 Webhook received!")
393
+
394
+ # Step 1: Validate webhook secret (security)
395
+ webhook_secret = request.headers.get("X-Webhook-Secret")
396
+ if webhook_secret != WEBHOOK_SECRET:
397
+ print("❌ Invalid webhook secret")
398
+ return {"error": "Invalid webhook secret"}, 400
399
+
400
+ # Step 2: Parse webhook data
401
+ try:
402
+ webhook_data = await request.json()
403
+ print(f"📥 Webhook data: {json.dumps(webhook_data, indent=2)}")
404
+ except Exception as e:
405
+ print(f"❌ Error parsing webhook data: {str(e)}")
406
+ return {"error": "invalid JSON"}, 400
407
+
408
+ # Step 3: Validate event structure
409
+ event = webhook_data.get("event", {})
410
+ if not event:
411
+ print("❌ No event data in webhook")
412
+ return {"error": "missing event data"}, 400
413
+
414
+ scope = event.get("scope")
415
+ action = event.get("action")
416
+
417
+ print(f"🔍 Event details - scope: {scope}, action: {action}")
418
+
419
+ # Step 4: Check if this is a discussion comment creation
420
+ # Following the webhook guide pattern:
421
+ if (
422
+ action == "create" and
423
+ scope == "discussion.comment"
424
+ ):
425
+ print("✅ Valid discussion comment creation event")
426
+
427
+ # Process in background to return quickly to Hub
428
+ background_tasks.add_task(process_webhook_comment, webhook_data)
429
+
430
+ return {
431
+ "status": "accepted",
432
+ "message": "Comment processing started",
433
+ "timestamp": datetime.now().isoformat()
434
+ }
435
+ else:
436
+ print(f"ℹ️ Ignoring event: action={event.get('action')}, scope={event.get('scope')}")
437
+ return {
438
+ "status": "ignored",
439
+ "reason": "Not a discussion comment creation"
440
+ }
441
+
442
+
443
  async def process_webhook_comment(webhook_data: Dict[str, Any]):
444
+ """
445
+ Process webhook comment to detect and add tags
446
+ Integrates with our MCP client for Hub interactions
447
+ """
448
  print("🏷️ Starting process_webhook_comment...")
449
+
450
  try:
451
+ # Extract comment and repository information
452
  comment_content = webhook_data["comment"]["content"]
453
  discussion_title = webhook_data["discussion"]["title"]
454
  repo_name = webhook_data["repo"]["name"]
455
  discussion_num = webhook_data["discussion"]["num"]
 
456
  comment_author = webhook_data["comment"]["author"].get("id", "unknown")
457
+
458
+ print(f"📝 Comment from {comment_author}: {comment_content}")
459
+ print(f"📰 Discussion: {discussion_title}")
460
  print(f"📦 Repository: {repo_name}")
461
+ except Exception as e:
462
+ print(f"❌ Error parsing webhook data: {str(e)}")
463
+ return {"error": "invalid JSON"}, 400
464
 
465
+ # Extract potential tags from comment and title
466
+ comment_tags = extract_tags_from_text(comment_content)
467
+ title_tags = extract_tags_from_text(discussion_title)
468
+ all_tags = list(set(comment_tags + title_tags))
469
+
470
+ print(f"🔍 Found tags: {all_tags}")
471
+
472
+ # Store operation for monitoring
473
+ operation = {
474
+ "timestamp": datetime.now().isoformat(),
475
+ "repo_name": repo_name,
476
+ "discussion_num": discussion_num,
477
+ "comment_author": comment_author,
478
+ "extracted_tags": all_tags,
479
+ "comment_preview": comment_content[:100] + "..." if len(comment_content) > 100 else comment_content,
480
+ "status": "processing"
481
+ }
482
+ tag_operations_store.append(operation)
483
+
484
+ if not all_tags:
485
+ operation["status"] = "no_tags"
486
+ operation["message"] = "No recognizable tags found"
487
+ print("❌ No tags found to process")
488
+ return
489
+
490
+ # Get MCP agent for tag processing
491
+ agent = await get_agent()
492
+ if not agent:
493
+ operation["status"] = "error"
494
+ operation["message"] = "Agent not configured (missing HF_TOKEN)"
495
+ print("❌ No agent available")
496
+ return
497
+
498
+ # Process each extracted tag
499
+ operation["results"] = []
500
+ for tag in all_tags:
501
+ try:
502
+ print(f"🤖 Processing tag '{tag}' for repo '{repo_name}'")
503
+
504
+ # Create prompt for agent to handle tag processing
505
+ prompt = f"""
506
+ Analyze the repository '{repo_name}' and determine if the tag '{tag}' should be added.
507
+
508
+ First, check the current tags using get_current_tags.
509
+ If '{tag}' is not already present and it's a valid tag, add it using add_new_tag.
510
+
511
+ Repository: {repo_name}
512
+ Tag to process: {tag}
513
+
514
+ Provide a clear summary of what was done.
515
+ """
516
+
517
+ response = await agent.run(prompt) # type: ignore
518
+ print(f"🤖 Agent response for '{tag}': {response}")
519
+
520
+ # Parse response and store result
521
+ tag_result = {
522
+ "tag": tag,
523
+ "response": response,
524
+ "timestamp": datetime.now().isoformat()
525
+ }
526
+ operation["results"].append(tag_result)
527
+
528
+ except Exception as e:
529
+ error_msg = f"❌ Error processing tag '{tag}': {str(e)}"
530
+ print(error_msg)
531
+ operation["results"].append({
532
+ "tag": tag,
533
+ "error": str(e),
534
+ "timestamp": datetime.now().isoformat()
535
+ })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
536
 
537
+ operation["status"] = "completed"
538
+ print(f" Completed processing {len(all_tags)} tags")
 
 
539
 
540
 
541
+ @app.get("/")
542
+ async def root():
543
+ """Root endpoint with basic information"""
544
+ return {
545
+ "name": "HF Tagging Bot",
546
+ "status": "running",
547
+ "description": "Webhook listener for automatic model tagging",
548
+ "endpoints": {
549
+ "webhook": "/webhook",
550
+ "health": "/health",
551
+ "operations": "/operations"
552
+ }
553
+ }
554
 
 
 
555
 
556
+ @app.get("/health")
557
+ async def health_check():
558
+ """Health check endpoint for monitoring"""
559
+ agent = await get_agent()
560
+
561
+ return {
562
+ "status": "healthy",
563
+ "timestamp": datetime.now().isoformat(),
564
+ "components": {
565
+ "webhook_secret": "configured" if WEBHOOK_SECRET else "missing",
566
+ "hf_token": "configured" if HF_TOKEN else "missing",
567
+ "mcp_agent": "ready" if agent else "not_ready"
568
+ }
569
+ }
570
 
 
571
 
572
+ @app.get("/operations")
573
+ async def get_operations():
574
+ """Get recent tag operations for monitoring"""
575
+ # Return last 50 operations
576
+ recent_ops = tag_operations_store[-50:] if tag_operations_store else []
577
+ return {
578
+ "total_operations": len(tag_operations_store),
579
+ "recent_operations": recent_ops
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
580
  }
581
 
582
+
583
+ # async def simulate_webhook(
584
+ # repo_name: str, discussion_title: str, comment_content: str
585
+ # ) -> str:
586
+ # """Simulate webhook for testing"""
587
+ # if not all([repo_name, discussion_title, comment_content]):
588
+ # return "Please fill in all fields."
589
+ #
590
+ # mock_payload = {
591
+ # "event": {"action": "create", "scope": "discussion"},
592
+ # "comment": {
593
+ # "content": comment_content,
594
+ # "author": {"id": "test-user-id"},
595
+ # "id": "mock-comment-id",
596
+ # "hidden": False,
597
+ # },
598
+ # "discussion": {
599
+ # "title": discussion_title,
600
+ # "num": len(tag_operations_store) + 1,
601
+ # "id": "mock-discussion-id",
602
+ # "status": "open",
603
+ # "isPullRequest": False,
604
+ # },
605
+ # "repo": {
606
+ # "name": repo_name,
607
+ # "type": "model",
608
+ # "private": False,
609
+ # },
610
+ # }
611
+ #
612
+ # response = await process_webhook_comment(mock_payload)
613
+ # return f"✅ Processed! Results: {response}"
614
 
615
 
616
  def create_gradio_app():
 
654
  with gr.Column():
655
  sim_result = gr.Textbox(label="Result", lines=8)
656
 
657
+ # sim_btn.click(
658
+ # fn=simulate_webhook,
659
+ # inputs=[sim_repo, sim_title, sim_comment],
660
+ # outputs=sim_result,
661
+ # )
662
 
663
  gr.Markdown(f"""
664
  ## Recognized Tags: