asmaa105 committed on
Commit
f0b262b
·
verified ·
1 Parent(s): b4b3790

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +524 -524
app.py CHANGED
@@ -1,524 +1,524 @@
1
- import os
2
- import re
3
- import json
4
- from datetime import datetime
5
- from typing import List, Dict, Any, Optional, Literal
6
-
7
- from fastapi import FastAPI, Request, BackgroundTasks
8
- from fastapi.middleware.cors import CORSMiddleware
9
- import gradio as gr
10
- import uvicorn
11
- from pydantic import BaseModel
12
- from huggingface_hub.inference._mcp.agent import Agent
13
- from dotenv import load_dotenv
14
-
15
- load_dotenv()
16
-
17
- # Configuration
18
- WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "5a775af722adc63d0b895454e3fb7a50cbc62bfa3f97e37d50d1a986c91d8781")
19
- HF_TOKEN = os.getenv("HF_TOKEN")
20
- HF_MODEL = os.getenv("HF_MODEL", "microsoft/DialoGPT-medium")
21
- # Use a valid provider literal from the documentation
22
- DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
23
- HF_PROVIDER = os.getenv("HF_PROVIDER", DEFAULT_PROVIDER)
24
-
25
- # Simple storage for processed tag operations
26
- tag_operations_store: List[Dict[str, Any]] = []
27
-
28
- # Agent instance
29
- agent_instance: Optional[Agent] = None
30
-
31
- # Common ML tags that we recognize for auto-tagging
32
- RECOGNIZED_TAGS = {
33
- "pytorch",
34
- "tensorflow",
35
- "jax",
36
- "transformers",
37
- "diffusers",
38
- "text-generation",
39
- "text-classification",
40
- "question-answering",
41
- "text-to-image",
42
- "image-classification",
43
- "object-detection",
44
- " ",
45
- "fill-mask",
46
- "token-classification",
47
- "translation",
48
- "summarization",
49
- "feature-extraction",
50
- "sentence-similarity",
51
- "zero-shot-classification",
52
- "image-to-text",
53
- "automatic-speech-recognition",
54
- "audio-classification",
55
- "voice-activity-detection",
56
- "depth-estimation",
57
- "image-segmentation",
58
- "video-classification",
59
- "reinforcement-learning",
60
- "tabular-classification",
61
- "tabular-regression",
62
- "time-series-forecasting",
63
- "graph-ml",
64
- "robotics",
65
- "computer-vision",
66
- "nlp",
67
- "cv",
68
- "multimodal",
69
- }
70
-
71
-
72
- class WebhookEvent(BaseModel):
73
- event: Dict[str, str]
74
- comment: Dict[str, Any]
75
- discussion: Dict[str, Any]
76
- repo: Dict[str, str]
77
-
78
-
79
- app = FastAPI(title="HF Tagging Bot")
80
- app.add_middleware(CORSMiddleware, allow_origins=["*"])
81
-
82
-
83
- async def get_agent():
84
- """Get or create Agent instance"""
85
- print("πŸ€– get_agent() called...")
86
- global agent_instance
87
- if agent_instance is None and HF_TOKEN:
88
- print("πŸ”§ Creating new Agent instance...")
89
- print(f"πŸ”‘ HF_TOKEN present: {bool(HF_TOKEN)}")
90
- print(f"πŸ€– Model: {HF_MODEL}")
91
- print(f"πŸ”— Provider: {DEFAULT_PROVIDER}")
92
-
93
- try:
94
- agent_instance = Agent(
95
- model=HF_MODEL,
96
- provider=DEFAULT_PROVIDER,
97
- api_key=HF_TOKEN,
98
- servers=[
99
- {
100
- "type": "stdio",
101
- "config": {
102
- "command": "python",
103
- "args": ["mcp_server.py"],
104
- "cwd": ".", # Ensure correct working directory
105
- "env": {"HF_TOKEN": HF_TOKEN} if HF_TOKEN else {},
106
- },
107
- }
108
- ],
109
- )
110
- print("βœ… Agent instance created successfully")
111
- print("πŸ”§ Loading tools...")
112
- await agent_instance.load_tools()
113
- print("βœ… Tools loaded successfully")
114
- except Exception as e:
115
- print(f"❌ Error creating/loading agent: {str(e)}")
116
- agent_instance = None
117
- elif agent_instance is None:
118
- print("❌ No HF_TOKEN available, cannot create agent")
119
- else:
120
- print("βœ… Using existing agent instance")
121
-
122
- return agent_instance
123
-
124
-
125
- def extract_tags_from_text(text: str) -> List[str]:
126
- """Extract potential tags from discussion text"""
127
- text_lower = text.lower()
128
-
129
- # Look for explicit tag mentions like "tag: pytorch" or "#pytorch"
130
- explicit_tags = []
131
-
132
- # Pattern 1: "tag: something" or "tags: something"
133
- tag_pattern = r"tags?:\s*([a-zA-Z0-9-_,\s]+)"
134
- matches = re.findall(tag_pattern, text_lower)
135
- for match in matches:
136
- # Split by comma and clean up
137
- tags = [tag.strip() for tag in match.split(",")]
138
- explicit_tags.extend(tags)
139
-
140
- # Pattern 2: "#hashtag" style
141
- hashtag_pattern = r"#([a-zA-Z0-9-_]+)"
142
- hashtag_matches = re.findall(hashtag_pattern, text_lower)
143
- explicit_tags.extend(hashtag_matches)
144
-
145
- # Pattern 3: Look for recognized tags mentioned in natural text
146
- mentioned_tags = []
147
- for tag in RECOGNIZED_TAGS:
148
- if tag in text_lower:
149
- mentioned_tags.append(tag)
150
-
151
- # Combine and deduplicate
152
- all_tags = list(set(explicit_tags + mentioned_tags))
153
-
154
- # Filter to only include recognized tags or explicitly mentioned ones
155
- valid_tags = []
156
- for tag in all_tags:
157
- if tag in RECOGNIZED_TAGS or tag in explicit_tags:
158
- valid_tags.append(tag)
159
-
160
- return valid_tags
161
-
162
-
163
- async def process_webhook_comment(webhook_data: Dict[str, Any]):
164
- """Process webhook to detect and add tags"""
165
- print("🏷️ Starting process_webhook_comment...")
166
-
167
- try:
168
- comment_content = webhook_data["comment"]["content"]
169
- discussion_title = webhook_data["discussion"]["title"]
170
- repo_name = webhook_data["repo"]["name"]
171
- discussion_num = webhook_data["discussion"]["num"]
172
- # Author is an object with "id" field
173
- comment_author = webhook_data["comment"]["author"].get("id", "unknown")
174
-
175
- print(f"πŸ“ Comment content: {comment_content}")
176
- print(f"πŸ“° Discussion title: {discussion_title}")
177
- print(f"πŸ“¦ Repository: {repo_name}")
178
-
179
- # Extract potential tags from the comment and discussion title
180
- comment_tags = extract_tags_from_text(comment_content)
181
- title_tags = extract_tags_from_text(discussion_title)
182
- all_tags = list(set(comment_tags + title_tags))
183
-
184
- print(f"πŸ” Comment tags found: {comment_tags}")
185
- print(f"πŸ” Title tags found: {title_tags}")
186
- print(f"🏷️ All unique tags: {all_tags}")
187
-
188
- result_messages = []
189
-
190
- if not all_tags:
191
- msg = "No recognizable tags found in the discussion."
192
- print(f"❌ {msg}")
193
- result_messages.append(msg)
194
- else:
195
- print("πŸ€– Getting agent instance...")
196
- agent = await get_agent()
197
- if not agent:
198
- msg = "Error: Agent not configured (missing HF_TOKEN)"
199
- print(f"❌ {msg}")
200
- result_messages.append(msg)
201
- else:
202
- print("βœ… Agent instance obtained successfully")
203
-
204
- # Process all tags in a single conversation with the agent
205
- try:
206
- # Create a comprehensive prompt for the agent
207
- user_prompt = f"""
208
- I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
209
-
210
- For each tag, please:
211
- 1. Check if the tag already exists on the repository using get_current_tags
212
- 2. If the tag doesn't exist, add it using add_new_tag
213
- 3. Provide a summary of what was done for each tag
214
-
215
- Please process all {len(all_tags)} tags: {", ".join(all_tags)}
216
- """
217
-
218
- print("πŸ’¬ Sending comprehensive prompt to agent...")
219
- print(f"πŸ“ Prompt: {user_prompt}")
220
-
221
- # Let the agent handle the entire conversation
222
- conversation_result = []
223
-
224
- try:
225
- async for item in agent.run(user_prompt):
226
- # The agent yields different types of items
227
- item_str = str(item)
228
- conversation_result.append(item_str)
229
-
230
- # Log important events
231
- if (
232
- "tool_call" in item_str.lower()
233
- or "function" in item_str.lower()
234
- ):
235
- print(f"πŸ”§ Agent using tools: {item_str[:200]}...")
236
- elif "content" in item_str and len(item_str) < 500:
237
- print(f"πŸ’­ Agent response: {item_str}")
238
-
239
- # Extract the final response from the conversation
240
- full_response = " ".join(conversation_result)
241
- print(f"πŸ“‹ Agent conversation completed successfully")
242
-
243
- # Try to extract meaningful results for each tag
244
- for tag in all_tags:
245
- tag_mentioned = tag.lower() in full_response.lower()
246
-
247
- if (
248
- "already exists" in full_response.lower()
249
- and tag_mentioned
250
- ):
251
- msg = f"Tag '{tag}': Already exists"
252
- elif (
253
- "pr" in full_response.lower()
254
- or "pull request" in full_response.lower()
255
- ):
256
- if tag_mentioned:
257
- msg = f"Tag '{tag}': PR created successfully"
258
- else:
259
- msg = (
260
- f"Tag '{tag}': Processed "
261
- "(PR may have been created)"
262
- )
263
- elif "success" in full_response.lower() and tag_mentioned:
264
- msg = f"Tag '{tag}': Successfully processed"
265
- elif "error" in full_response.lower() and tag_mentioned:
266
- msg = f"Tag '{tag}': Error during processing"
267
- else:
268
- msg = f"Tag '{tag}': Processed by agent"
269
-
270
- print(f"βœ… Result for tag '{tag}': {msg}")
271
- result_messages.append(msg)
272
-
273
- except Exception as agent_error:
274
- print(f"⚠️ Agent streaming failed: {str(agent_error)}")
275
- print("πŸ”„ Falling back to direct MCP tool calls...")
276
-
277
- # Import the MCP server functions directly as fallback
278
- try:
279
- import sys
280
- import importlib.util
281
-
282
- # Load the MCP server module
283
- spec = importlib.util.spec_from_file_location(
284
- "mcp_server", "./mcp_server.py"
285
- )
286
- mcp_module = importlib.util.module_from_spec(spec)
287
- spec.loader.exec_module(mcp_module)
288
-
289
- # Use the MCP tools directly for each tag
290
- for tag in all_tags:
291
- try:
292
- print(
293
- f"πŸ”§ Directly calling get_current_tags for '{tag}'"
294
- )
295
- current_tags_result = mcp_module.get_current_tags(
296
- repo_name
297
- )
298
- print(
299
- f"πŸ“„ Current tags result: {current_tags_result}"
300
- )
301
-
302
- # Parse the JSON result
303
- import json
304
-
305
- tags_data = json.loads(current_tags_result)
306
-
307
- if tags_data.get("status") == "success":
308
- current_tags = tags_data.get("current_tags", [])
309
- if tag in current_tags:
310
- msg = f"Tag '{tag}': Already exists"
311
- print(f"βœ… {msg}")
312
- else:
313
- print(
314
- f"πŸ”§ Directly calling add_new_tag for '{tag}'"
315
- )
316
- add_result = mcp_module.add_new_tag(
317
- repo_name, tag
318
- )
319
- print(f"πŸ“„ Add tag result: {add_result}")
320
-
321
- add_data = json.loads(add_result)
322
- if add_data.get("status") == "success":
323
- pr_url = add_data.get("pr_url", "")
324
- msg = f"Tag '{tag}': PR created - {pr_url}"
325
- elif (
326
- add_data.get("status")
327
- == "already_exists"
328
- ):
329
- msg = f"Tag '{tag}': Already exists"
330
- else:
331
- msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
332
- print(f"βœ… {msg}")
333
- else:
334
- error_msg = tags_data.get(
335
- "error", "Unknown error"
336
- )
337
- msg = f"Tag '{tag}': Error - {error_msg}"
338
- print(f"❌ {msg}")
339
-
340
- result_messages.append(msg)
341
-
342
- except Exception as direct_error:
343
- error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
344
- print(f"❌ {error_msg}")
345
- result_messages.append(error_msg)
346
-
347
- except Exception as fallback_error:
348
- error_msg = (
349
- f"Fallback approach failed: {str(fallback_error)}"
350
- )
351
- print(f"❌ {error_msg}")
352
- result_messages.append(error_msg)
353
-
354
- except Exception as e:
355
- error_msg = f"Error during agent processing: {str(e)}"
356
- print(f"❌ {error_msg}")
357
- result_messages.append(error_msg)
358
-
359
- # Store the interaction
360
- base_url = "https://huggingface.co"
361
- discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
362
-
363
- interaction = {
364
- "timestamp": datetime.now().isoformat(),
365
- "repo": repo_name,
366
- "discussion_title": discussion_title,
367
- "discussion_num": discussion_num,
368
- "discussion_url": discussion_url,
369
- "original_comment": comment_content,
370
- "comment_author": comment_author,
371
- "detected_tags": all_tags,
372
- "results": result_messages,
373
- }
374
-
375
- tag_operations_store.append(interaction)
376
- final_result = " | ".join(result_messages)
377
- print(f"πŸ’Ύ Stored interaction and returning result: {final_result}")
378
- return final_result
379
-
380
- except Exception as e:
381
- error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
382
- print(error_msg)
383
- return error_msg
384
-
385
-
386
- @app.post("/webhook")
387
- async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
388
- """Handle HF Hub webhooks"""
389
- webhook_secret = request.headers.get("X-Webhook-Secret")
390
- if webhook_secret != WEBHOOK_SECRET:
391
- print("❌ Invalid webhook secret")
392
- return {"error": "Invalid webhook secret"}
393
-
394
- payload = await request.json()
395
- print(f"πŸ“₯ Received webhook payload: {json.dumps(payload, indent=2)}")
396
-
397
- event = payload.get("event", {})
398
- scope = event.get("scope")
399
- action = event.get("action")
400
-
401
- print(f"πŸ” Event details - scope: {scope}, action: {action}")
402
-
403
- # Check if this is a discussion comment creation
404
- scope_check = scope == "discussion"
405
- action_check = action == "create"
406
- not_pr = not payload["discussion"]["isPullRequest"]
407
- scope_check = scope_check and not_pr
408
- print(f"βœ… not_pr: {not_pr}")
409
- print(f"βœ… scope_check: {scope_check}")
410
- print(f"βœ… action_check: {action_check}")
411
-
412
- if scope_check and action_check:
413
- # Verify we have the required fields
414
- required_fields = ["comment", "discussion", "repo"]
415
- missing_fields = [field for field in required_fields if field not in payload]
416
-
417
- if missing_fields:
418
- error_msg = f"Missing required fields: {missing_fields}"
419
- print(f"❌ {error_msg}")
420
- return {"error": error_msg}
421
-
422
- print(f"πŸš€ Processing webhook for repo: {payload['repo']['name']}")
423
- background_tasks.add_task(process_webhook_comment, payload)
424
- return {"status": "processing"}
425
-
426
- print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
427
- return {"status": "ignored"}
428
-
429
-
430
- async def simulate_webhook(
431
- repo_name: str, discussion_title: str, comment_content: str
432
- ) -> str:
433
- """Simulate webhook for testing"""
434
- if not all([repo_name, discussion_title, comment_content]):
435
- return "Please fill in all fields."
436
-
437
- mock_payload = {
438
- "event": {"action": "create", "scope": "discussion"},
439
- "comment": {
440
- "content": comment_content,
441
- "author": {"id": "test-user-id"},
442
- "id": "mock-comment-id",
443
- "hidden": False,
444
- },
445
- "discussion": {
446
- "title": discussion_title,
447
- "num": len(tag_operations_store) + 1,
448
- "id": "mock-discussion-id",
449
- "status": "open",
450
- "isPullRequest": False,
451
- },
452
- "repo": {
453
- "name": repo_name,
454
- "type": "model",
455
- "private": False,
456
- },
457
- }
458
-
459
- response = await process_webhook_comment(mock_payload)
460
- return f"βœ… Processed! Results: {response}"
461
-
462
-
463
- def create_gradio_app():
464
- """Create Gradio interface"""
465
- with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as demo:
466
- gr.Markdown("# 🏷️ HF Tagging Bot Dashboard")
467
- gr.Markdown("*Automatically adds tags to models when mentioned in discussions*")
468
-
469
- gr.Markdown("""
470
- ## How it works:
471
- - Monitors HuggingFace Hub discussions
472
- - Detects tag mentions in comments (e.g., "tag: pytorch",
473
- "#transformers")
474
- - Automatically adds recognized tags to the model repository
475
- - Supports common ML tags like: pytorch, tensorflow,
476
- text-generation, etc.
477
- """)
478
-
479
- with gr.Column():
480
- sim_repo = gr.Textbox(
481
- label="Repository",
482
- value="burtenshaw/play-mcp-repo-bot",
483
- placeholder="username/model-name",
484
- )
485
- sim_title = gr.Textbox(
486
- label="Discussion Title",
487
- value="Add pytorch tag",
488
- placeholder="Discussion title",
489
- )
490
- sim_comment = gr.Textbox(
491
- label="Comment",
492
- lines=3,
493
- value="This model should have tags: pytorch, text-generation",
494
- placeholder="Comment mentioning tags...",
495
- )
496
- sim_btn = gr.Button("🏷️ Test Tag Detection")
497
-
498
- with gr.Column():
499
- sim_result = gr.Textbox(label="Result", lines=8)
500
-
501
- sim_btn.click(
502
- fn=simulate_webhook,
503
- inputs=[sim_repo, sim_title, sim_comment],
504
- outputs=sim_result,
505
- )
506
-
507
- gr.Markdown(f"""
508
- ## Recognized Tags:
509
- {", ".join(sorted(RECOGNIZED_TAGS))}
510
- """)
511
-
512
- return demo
513
-
514
-
515
- # Mount Gradio app
516
- gradio_app = create_gradio_app()
517
- app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
518
-
519
-
520
- if __name__ == "__main__":
521
- print("πŸš€ Starting HF Tagging Bot...")
522
- print("πŸ“Š Dashboard: http://localhost:7860/gradio")
523
- print("πŸ”— Webhook: http://localhost:7860/webhook")
524
- uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
 
1
+ import os
2
+ import re
3
+ import json
4
+ from datetime import datetime
5
+ from typing import List, Dict, Any, Optional, Literal
6
+
7
+ from fastapi import FastAPI, Request, BackgroundTasks
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ import gradio as gr
10
+ import uvicorn
11
+ from pydantic import BaseModel
12
+ from huggingface_hub.inference._mcp.agent import Agent
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
+ # Configuration
18
+ WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "5a775af722adc63d0b895454e3fb7a50cbc62bfa3f97e37d50d1a986c91d8781")
19
+ HF_TOKEN = os.getenv("HF_TOKEN")
20
+ HF_MODEL = os.getenv("HF_MODEL", "microsoft/DialoGPT-medium")
21
+ # Use a valid provider literal from the documentation
22
+ DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
23
+ HF_PROVIDER = os.getenv("HF_PROVIDER", DEFAULT_PROVIDER)
24
+
25
+ # Simple storage for processed tag operations
26
+ tag_operations_store: List[Dict[str, Any]] = []
27
+
28
+ # Agent instance
29
+ agent_instance: Optional[Agent] = None
30
+
31
+ # Common ML tags that we recognize for auto-tagging
32
+ RECOGNIZED_TAGS = {
33
+ "pytorch",
34
+ "tensorflow",
35
+ "jax",
36
+ "transformers",
37
+ "diffusers",
38
+ "text-generation",
39
+ "text-classification",
40
+ "question-answering",
41
+ "text-to-image",
42
+ "image-classification",
43
+ "object-detection",
44
+ " ",
45
+ "fill-mask",
46
+ "token-classification",
47
+ "translation",
48
+ "summarization",
49
+ "feature-extraction",
50
+ "sentence-similarity",
51
+ "zero-shot-classification",
52
+ "image-to-text",
53
+ "automatic-speech-recognition",
54
+ "audio-classification",
55
+ "voice-activity-detection",
56
+ "depth-estimation",
57
+ "image-segmentation",
58
+ "video-classification",
59
+ "reinforcement-learning",
60
+ "tabular-classification",
61
+ "tabular-regression",
62
+ "time-series-forecasting",
63
+ "graph-ml",
64
+ "robotics",
65
+ "computer-vision",
66
+ "nlp",
67
+ "cv",
68
+ "multimodal",
69
+ }
70
+
71
+
72
+ class WebhookEvent(BaseModel):
73
+ event: Dict[str, str]
74
+ comment: Dict[str, Any]
75
+ discussion: Dict[str, Any]
76
+ repo: Dict[str, str]
77
+
78
+
79
+ app = FastAPI(title="HF Tagging Bot")
80
+ app.add_middleware(CORSMiddleware, allow_origins=["*"])
81
+
82
+
83
+ async def get_agent():
84
+ """Get or create Agent instance"""
85
+ print("πŸ€– get_agent() called...")
86
+ global agent_instance
87
+ if agent_instance is None and HF_TOKEN:
88
+ print("πŸ”§ Creating new Agent instance...")
89
+ print(f"πŸ”‘ HF_TOKEN present: {bool(HF_TOKEN)}")
90
+ print(f"πŸ€– Model: {HF_MODEL}")
91
+ print(f"πŸ”— Provider: {DEFAULT_PROVIDER}")
92
+
93
+ try:
94
+ agent_instance = Agent(
95
+ model=HF_MODEL,
96
+ provider=DEFAULT_PROVIDER,
97
+ api_key=HF_TOKEN,
98
+ servers=[
99
+ {
100
+ "type": "stdio",
101
+ "config": {
102
+ "command": "python",
103
+ "args": ["mcp_server.py"],
104
+ "cwd": ".", # Ensure correct working directory
105
+ "env": {"HF_TOKEN": HF_TOKEN} if HF_TOKEN else {},
106
+ },
107
+ }
108
+ ],
109
+ )
110
+ print("βœ… Agent instance created successfully")
111
+ print("πŸ”§ Loading tools...")
112
+ await agent_instance.load_tools()
113
+ print("βœ… Tools loaded successfully")
114
+ except Exception as e:
115
+ print(f"❌ Error creating/loading agent: {str(e)}")
116
+ agent_instance = None
117
+ elif agent_instance is None:
118
+ print("❌ No HF_TOKEN available, cannot create agent")
119
+ else:
120
+ print("βœ… Using existing agent instance")
121
+
122
+ return agent_instance
123
+
124
+
125
+ def extract_tags_from_text(text: str) -> List[str]:
126
+ """Extract potential tags from discussion text"""
127
+ text_lower = text.lower()
128
+
129
+ # Look for explicit tag mentions like "tag: pytorch" or "#pytorch"
130
+ explicit_tags = []
131
+
132
+ # Pattern 1: "tag: something" or "tags: something"
133
+ tag_pattern = r"tags?:\s*([a-zA-Z0-9-_,\s]+)"
134
+ matches = re.findall(tag_pattern, text_lower)
135
+ for match in matches:
136
+ # Split by comma and clean up
137
+ tags = [tag.strip() for tag in match.split(",")]
138
+ explicit_tags.extend(tags)
139
+
140
+ # Pattern 2: "#hashtag" style
141
+ hashtag_pattern = r"#([a-zA-Z0-9-_]+)"
142
+ hashtag_matches = re.findall(hashtag_pattern, text_lower)
143
+ explicit_tags.extend(hashtag_matches)
144
+
145
+ # Pattern 3: Look for recognized tags mentioned in natural text
146
+ mentioned_tags = []
147
+ for tag in RECOGNIZED_TAGS:
148
+ if tag in text_lower:
149
+ mentioned_tags.append(tag)
150
+
151
+ # Combine and deduplicate
152
+ all_tags = list(set(explicit_tags + mentioned_tags))
153
+
154
+ # Filter to only include recognized tags or explicitly mentioned ones
155
+ valid_tags = []
156
+ for tag in all_tags:
157
+ if tag in RECOGNIZED_TAGS or tag in explicit_tags:
158
+ valid_tags.append(tag)
159
+
160
+ return valid_tags
161
+
162
+
163
+ async def process_webhook_comment(webhook_data: Dict[str, Any]):
164
+ """Process webhook to detect and add tags"""
165
+ print("🏷️ Starting process_webhook_comment...")
166
+
167
+ try:
168
+ comment_content = webhook_data["comment"]["content"]
169
+ discussion_title = webhook_data["discussion"]["title"]
170
+ repo_name = webhook_data["repo"]["name"]
171
+ discussion_num = webhook_data["discussion"]["num"]
172
+ # Author is an object with "id" field
173
+ comment_author = webhook_data["comment"]["author"].get("id", "unknown")
174
+
175
+ print(f"πŸ“ Comment content: {comment_content}")
176
+ print(f"πŸ“° Discussion title: {discussion_title}")
177
+ print(f"πŸ“¦ Repository: {repo_name}")
178
+
179
+ # Extract potential tags from the comment and discussion title
180
+ comment_tags = extract_tags_from_text(comment_content)
181
+ title_tags = extract_tags_from_text(discussion_title)
182
+ all_tags = list(set(comment_tags + title_tags))
183
+
184
+ print(f"πŸ” Comment tags found: {comment_tags}")
185
+ print(f"πŸ” Title tags found: {title_tags}")
186
+ print(f"🏷️ All unique tags: {all_tags}")
187
+
188
+ result_messages = []
189
+
190
+ if not all_tags:
191
+ msg = "No recognizable tags found in the discussion."
192
+ print(f"❌ {msg}")
193
+ result_messages.append(msg)
194
+ else:
195
+ print("πŸ€– Getting agent instance...")
196
+ agent = await get_agent()
197
+ if not agent:
198
+ msg = "Error: Agent not configured (missing HF_TOKEN)"
199
+ print(f"❌ {msg}")
200
+ result_messages.append(msg)
201
+ else:
202
+ print("βœ… Agent instance obtained successfully")
203
+
204
+ # Process all tags in a single conversation with the agent
205
+ try:
206
+ # Create a comprehensive prompt for the agent
207
+ user_prompt = f"""
208
+ I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
209
+
210
+ For each tag, please:
211
+ 1. Check if the tag already exists on the repository using get_current_tags
212
+ 2. If the tag doesn't exist, add it using add_new_tag
213
+ 3. Provide a summary of what was done for each tag
214
+
215
+ Please process all {len(all_tags)} tags: {", ".join(all_tags)}
216
+ """
217
+
218
+ print("πŸ’¬ Sending comprehensive prompt to agent...")
219
+ print(f"πŸ“ Prompt: {user_prompt}")
220
+
221
+ # Let the agent handle the entire conversation
222
+ conversation_result = []
223
+
224
+ try:
225
+ async for item in agent.run(user_prompt):
226
+ # The agent yields different types of items
227
+ item_str = str(item)
228
+ conversation_result.append(item_str)
229
+
230
+ # Log important events
231
+ if (
232
+ "tool_call" in item_str.lower()
233
+ or "function" in item_str.lower()
234
+ ):
235
+ print(f"πŸ”§ Agent using tools: {item_str[:200]}...")
236
+ elif "content" in item_str and len(item_str) < 500:
237
+ print(f"πŸ’­ Agent response: {item_str}")
238
+
239
+ # Extract the final response from the conversation
240
+ full_response = " ".join(conversation_result)
241
+ print(f"πŸ“‹ Agent conversation completed successfully")
242
+
243
+ # Try to extract meaningful results for each tag
244
+ for tag in all_tags:
245
+ tag_mentioned = tag.lower() in full_response.lower()
246
+
247
+ if (
248
+ "already exists" in full_response.lower()
249
+ and tag_mentioned
250
+ ):
251
+ msg = f"Tag '{tag}': Already exists"
252
+ elif (
253
+ "pr" in full_response.lower()
254
+ or "pull request" in full_response.lower()
255
+ ):
256
+ if tag_mentioned:
257
+ msg = f"Tag '{tag}': PR created successfully"
258
+ else:
259
+ msg = (
260
+ f"Tag '{tag}': Processed "
261
+ "(PR may have been created)"
262
+ )
263
+ elif "success" in full_response.lower() and tag_mentioned:
264
+ msg = f"Tag '{tag}': Successfully processed"
265
+ elif "error" in full_response.lower() and tag_mentioned:
266
+ msg = f"Tag '{tag}': Error during processing"
267
+ else:
268
+ msg = f"Tag '{tag}': Processed by agent"
269
+
270
+ print(f"βœ… Result for tag '{tag}': {msg}")
271
+ result_messages.append(msg)
272
+
273
+ except Exception as agent_error:
274
+ print(f"⚠️ Agent streaming failed: {str(agent_error)}")
275
+ print("πŸ”„ Falling back to direct MCP tool calls...")
276
+
277
+ # Import the MCP server functions directly as fallback
278
+ try:
279
+ import sys
280
+ import importlib.util
281
+
282
+ # Load the MCP server module
283
+ spec = importlib.util.spec_from_file_location(
284
+ "mcp_server", "./mcp_server.py"
285
+ )
286
+ mcp_module = importlib.util.module_from_spec(spec)
287
+ spec.loader.exec_module(mcp_module)
288
+
289
+ # Use the MCP tools directly for each tag
290
+ for tag in all_tags:
291
+ try:
292
+ print(
293
+ f"πŸ”§ Directly calling get_current_tags for '{tag}'"
294
+ )
295
+ current_tags_result = mcp_module.get_current_tags(
296
+ repo_name
297
+ )
298
+ print(
299
+ f"πŸ“„ Current tags result: {current_tags_result}"
300
+ )
301
+
302
+ # Parse the JSON result
303
+ import json
304
+
305
+ tags_data = json.loads(current_tags_result)
306
+
307
+ if tags_data.get("status") == "success":
308
+ current_tags = tags_data.get("current_tags", [])
309
+ if tag in current_tags:
310
+ msg = f"Tag '{tag}': Already exists"
311
+ print(f"βœ… {msg}")
312
+ else:
313
+ print(
314
+ f"πŸ”§ Directly calling add_new_tag for '{tag}'"
315
+ )
316
+ add_result = mcp_module.add_new_tag(
317
+ repo_name, tag
318
+ )
319
+ print(f"πŸ“„ Add tag result: {add_result}")
320
+
321
+ add_data = json.loads(add_result)
322
+ if add_data.get("status") == "success":
323
+ pr_url = add_data.get("pr_url", "")
324
+ msg = f"Tag '{tag}': PR created - {pr_url}"
325
+ elif (
326
+ add_data.get("status")
327
+ == "already_exists"
328
+ ):
329
+ msg = f"Tag '{tag}': Already exists"
330
+ else:
331
+ msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
332
+ print(f"βœ… {msg}")
333
+ else:
334
+ error_msg = tags_data.get(
335
+ "error", "Unknown error"
336
+ )
337
+ msg = f"Tag '{tag}': Error - {error_msg}"
338
+ print(f"❌ {msg}")
339
+
340
+ result_messages.append(msg)
341
+
342
+ except Exception as direct_error:
343
+ error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
344
+ print(f"❌ {error_msg}")
345
+ result_messages.append(error_msg)
346
+
347
+ except Exception as fallback_error:
348
+ error_msg = (
349
+ f"Fallback approach failed: {str(fallback_error)}"
350
+ )
351
+ print(f"❌ {error_msg}")
352
+ result_messages.append(error_msg)
353
+
354
+ except Exception as e:
355
+ error_msg = f"Error during agent processing: {str(e)}"
356
+ print(f"❌ {error_msg}")
357
+ result_messages.append(error_msg)
358
+
359
+ # Store the interaction
360
+ base_url = "https://huggingface.co"
361
+ discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
362
+
363
+ interaction = {
364
+ "timestamp": datetime.now().isoformat(),
365
+ "repo": repo_name,
366
+ "discussion_title": discussion_title,
367
+ "discussion_num": discussion_num,
368
+ "discussion_url": discussion_url,
369
+ "original_comment": comment_content,
370
+ "comment_author": comment_author,
371
+ "detected_tags": all_tags,
372
+ "results": result_messages,
373
+ }
374
+
375
+ tag_operations_store.append(interaction)
376
+ final_result = " | ".join(result_messages)
377
+ print(f"πŸ’Ύ Stored interaction and returning result: {final_result}")
378
+ return final_result
379
+
380
+ except Exception as e:
381
+ error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
382
+ print(error_msg)
383
+ return error_msg
384
+
385
+
386
+ @app.post("/webhook")
387
+ async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
388
+ """Handle HF Hub webhooks"""
389
+ webhook_secret = request.headers.get("X-Webhook-Secret")
390
+ if webhook_secret != WEBHOOK_SECRET:
391
+ print("❌ Invalid webhook secret")
392
+ return {"error": "Invalid webhook secret"}
393
+
394
+ payload = await request.json()
395
+ print(f"πŸ“₯ Received webhook payload: {json.dumps(payload, indent=2)}")
396
+
397
+ event = payload.get("event", {})
398
+ scope = event.get("scope")
399
+ action = event.get("action")
400
+
401
+ print(f"πŸ” Event details - scope: {scope}, action: {action}")
402
+
403
+ # Check if this is a discussion comment creation
404
+ scope_check = scope == "discussion.comment"
405
+ action_check = action == "create"
406
+ not_pr = not payload["discussion"]["isPullRequest"]
407
+ # scope_check = scope_check and not_pr
408
+ print(f"βœ… not_pr: {not_pr}")
409
+ print(f"βœ… scope_check: {scope_check}")
410
+ print(f"βœ… action_check: {action_check}")
411
+
412
+ if scope_check and action_check:
413
+ # Verify we have the required fields
414
+ required_fields = ["comment", "discussion", "repo"]
415
+ missing_fields = [field for field in required_fields if field not in payload]
416
+
417
+ if missing_fields:
418
+ error_msg = f"Missing required fields: {missing_fields}"
419
+ print(f"❌ {error_msg}")
420
+ return {"error": error_msg}
421
+
422
+ print(f"πŸš€ Processing webhook for repo: {payload['repo']['name']}")
423
+ background_tasks.add_task(process_webhook_comment, payload)
424
+ return {"status": "processing"}
425
+
426
+ print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
427
+ return {"status": "ignored"}
428
+
429
+
430
+ async def simulate_webhook(
431
+ repo_name: str, discussion_title: str, comment_content: str
432
+ ) -> str:
433
+ """Simulate webhook for testing"""
434
+ if not all([repo_name, discussion_title, comment_content]):
435
+ return "Please fill in all fields."
436
+
437
+ mock_payload = {
438
+ "event": {"action": "create", "scope": "discussion"},
439
+ "comment": {
440
+ "content": comment_content,
441
+ "author": {"id": "test-user-id"},
442
+ "id": "mock-comment-id",
443
+ "hidden": False,
444
+ },
445
+ "discussion": {
446
+ "title": discussion_title,
447
+ "num": len(tag_operations_store) + 1,
448
+ "id": "mock-discussion-id",
449
+ "status": "open",
450
+ "isPullRequest": False,
451
+ },
452
+ "repo": {
453
+ "name": repo_name,
454
+ "type": "model",
455
+ "private": False,
456
+ },
457
+ }
458
+
459
+ response = await process_webhook_comment(mock_payload)
460
+ return f"βœ… Processed! Results: {response}"
461
+
462
+
463
+ def create_gradio_app():
464
+ """Create Gradio interface"""
465
+ with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as demo:
466
+ gr.Markdown("# 🏷️ HF Tagging Bot Dashboard")
467
+ gr.Markdown("*Automatically adds tags to models when mentioned in discussions*")
468
+
469
+ gr.Markdown("""
470
+ ## How it works:
471
+ - Monitors HuggingFace Hub discussions
472
+ - Detects tag mentions in comments (e.g., "tag: pytorch",
473
+ "#transformers")
474
+ - Automatically adds recognized tags to the model repository
475
+ - Supports common ML tags like: pytorch, tensorflow,
476
+ text-generation, etc.
477
+ """)
478
+
479
+ with gr.Column():
480
+ sim_repo = gr.Textbox(
481
+ label="Repository",
482
+ value="burtenshaw/play-mcp-repo-bot",
483
+ placeholder="username/model-name",
484
+ )
485
+ sim_title = gr.Textbox(
486
+ label="Discussion Title",
487
+ value="Add pytorch tag",
488
+ placeholder="Discussion title",
489
+ )
490
+ sim_comment = gr.Textbox(
491
+ label="Comment",
492
+ lines=3,
493
+ value="This model should have tags: pytorch, text-generation",
494
+ placeholder="Comment mentioning tags...",
495
+ )
496
+ sim_btn = gr.Button("🏷️ Test Tag Detection")
497
+
498
+ with gr.Column():
499
+ sim_result = gr.Textbox(label="Result", lines=8)
500
+
501
+ sim_btn.click(
502
+ fn=simulate_webhook,
503
+ inputs=[sim_repo, sim_title, sim_comment],
504
+ outputs=sim_result,
505
+ )
506
+
507
+ gr.Markdown(f"""
508
+ ## Recognized Tags:
509
+ {", ".join(sorted(RECOGNIZED_TAGS))}
510
+ """)
511
+
512
+ return demo
513
+
514
+
515
+ # Mount Gradio app
516
+ gradio_app = create_gradio_app()
517
+ app = gr.mount_gradio_app(app, gradio_app, path="/gradio")
518
+
519
+
520
+ if __name__ == "__main__":
521
+ print("πŸš€ Starting HF Tagging Bot...")
522
+ print("πŸ“Š Dashboard: http://localhost:7860/gradio")
523
+ print("πŸ”— Webhook: http://localhost:7860/webhook")
524
+ uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)