asmaa105 committed on
Commit
b4b3790
·
verified ·
1 Parent(s): 61bd34c

Upload 10 files

Browse files
Files changed (10) hide show
  1. .gitattributes +35 -35
  2. .python-version +1 -0
  3. Dockerfile +33 -0
  4. README.md +21 -12
  5. app.py +524 -0
  6. env.example +12 -0
  7. mcp_server.py +184 -0
  8. pyproject.toml +24 -0
  9. requirements.txt +77 -0
  10. uv.lock +0 -0
.gitattributes CHANGED
@@ -1,35 +1,35 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    && rm -rf /var/lib/apt/lists/*

# Install the pinned Python dependencies first so this layer stays cached when
# only application code changes. The app is run as plain scripts, so there is
# no project package to install.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy project files. The FastAPI/Gradio entry point shipped with this project
# is app.py (there is no server.py), and app.py spawns mcp_server.py.
COPY pyproject.toml .
COPY app.py .
COPY mcp_server.py .
COPY env.example .
COPY README.md .

# Create a non-root user
RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
USER appuser

# app.py binds uvicorn to port 7860
EXPOSE 7860

# Health check: the slim base image ships no curl, so probe with the Python
# standard library. "/" has no route; the Gradio UI is mounted at /gradio.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/gradio/', timeout=5)" || exit 1

# Run the application
CMD ["python", "app.py"]
README.md CHANGED
@@ -1,12 +1,21 @@
1
- ---
2
- title: Tag This Repo
3
- emoji: 🌍
4
- colorFrom: gray
5
- colorTo: purple
6
- sdk: gradio
7
- sdk_version: 5.34.2
8
- app_file: app.py
9
- pinned: false
10
- ---
11
-
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: tag-a-repo bot
3
+ emoji: 👀
4
+ colorFrom: purple
5
+ colorTo: yellow
6
+ sdk: gradio
7
+ sdk_version: 5.31.0
8
+ app_file: app.py
9
+ pinned: false
10
+ base_path: /gradio
11
+ ---
12
+
13
+ # HF Tagging Bot
14
+
15
+ This is a bot that tags HuggingFace models when they are mentioned in discussions.
16
+
17
+ ## How it works
18
+
19
+ 1. The bot listens to discussions on the HuggingFace Hub
20
+ 2. When a discussion is created, the bot checks for tag mentions in the comment
21
+ 3. If a tag is mentioned, the bot adds the tag to the model repository via a PR
app.py ADDED
@@ -0,0 +1,524 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import json
4
+ from datetime import datetime
5
+ from typing import List, Dict, Any, Optional, Literal
6
+
7
+ from fastapi import FastAPI, Request, BackgroundTasks
8
+ from fastapi.middleware.cors import CORSMiddleware
9
+ import gradio as gr
10
+ import uvicorn
11
+ from pydantic import BaseModel
12
+ from huggingface_hub.inference._mcp.agent import Agent
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
# Configuration
# NOTE(review): the fallback value below is a secret committed to source control.
# Whenever WEBHOOK_SECRET is unset, anyone who can read this file can send
# accepted webhook calls. Consider requiring the env var and rotating this value.
WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "5a775af722adc63d0b895454e3fb7a50cbc62bfa3f97e37d50d1a986c91d8781")
# Hub access token; get_agent() does not create an agent when this is unset.
HF_TOKEN = os.getenv("HF_TOKEN")
# Model id passed to the Agent constructor.
HF_MODEL = os.getenv("HF_MODEL", "microsoft/DialoGPT-medium")
# Use a valid provider literal from the documentation
DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
# NOTE(review): HF_PROVIDER is read here, but get_agent() passes DEFAULT_PROVIDER,
# so this environment variable appears to have no effect in this file.
HF_PROVIDER = os.getenv("HF_PROVIDER", DEFAULT_PROVIDER)

# Simple storage for processed tag operations
# (in-memory list; one dict is appended per processed webhook)
tag_operations_store: List[Dict[str, Any]] = []

# Agent instance
# (created on first use by get_agent(); stays None until then)
agent_instance: Optional[Agent] = None
30
+
31
# Common ML tags that we recognize for auto-tagging.
# Every entry must be a non-empty, lowercase token: the extractor looks these
# values up inside lowercased free text, so a blank entry such as " " would
# match any sentence that contains a space.
RECOGNIZED_TAGS = {
    "pytorch",
    "tensorflow",
    "jax",
    "transformers",
    "diffusers",
    "text-generation",
    "text-classification",
    "question-answering",
    "text-to-image",
    "image-classification",
    "object-detection",
    "fill-mask",
    "token-classification",
    "translation",
    "summarization",
    "feature-extraction",
    "sentence-similarity",
    "zero-shot-classification",
    "image-to-text",
    "automatic-speech-recognition",
    "audio-classification",
    "voice-activity-detection",
    "depth-estimation",
    "image-segmentation",
    "video-classification",
    "reinforcement-learning",
    "tabular-classification",
    "tabular-regression",
    "time-series-forecasting",
    "graph-ml",
    "robotics",
    "computer-vision",
    "nlp",
    "cv",
    "multimodal",
}
70
+
71
+
72
class WebhookEvent(BaseModel):
    """Top-level sections of a Hub webhook payload as consumed by this app.

    Each section is kept as a loose dict. ``repo`` is typed ``Dict[str, Any]``
    because repository payloads carry non-string values (the mock payload
    built in this file includes the boolean ``private`` flag), which a
    ``Dict[str, str]`` field would reject during validation.
    """

    # Expected to hold the "action" and "scope" strings read by the webhook handler.
    event: Dict[str, str]
    # Comment section; the processor reads "content" and "author" from it.
    comment: Dict[str, Any]
    # Discussion section; the processor reads "title" and "num" from it.
    discussion: Dict[str, Any]
    # Repository section; the processor reads "name" from it.
    repo: Dict[str, Any]
77
+
78
+
79
# ASGI application; the /webhook route and the Gradio dashboard are attached
# to it further down in this file.
app = FastAPI(title="HF Tagging Bot")
# NOTE(review): CORS is opened to every origin here — confirm that is intended
# outside of demo deployments.
app.add_middleware(CORSMiddleware, allow_origins=["*"])
81
+
82
+
83
async def get_agent():
    """Return the shared Agent, building it the first time it is needed.

    The agent is only built when HF_TOKEN is set. Building it registers
    ``mcp_server.py`` as a stdio MCP server and loads its tools. Returns
    ``None`` when no token is configured or when creation or tool loading
    raises.
    """
    global agent_instance
    print("🤖 get_agent() called...")

    # Fast path: an agent was already built by an earlier call.
    if agent_instance is not None:
        print("✅ Using existing agent instance")
        return agent_instance

    # Without a token there is nothing to build.
    if not HF_TOKEN:
        print("❌ No HF_TOKEN available, cannot create agent")
        return None

    print("🔧 Creating new Agent instance...")
    print(f"🔑 HF_TOKEN present: {bool(HF_TOKEN)}")
    print(f"🤖 Model: {HF_MODEL}")
    print(f"🔗 Provider: {DEFAULT_PROVIDER}")

    # Description of the local MCP server process the agent should launch.
    stdio_server = {
        "type": "stdio",
        "config": {
            "command": "python",
            "args": ["mcp_server.py"],
            "cwd": ".",  # Ensure correct working directory
            "env": {"HF_TOKEN": HF_TOKEN} if HF_TOKEN else {},
        },
    }

    try:
        agent_instance = Agent(
            model=HF_MODEL,
            provider=DEFAULT_PROVIDER,
            api_key=HF_TOKEN,
            servers=[stdio_server],
        )
        print("✅ Agent instance created successfully")
        print("🔧 Loading tools...")
        await agent_instance.load_tools()
        print("✅ Tools loaded successfully")
    except Exception as exc:
        # Reset the global so a later call retries from scratch.
        print(f"❌ Error creating/loading agent: {str(exc)}")
        agent_instance = None

    return agent_instance
123
+
124
+
125
def extract_tags_from_text(text: str, recognized_tags: Optional[Any] = None) -> List[str]:
    """Extract potential tags from discussion text.

    Three sources are combined, in this order:

    1. Explicit lists such as ``tag: pytorch`` or ``tags: pytorch, jax``.
       Items are comma separated; only the first word of each item is kept,
       so trailing prose ("tag: pytorch please add it") does not become a tag.
    2. Hashtags such as ``#pytorch``.
    3. Recognized tags mentioned in running text, matched as whole tokens
       (``cv`` does not match inside ``cvs``).

    Args:
        text: Free text (comment body or discussion title). Matching is
            case-insensitive; returned tags are lowercase.
        recognized_tags: Optional iterable of lowercase tags to look for in
            running text. Defaults to the module-level ``RECOGNIZED_TAGS``.

    Returns:
        De-duplicated tags in first-seen order (explicit tags, hashtags, then
        recognized tags in sorted order). Empty list for empty input.
    """
    if not text:
        return []
    if recognized_tags is None:
        recognized_tags = RECOGNIZED_TAGS

    text_lower = text.lower()
    # A usable tag starts with a letter or digit and contains only
    # letters, digits, "_" and "-".
    valid_token = re.compile(r"[a-z0-9][a-z0-9_-]*")
    found: List[str] = []

    # Pattern 1: "tag: something" or "tags: a, b". Only spaces/tabs are allowed
    # inside the list so it cannot run on into the following lines.
    for group in re.findall(r"\btags?:[ \t]*([a-z0-9_, \t-]+)", text_lower):
        for item in group.split(","):
            words = item.split()
            # Skip empty items (e.g. from a trailing comma) and bare punctuation.
            if words and valid_token.fullmatch(words[0]):
                found.append(words[0])

    # Pattern 2: "#hashtag" style
    for hashtag in re.findall(r"#([a-z0-9_-]+)", text_lower):
        if valid_token.fullmatch(hashtag):
            found.append(hashtag)

    # Pattern 3: recognized tags mentioned in natural text. Sorted iteration
    # keeps the output order stable between runs.
    for tag in sorted(recognized_tags):
        if not tag.strip():
            continue  # a blank entry would match any text containing a space
        whole_token = r"(?<![a-z0-9])" + re.escape(tag) + r"(?![a-z0-9])"
        if re.search(whole_token, text_lower):
            found.append(tag)

    # De-duplicate while preserving first-seen order.
    return list(dict.fromkeys(found))
161
+
162
+
163
def _summarize_agent_results(all_tags: List[str], full_response: str) -> List[str]:
    """Turn the agent's concatenated output into one status line per tag (keyword heuristic)."""
    response_lower = full_response.lower()
    # Match "pr" as a whole word only: a plain substring test also fires on
    # "processed", "provide", "prompt", ... and would report a PR for nearly
    # every conversation.
    mentions_pr = re.search(r"\b(?:pr|pull request)\b", response_lower) is not None

    messages = []
    for tag in all_tags:
        tag_mentioned = tag.lower() in response_lower

        if "already exists" in response_lower and tag_mentioned:
            msg = f"Tag '{tag}': Already exists"
        elif mentions_pr:
            if tag_mentioned:
                msg = f"Tag '{tag}': PR created successfully"
            else:
                msg = f"Tag '{tag}': Processed (PR may have been created)"
        elif "success" in response_lower and tag_mentioned:
            msg = f"Tag '{tag}': Successfully processed"
        elif "error" in response_lower and tag_mentioned:
            msg = f"Tag '{tag}': Error during processing"
        else:
            msg = f"Tag '{tag}': Processed by agent"

        print(f"✅ Result for tag '{tag}': {msg}")
        messages.append(msg)
    return messages


def _apply_tags_via_mcp_module(repo_name: str, all_tags: List[str]) -> List[str]:
    """Fallback path: load mcp_server.py and call its tool functions directly, one tag at a time."""
    import importlib.util

    # Load the MCP server module from the working directory. Failures here
    # propagate to the caller, which reports them as a fallback failure.
    spec = importlib.util.spec_from_file_location("mcp_server", "./mcp_server.py")
    mcp_module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(mcp_module)

    messages = []
    for tag in all_tags:
        try:
            print(f"🔧 Directly calling get_current_tags for '{tag}'")
            current_tags_result = mcp_module.get_current_tags(repo_name)
            print(f"📄 Current tags result: {current_tags_result}")

            # The tools return JSON strings.
            tags_data = json.loads(current_tags_result)

            if tags_data.get("status") != "success":
                error_msg = tags_data.get("error", "Unknown error")
                msg = f"Tag '{tag}': Error - {error_msg}"
                print(f"❌ {msg}")
            elif tag in tags_data.get("current_tags", []):
                msg = f"Tag '{tag}': Already exists"
                print(f"✅ {msg}")
            else:
                print(f"🔧 Directly calling add_new_tag for '{tag}'")
                add_result = mcp_module.add_new_tag(repo_name, tag)
                print(f"📄 Add tag result: {add_result}")

                add_data = json.loads(add_result)
                add_status = add_data.get("status")
                if add_status == "success":
                    pr_url = add_data.get("pr_url", "")
                    msg = f"Tag '{tag}': PR created - {pr_url}"
                    print(f"✅ {msg}")
                elif add_status == "already_exists":
                    msg = f"Tag '{tag}': Already exists"
                    print(f"✅ {msg}")
                elif add_data.get("error"):
                    # Error results carry "error" and no "message"; report the
                    # failure instead of the generic "Processed" text.
                    msg = f"Tag '{tag}': Error - {add_data['error']}"
                    print(f"❌ {msg}")
                else:
                    msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
                    print(f"✅ {msg}")

            messages.append(msg)

        except Exception as direct_error:
            error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
            print(f"❌ {error_msg}")
            messages.append(error_msg)
    return messages


async def _tag_with_agent(agent, repo_name: str, all_tags: List[str]) -> List[str]:
    """Ask the agent to apply all tags in one conversation; fall back to direct MCP calls if that fails."""
    try:
        # Create a comprehensive prompt for the agent
        user_prompt = f"""
        I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}

        For each tag, please:
        1. Check if the tag already exists on the repository using get_current_tags
        2. If the tag doesn't exist, add it using add_new_tag
        3. Provide a summary of what was done for each tag

        Please process all {len(all_tags)} tags: {", ".join(all_tags)}
        """

        print("💬 Sending comprehensive prompt to agent...")
        print(f"📝 Prompt: {user_prompt}")

        try:
            # Let the agent handle the entire conversation
            conversation_result = []
            async for item in agent.run(user_prompt):
                # The agent yields different types of items; keep their text form
                item_str = str(item)
                conversation_result.append(item_str)

                # Log important events
                if "tool_call" in item_str.lower() or "function" in item_str.lower():
                    print(f"🔧 Agent using tools: {item_str[:200]}...")
                elif "content" in item_str and len(item_str) < 500:
                    print(f"💭 Agent response: {item_str}")

            full_response = " ".join(conversation_result)
            print("📋 Agent conversation completed successfully")
            return _summarize_agent_results(all_tags, full_response)

        except Exception as agent_error:
            print(f"⚠️ Agent streaming failed: {str(agent_error)}")
            print("🔄 Falling back to direct MCP tool calls...")
            try:
                return _apply_tags_via_mcp_module(repo_name, all_tags)
            except Exception as fallback_error:
                error_msg = f"Fallback approach failed: {str(fallback_error)}"
                print(f"❌ {error_msg}")
                return [error_msg]

    except Exception as e:
        error_msg = f"Error during agent processing: {str(e)}"
        print(f"❌ {error_msg}")
        return [error_msg]


async def process_webhook_comment(webhook_data: Dict[str, Any]):
    """Process webhook to detect and add tags.

    Reads the comment text, discussion title/number and repository name from
    ``webhook_data``, extracts tags from the comment and the title, asks the
    agent to apply them (falling back to direct MCP tool calls when the agent
    conversation fails), and appends a record of the run to
    ``tag_operations_store``.

    Args:
        webhook_data: Payload with ``comment`` (``content``, ``author``),
            ``discussion`` (``title``, ``num``) and ``repo`` (``name``).

    Returns:
        The per-tag result messages joined with " | ", or a string starting
        with "❌ Fatal error" when the payload cannot be processed. This
        function does not raise.
    """
    print("🏷️ Starting process_webhook_comment...")

    try:
        comment_content = webhook_data["comment"]["content"]
        discussion_title = webhook_data["discussion"]["title"]
        repo_name = webhook_data["repo"]["name"]
        discussion_num = webhook_data["discussion"]["num"]
        # Author is an object with "id" field
        comment_author = webhook_data["comment"]["author"].get("id", "unknown")

        print(f"📝 Comment content: {comment_content}")
        print(f"📰 Discussion title: {discussion_title}")
        print(f"📦 Repository: {repo_name}")

        # Extract potential tags from the comment and discussion title
        comment_tags = extract_tags_from_text(comment_content)
        title_tags = extract_tags_from_text(discussion_title)
        # De-duplicate while keeping a stable order (a set would reorder the
        # tags differently from run to run).
        all_tags = list(dict.fromkeys(comment_tags + title_tags))

        print(f"🔍 Comment tags found: {comment_tags}")
        print(f"🔍 Title tags found: {title_tags}")
        print(f"🏷️ All unique tags: {all_tags}")

        result_messages = []

        if not all_tags:
            msg = "No recognizable tags found in the discussion."
            print(f"❌ {msg}")
            result_messages.append(msg)
        else:
            print("🤖 Getting agent instance...")
            agent = await get_agent()
            if not agent:
                msg = "Error: Agent not configured (missing HF_TOKEN)"
                print(f"❌ {msg}")
                result_messages.append(msg)
            else:
                print("✅ Agent instance obtained successfully")
                # Process all tags in a single conversation with the agent
                result_messages.extend(await _tag_with_agent(agent, repo_name, all_tags))

        # Store the interaction
        base_url = "https://huggingface.co"
        discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"

        interaction = {
            "timestamp": datetime.now().isoformat(),
            "repo": repo_name,
            "discussion_title": discussion_title,
            "discussion_num": discussion_num,
            "discussion_url": discussion_url,
            "original_comment": comment_content,
            "comment_author": comment_author,
            "detected_tags": all_tags,
            "results": result_messages,
        }

        tag_operations_store.append(interaction)
        final_result = " | ".join(result_messages)
        print(f"💾 Stored interaction and returning result: {final_result}")
        return final_result

    except Exception as e:
        error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
        print(error_msg)
        return error_msg
384
+
385
+
386
@app.post("/webhook")
async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
    """Handle HF Hub webhooks.

    Verifies the ``X-Webhook-Secret`` header, then schedules
    ``process_webhook_comment`` as a background task when the event is a
    ``create`` action with scope ``discussion`` on a discussion that is not a
    pull request.

    Returns:
        ``{"status": "processing"}`` when a task was scheduled,
        ``{"status": "ignored"}`` for other events, or ``{"error": ...}`` for
        a bad secret, a non-object payload, or missing sections.
    """
    import hmac  # local import: only needed for the constant-time comparison

    webhook_secret = request.headers.get("X-Webhook-Secret")
    # Reject a missing header outright and compare in constant time so the
    # check does not leak how much of the secret matched.
    secret_ok = (
        bool(webhook_secret)
        and bool(WEBHOOK_SECRET)
        and hmac.compare_digest(
            webhook_secret.encode("utf-8"), WEBHOOK_SECRET.encode("utf-8")
        )
    )
    if not secret_ok:
        print("❌ Invalid webhook secret")
        return {"error": "Invalid webhook secret"}

    payload = await request.json()
    if not isinstance(payload, dict):
        error_msg = "Invalid payload: expected a JSON object"
        print(f"❌ {error_msg}")
        return {"error": error_msg}
    print(f"📥 Received webhook payload: {json.dumps(payload, indent=2)}")

    event = payload.get("event") or {}
    scope = event.get("scope")
    action = event.get("action")

    print(f"🔍 Event details - scope: {scope}, action: {action}")

    # Check if this is a discussion comment creation. The discussion section
    # is read defensively: events without it must be ignored or reported
    # below, not crash the handler with a KeyError.
    discussion = payload.get("discussion") or {}
    not_pr = not discussion.get("isPullRequest", False)
    scope_check = scope == "discussion" and not_pr
    action_check = action == "create"
    print(f"✅ not_pr: {not_pr}")
    print(f"✅ scope_check: {scope_check}")
    print(f"✅ action_check: {action_check}")

    if scope_check and action_check:
        # Verify we have the required fields (present and non-empty)
        required_fields = ["comment", "discussion", "repo"]
        missing_fields = [field for field in required_fields if not payload.get(field)]

        if missing_fields:
            error_msg = f"Missing required fields: {missing_fields}"
            print(f"❌ {error_msg}")
            return {"error": error_msg}

        print(f"🚀 Processing webhook for repo: {payload['repo'].get('name')}")
        background_tasks.add_task(process_webhook_comment, payload)
        return {"status": "processing"}

    print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
    return {"status": "ignored"}
428
+
429
+
430
async def simulate_webhook(
    repo_name: str, discussion_title: str, comment_content: str
) -> str:
    """Run the tagging pipeline on a hand-built payload (used by the dashboard).

    Returns a prompt to fill in the form when any argument is empty;
    otherwise builds a mock discussion-creation payload, passes it to
    ``process_webhook_comment`` and returns its result prefixed with a
    success marker.
    """
    if not (repo_name and discussion_title and comment_content):
        return "Please fill in all fields."

    fake_comment = {
        "content": comment_content,
        "author": {"id": "test-user-id"},
        "id": "mock-comment-id",
        "hidden": False,
    }
    fake_discussion = {
        "title": discussion_title,
        # Next sequence number based on how many operations were stored so far.
        "num": len(tag_operations_store) + 1,
        "id": "mock-discussion-id",
        "status": "open",
        "isPullRequest": False,
    }
    fake_repo = {
        "name": repo_name,
        "type": "model",
        "private": False,
    }
    mock_payload = {
        "event": {"action": "create", "scope": "discussion"},
        "comment": fake_comment,
        "discussion": fake_discussion,
        "repo": fake_repo,
    }

    outcome = await process_webhook_comment(mock_payload)
    return f"✅ Processed! Results: {outcome}"
461
+
462
+
463
def create_gradio_app():
    """Create Gradio interface

    Builds a Blocks dashboard with three text inputs (repository, discussion
    title, comment), a button wired to ``simulate_webhook``, a result box, and
    a listing of ``RECOGNIZED_TAGS``. Returns the Blocks object.
    """
    with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as demo:
        gr.Markdown("# 🏷️ HF Tagging Bot Dashboard")
        gr.Markdown("*Automatically adds tags to models when mentioned in discussions*")

        gr.Markdown("""
        ## How it works:
        - Monitors HuggingFace Hub discussions
        - Detects tag mentions in comments (e.g., "tag: pytorch",
        "#transformers")
        - Automatically adds recognized tags to the model repository
        - Supports common ML tags like: pytorch, tensorflow,
        text-generation, etc.
        """)

        # Input form for a simulated webhook
        with gr.Column():
            sim_repo = gr.Textbox(
                label="Repository",
                value="burtenshaw/play-mcp-repo-bot",
                placeholder="username/model-name",
            )
            sim_title = gr.Textbox(
                label="Discussion Title",
                value="Add pytorch tag",
                placeholder="Discussion title",
            )
            sim_comment = gr.Textbox(
                label="Comment",
                lines=3,
                value="This model should have tags: pytorch, text-generation",
                placeholder="Comment mentioning tags...",
            )
            sim_btn = gr.Button("🏷️ Test Tag Detection")

        # Output area
        with gr.Column():
            sim_result = gr.Textbox(label="Result", lines=8)

        # Clicking the button runs the simulated webhook and shows its return value
        sim_btn.click(
            fn=simulate_webhook,
            inputs=[sim_repo, sim_title, sim_comment],
            outputs=sim_result,
        )

        # The tag list is rendered once, when the interface is built
        gr.Markdown(f"""
        ## Recognized Tags:
        {", ".join(sorted(RECOGNIZED_TAGS))}
        """)

    return demo
513
+
514
+
515
# Mount Gradio app
# The dashboard is served under /gradio on the same FastAPI application that
# exposes /webhook.
gradio_app = create_gradio_app()
app = gr.mount_gradio_app(app, gradio_app, path="/gradio")


if __name__ == "__main__":
    print("🚀 Starting HF Tagging Bot...")
    print("📊 Dashboard: http://localhost:7860/gradio")
    print("🔗 Webhook: http://localhost:7860/webhook")
    # NOTE(review): reload=True enables uvicorn's auto-reload; confirm it is
    # wanted outside local development.
    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)
env.example ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Webhook Configuration
# Shared secret expected in the X-Webhook-Secret header of incoming webhooks
WEBHOOK_SECRET=your-webhook-secret-here

# Hugging Face Configuration
HF_TOKEN=your-huggingface-token-here

# Model Configuration (optional)
HF_MODEL=microsoft/DialoGPT-medium
# Must be a valid inference provider name; app.py defaults to "hf-inference"
HF_PROVIDER=hf-inference

# Optional: Custom bot username for mention detection
BOT_USERNAME=discussion-bot
mcp_server.py ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Simplified MCP Server for HuggingFace Hub Tagging Operations using FastMCP
4
+ """
5
+
6
+ import os
7
+ import json
8
+ from fastmcp import FastMCP
9
+ from huggingface_hub import HfApi, model_info, ModelCard, ModelCardData
10
+ from huggingface_hub.utils import HfHubHTTPError
11
+ from dotenv import load_dotenv
12
+
13
+ load_dotenv()
14
+
15
# Configuration
# Hub access token, read from the environment (or a .env file via load_dotenv above)
HF_TOKEN = os.getenv("HF_TOKEN")

# Initialize HF API client
# None when no token is configured; the tools below return an error result in that case.
hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None

# Create the FastMCP server
mcp = FastMCP("hf-tagging-bot")
23
+
24
+
25
@mcp.tool()
def get_current_tags(repo_id: str) -> str:
    """Get current tags from a HuggingFace model repository

    Returns a JSON string. On success it has "status": "success" plus
    "repo_id", "current_tags" and "count"; on failure it has
    "status": "error" and an "error" message.
    """
    print(f"🔧 get_current_tags called with repo_id: {repo_id}")

    if not hf_api:
        # Carry "status" like every other result so callers can branch on a
        # single key.
        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "error": "HF token not configured",
        }
        json_str = json.dumps(error_result)
        print(f"❌ No HF API token - returning: {json_str}")
        return json_str

    try:
        print(f"📡 Fetching model info for: {repo_id}")
        info = model_info(repo_id=repo_id, token=HF_TOKEN)
        current_tags = info.tags if info.tags else []
        print(f"🏷️ Found {len(current_tags)} tags: {current_tags}")

        result = {
            "status": "success",
            "repo_id": repo_id,
            "current_tags": current_tags,
            "count": len(current_tags),
        }
        json_str = json.dumps(result)
        print(f"✅ get_current_tags returning: {json_str}")
        return json_str

    except Exception as e:
        # Report the failure to the caller as data instead of raising.
        print(f"❌ Error in get_current_tags: {str(e)}")
        error_result = {"status": "error", "repo_id": repo_id, "error": str(e)}
        json_str = json.dumps(error_result)
        print(f"❌ get_current_tags error returning: {json_str}")
        return json_str
58
+
59
+
60
@mcp.tool()
def add_new_tag(repo_id: str, new_tag: str) -> str:
    """Add a new tag to a HuggingFace model repository via PR

    Returns a JSON string whose "status" is "success" (with "pr_url"),
    "already_exists", or "error" (with an "error" message).
    """
    print(f"🔧 add_new_tag called with repo_id: {repo_id}, new_tag: {new_tag}")

    if not hf_api:
        # Carry "status" like every other result so callers can branch on a
        # single key.
        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "tag": new_tag,
            "error": "HF token not configured",
        }
        json_str = json.dumps(error_result)
        print(f"❌ No HF API token - returning: {json_str}")
        return json_str

    # A blank tag would otherwise produce a PR that adds an empty entry.
    new_tag = (new_tag or "").strip()
    if not new_tag:
        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "tag": new_tag,
            "error": "Tag must not be empty",
        }
        json_str = json.dumps(error_result)
        print(f"❌ Empty tag - returning: {json_str}")
        return json_str

    try:
        # Get current model info and tags
        print(f"📡 Fetching current model info for: {repo_id}")
        info = model_info(repo_id=repo_id, token=HF_TOKEN)
        current_tags = info.tags if info.tags else []
        print(f"🏷️ Current tags: {current_tags}")

        # Check if tag already exists
        if new_tag in current_tags:
            print(f"⚠️ Tag '{new_tag}' already exists in {current_tags}")
            result = {
                "status": "already_exists",
                "repo_id": repo_id,
                "tag": new_tag,
                "message": f"Tag '{new_tag}' already exists",
            }
            json_str = json.dumps(result)
            print(f"🏷️ add_new_tag (already exists) returning: {json_str}")
            return json_str

        # Add the new tag to existing tags
        # NOTE(review): this list comes from model_info(); if the Hub reports
        # derived tags there, they are written into the card metadata too —
        # confirm that is intended.
        updated_tags = current_tags + [new_tag]
        print(f"🆕 Will update tags from {current_tags} to {updated_tags}")

        # Create model card content with updated tags
        try:
            # Load existing model card
            print("📄 Loading existing model card...")
            card = ModelCard.load(repo_id, token=HF_TOKEN)
            if not hasattr(card, "data") or card.data is None:
                card.data = ModelCardData()
        except HfHubHTTPError:
            # Create new model card if none exists
            print("📄 Creating new model card (none exists)")
            card = ModelCard("")
            card.data = ModelCardData()

        # Update tags - create new ModelCardData with updated tags
        card_dict = card.data.to_dict()
        card_dict["tags"] = updated_tags
        card.data = ModelCardData(**card_dict)

        # Create a pull request with the updated model card. The description
        # is assembled line by line so no source indentation leaks into the
        # Markdown.
        pr_title = f"Add '{new_tag}' tag"
        pr_description = "\n".join(
            [
                "",
                f"## Add tag: {new_tag}",
                "",
                f"This PR adds the `{new_tag}` tag to the model repository.",
                "",
                "**Changes:**",
                f"- Added `{new_tag}` to model tags",
                f"- Updated from {len(current_tags)} to {len(updated_tags)} tags",
                "",
                f"**Current tags:** {', '.join(current_tags) if current_tags else 'None'}",
                f"**New tags:** {', '.join(updated_tags)}",
                "",
            ]
        )

        print(f"🚀 Creating PR with title: {pr_title}")

        # Create commit with updated model card using CommitOperationAdd
        from huggingface_hub import CommitOperationAdd

        commit_info = hf_api.create_commit(
            repo_id=repo_id,
            operations=[
                CommitOperationAdd(
                    path_in_repo="README.md", path_or_fileobj=str(card).encode("utf-8")
                )
            ],
            commit_message=pr_title,
            commit_description=pr_description,
            token=HF_TOKEN,
            create_pr=True,
        )

        # Extract PR URL from commit info. getattr keeps the fallback
        # reachable: reading the attribute before checking for it would raise
        # and report an error even though the PR was already created.
        pr_url = getattr(commit_info, "pr_url", None) or str(commit_info)

        print(f"✅ PR created successfully! URL: {pr_url}")

        result = {
            "status": "success",
            "repo_id": repo_id,
            "tag": new_tag,
            "pr_url": pr_url,
            "previous_tags": current_tags,
            "new_tags": updated_tags,
            "message": f"Created PR to add tag '{new_tag}'",
        }
        json_str = json.dumps(result)
        print(f"✅ add_new_tag success returning: {json_str}")
        return json_str

    except Exception as e:
        # Report the failure to the caller as data instead of raising.
        print(f"❌ Error in add_new_tag: {str(e)}")
        print(f"❌ Error type: {type(e)}")
        import traceback

        print(f"❌ Traceback: {traceback.format_exc()}")

        error_result = {
            "status": "error",
            "repo_id": repo_id,
            "tag": new_tag,
            "error": str(e),
        }
        json_str = json.dumps(error_result)
        print(f"❌ add_new_tag error returning: {json_str}")
        return json_str
181
+
182
+
183
# Start the MCP server when this file is executed as a script (app.py
# launches it that way via "python mcp_server.py").
if __name__ == "__main__":
    mcp.run()
pyproject.toml ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
[project]
name = "mcp-course-unit3-example"
version = "0.1.0"
description = "FastAPI and Gradio app for Hugging Face Hub discussion webhooks"
readme = "README.md"
requires-python = ">=3.11"
# Runtime dependencies (lower bounds only; requirements.txt holds the pinned set)
dependencies = [
    "fastapi>=0.104.0",
    "uvicorn[standard]>=0.24.0",
    "gradio>=4.0.0",
    "huggingface-hub[mcp]>=0.32.0",
    "pydantic>=2.0.0",
    "python-multipart>=0.0.6",
    "requests>=2.31.0",
    "python-dotenv>=1.0.0",
    "fastmcp>=2.0.0",
]

[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.hatch.build.targets.wheel]
# NOTE(review): no "src" directory is among the files in this commit (the code
# lives in top-level app.py and mcp_server.py) — confirm this wheel target.
packages = ["src"]
requirements.txt ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv export --format requirements-txt --no-hashes
3
+ aiofiles==24.1.0
4
+ aiohappyeyeballs==2.6.1
5
+ aiohttp==3.12.2
6
+ aiosignal==1.3.2
7
+ annotated-types==0.7.0
8
+ anyio==4.9.0
9
+ attrs==25.3.0
10
+ audioop-lts==0.2.1 ; python_full_version >= '3.13'
11
+ certifi==2025.4.26
12
+ charset-normalizer==3.4.2
13
+ click==8.2.1
14
+ colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
15
+ exceptiongroup==1.3.0
16
+ fastapi==0.115.12
17
+ fastmcp==2.5.1
18
+ ffmpy==0.5.0
19
+ filelock==3.18.0
20
+ frozenlist==1.6.0
21
+ fsspec==2025.5.1
22
+ gradio==5.31.0
23
+ gradio-client==1.10.1
24
+ groovy==0.1.2
25
+ h11==0.16.0
26
+ hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
27
+ httpcore==1.0.9
28
+ httptools==0.6.4
29
+ httpx==0.28.1
30
+ httpx-sse==0.4.0
31
+ huggingface-hub==0.32.2
32
+ idna==3.10
33
+ jinja2==3.1.6
34
+ markdown-it-py==3.0.0
35
+ markupsafe==3.0.2
36
+ mcp==1.9.1
37
+ mdurl==0.1.2
38
+ multidict==6.4.4
39
+ numpy==2.2.6
40
+ openapi-pydantic==0.5.1
41
+ orjson==3.10.18
42
+ packaging==25.0
43
+ pandas==2.2.3
44
+ pillow==11.2.1
45
+ propcache==0.3.1
46
+ pydantic==2.11.5
47
+ pydantic-core==2.33.2
48
+ pydantic-settings==2.9.1
49
+ pydub==0.25.1
50
+ pygments==2.19.1
51
+ python-dateutil==2.9.0.post0
52
+ python-dotenv==1.1.0
53
+ python-multipart==0.0.20
54
+ pytz==2025.2
55
+ pyyaml==6.0.2
56
+ requests==2.32.3
57
+ rich==14.0.0
58
+ ruff==0.11.11 ; sys_platform != 'emscripten'
59
+ safehttpx==0.1.6
60
+ semantic-version==2.10.0
61
+ shellingham==1.5.4
62
+ six==1.17.0
63
+ sniffio==1.3.1
64
+ sse-starlette==2.3.5
65
+ starlette==0.46.2
66
+ tomlkit==0.13.2
67
+ tqdm==4.67.1
68
+ typer==0.16.0
69
+ typing-extensions==4.13.2
70
+ typing-inspection==0.4.1
71
+ tzdata==2025.2
72
+ urllib3==2.4.0
73
+ uvicorn==0.34.2
74
+ uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
75
+ watchfiles==1.0.5
76
+ websockets==15.0.1
77
+ yarl==1.20.0
uv.lock ADDED
The diff for this file is too large to render. See raw diff