Spaces:

asmaa105
/

tag-this-repo

Sleeping

App Files Files Community

asmaa105 committed on Jun 20

Commit

b4b3790

verified ·

1 Parent(s): 61bd34c

Upload 10 files

Browse files

Files changed (10) hide show

.gitattributes +35 -35
.python-version +1 -0
Dockerfile +33 -0
README.md +21 -12
app.py +524 -0
env.example +12 -0
mcp_server.py +184 -0
pyproject.toml +24 -0
requirements.txt +77 -0
uv.lock +0 -0

.gitattributes CHANGED Viewed

@@ -1,35 +1,35 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11

Dockerfile ADDED Viewed

	@@ -0,0 +1,33 @@

+FROM python:3.11-slim
+# Set working directory
+WORKDIR /app
+# Install system dependencies (curl is needed by the HEALTHCHECK below)
+RUN apt-get update && apt-get install -y \
+    curl \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Copy project files (the application entry point in this repo is app.py)
+COPY pyproject.toml .
+COPY requirements.txt .
+COPY app.py .
+COPY mcp_server.py .
+COPY env.example .
+COPY README.md .
+# Install the pinned Python dependencies exported by uv
+RUN pip install --no-cache-dir -r requirements.txt
+# Create a non-root user
+RUN useradd -m -u 1000 appuser && chown -R appuser:appuser /app
+USER appuser
+# Expose the port app.py binds to
+EXPOSE 7860
+# Health check against the mounted Gradio dashboard (there is no route at "/")
+HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
+    CMD curl -f http://localhost:7860/gradio/ || exit 1
+# Run the application
+CMD ["python", "app.py"]

README.md CHANGED Viewed

@@ -1,12 +1,21 @@
----
-title: Tag This Repo
-emoji: 🌍
-colorFrom: gray
-colorTo: purple
-sdk: gradio
-sdk_version: 5.34.2
-app_file: app.py
-pinned: false
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: tag-a-repo bot
+emoji: 👀
+colorFrom: purple
+colorTo: yellow
+sdk: gradio
+sdk_version: 5.31.0
+app_file: app.py
+pinned: false
+base_path: /gradio
+---
+# HF Tagging Bot
+This bot adds tags to Hugging Face model repositories when those tags are mentioned in the repositories' discussions.
+## How it works
+1. The bot receives Hugging Face Hub webhook events for discussions
+2. When a comment is posted in a discussion, the bot checks the comment and the discussion title for tag mentions
+3. If a tag is mentioned, the bot opens a pull request (PR) that adds the tag to the model repository

app.py ADDED Viewed

	@@ -0,0 +1,524 @@

+import os
+import re
+import json
+from datetime import datetime
+from typing import List, Dict, Any, Optional, Literal
+from fastapi import FastAPI, Request, BackgroundTasks
+from fastapi.middleware.cors import CORSMiddleware
+import gradio as gr
+import uvicorn
+from pydantic import BaseModel
+from huggingface_hub.inference._mcp.agent import Agent
+from dotenv import load_dotenv
+load_dotenv()
+# Configuration
+WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET", "5a775af722adc63d0b895454e3fb7a50cbc62bfa3f97e37d50d1a986c91d8781")
+HF_TOKEN = os.getenv("HF_TOKEN")
+HF_MODEL = os.getenv("HF_MODEL", "microsoft/DialoGPT-medium")
+# Use a valid provider literal from the documentation
+DEFAULT_PROVIDER: Literal["hf-inference"] = "hf-inference"
+HF_PROVIDER = os.getenv("HF_PROVIDER", DEFAULT_PROVIDER)
+# Simple storage for processed tag operations
+tag_operations_store: List[Dict[str, Any]] = []
+# Agent instance
+agent_instance: Optional[Agent] = None
+# Common ML tags that we recognize for auto-tagging
+RECOGNIZED_TAGS = {
+    "pytorch",
+    "tensorflow",
+    "jax",
+    "transformers",
+    "diffusers",
+    "text-generation",
+    "text-classification",
+    "question-answering",
+    "text-to-image",
+    "image-classification",
+    "object-detection",
+    "   ",
+    "fill-mask",
+    "token-classification",
+    "translation",
+    "summarization",
+    "feature-extraction",
+    "sentence-similarity",
+    "zero-shot-classification",
+    "image-to-text",
+    "automatic-speech-recognition",
+    "audio-classification",
+    "voice-activity-detection",
+    "depth-estimation",
+    "image-segmentation",
+    "video-classification",
+    "reinforcement-learning",
+    "tabular-classification",
+    "tabular-regression",
+    "time-series-forecasting",
+    "graph-ml",
+    "robotics",
+    "computer-vision",
+    "nlp",
+    "cv",
+    "multimodal",
+}
+class WebhookEvent(BaseModel):
+    event: Dict[str, str]
+    comment: Dict[str, Any]
+    discussion: Dict[str, Any]
+    repo: Dict[str, str]
+app = FastAPI(title="HF Tagging Bot")  # ASGI app; the /webhook route and the Gradio UI are attached to it below
+app.add_middleware(CORSMiddleware, allow_origins=["*"])  # NOTE(review): accepts cross-origin requests from any origin — confirm this is intended
+async def get_agent():
+    """Get or create Agent instance"""
+    print("🤖 get_agent() called...")
+    global agent_instance
+    if agent_instance is None and HF_TOKEN:
+        print("🔧 Creating new Agent instance...")
+        print(f"🔑 HF_TOKEN present: {bool(HF_TOKEN)}")
+        print(f"🤖 Model: {HF_MODEL}")
+        print(f"🔗 Provider: {DEFAULT_PROVIDER}")
+        try:
+            agent_instance = Agent(
+                model=HF_MODEL,
+                provider=DEFAULT_PROVIDER,
+                api_key=HF_TOKEN,
+                servers=[
+                    {
+                        "type": "stdio",
+                        "config": {
+                            "command": "python",
+                            "args": ["mcp_server.py"],
+                            "cwd": ".",  # Ensure correct working directory
+                            "env": {"HF_TOKEN": HF_TOKEN} if HF_TOKEN else {},
+                        },
+                    }
+                ],
+            )
+            print("✅ Agent instance created successfully")
+            print("🔧 Loading tools...")
+            await agent_instance.load_tools()
+            print("✅ Tools loaded successfully")
+        except Exception as e:
+            print(f"❌ Error creating/loading agent: {str(e)}")
+            agent_instance = None
+    elif agent_instance is None:
+        print("❌ No HF_TOKEN available, cannot create agent")
+    else:
+        print("✅ Using existing agent instance")
+    return agent_instance
+def extract_tags_from_text(text: str) -> List[str]:
+    """Extract potential tags from discussion text"""
+    text_lower = text.lower()
+    # Look for explicit tag mentions like "tag: pytorch" or "#pytorch"
+    explicit_tags = []
+    # Pattern 1: "tag: something" or "tags: something"
+    tag_pattern = r"tags?:\s*([a-zA-Z0-9-_,\s]+)"
+    matches = re.findall(tag_pattern, text_lower)
+    for match in matches:
+        # Split by comma and clean up
+        tags = [tag.strip() for tag in match.split(",")]
+        explicit_tags.extend(tags)
+    # Pattern 2: "#hashtag" style
+    hashtag_pattern = r"#([a-zA-Z0-9-_]+)"
+    hashtag_matches = re.findall(hashtag_pattern, text_lower)
+    explicit_tags.extend(hashtag_matches)
+    # Pattern 3: Look for recognized tags mentioned in natural text
+    mentioned_tags = []
+    for tag in RECOGNIZED_TAGS:
+        if tag in text_lower:
+            mentioned_tags.append(tag)
+    # Combine and deduplicate
+    all_tags = list(set(explicit_tags + mentioned_tags))
+    # Filter to only include recognized tags or explicitly mentioned ones
+    valid_tags = []
+    for tag in all_tags:
+        if tag in RECOGNIZED_TAGS or tag in explicit_tags:
+            valid_tags.append(tag)
+    return valid_tags
+async def process_webhook_comment(webhook_data: Dict[str, Any]):
+    """Process webhook to detect and add tags"""
+    print("🏷️ Starting process_webhook_comment...")
+    try:
+        comment_content = webhook_data["comment"]["content"]
+        discussion_title = webhook_data["discussion"]["title"]
+        repo_name = webhook_data["repo"]["name"]
+        discussion_num = webhook_data["discussion"]["num"]
+        # Author is an object with "id" field
+        comment_author = webhook_data["comment"]["author"].get("id", "unknown")
+        print(f"📝 Comment content: {comment_content}")
+        print(f"📰 Discussion title: {discussion_title}")
+        print(f"📦 Repository: {repo_name}")
+        # Extract potential tags from the comment and discussion title
+        comment_tags = extract_tags_from_text(comment_content)
+        title_tags = extract_tags_from_text(discussion_title)
+        all_tags = list(set(comment_tags + title_tags))
+        print(f"🔍 Comment tags found: {comment_tags}")
+        print(f"🔍 Title tags found: {title_tags}")
+        print(f"🏷️ All unique tags: {all_tags}")
+        result_messages = []
+        if not all_tags:
+            msg = "No recognizable tags found in the discussion."
+            print(f"❌ {msg}")
+            result_messages.append(msg)
+        else:
+            print("🤖 Getting agent instance...")
+            agent = await get_agent()
+            if not agent:
+                msg = "Error: Agent not configured (missing HF_TOKEN)"
+                print(f"❌ {msg}")
+                result_messages.append(msg)
+            else:
+                print("✅ Agent instance obtained successfully")
+                # Process all tags in a single conversation with the agent
+                try:
+                    # Create a comprehensive prompt for the agent
+                    user_prompt = f"""
+I need to add the following tags to the repository '{repo_name}': {", ".join(all_tags)}
+For each tag, please:
+1. Check if the tag already exists on the repository using get_current_tags
+2. If the tag doesn't exist, add it using add_new_tag
+3. Provide a summary of what was done for each tag
+Please process all {len(all_tags)} tags: {", ".join(all_tags)}
+"""
+                    print("💬 Sending comprehensive prompt to agent...")
+                    print(f"📝 Prompt: {user_prompt}")
+                    # Let the agent handle the entire conversation
+                    conversation_result = []
+                    try:
+                        async for item in agent.run(user_prompt):
+                            # The agent yields different types of items
+                            item_str = str(item)
+                            conversation_result.append(item_str)
+                            # Log important events
+                            if (
+                                "tool_call" in item_str.lower()
+                                or "function" in item_str.lower()
+                            ):
+                                print(f"🔧 Agent using tools: {item_str[:200]}...")
+                            elif "content" in item_str and len(item_str) < 500:
+                                print(f"💭 Agent response: {item_str}")
+                        # Extract the final response from the conversation
+                        full_response = " ".join(conversation_result)
+                        print(f"📋 Agent conversation completed successfully")
+                        # Try to extract meaningful results for each tag
+                        for tag in all_tags:
+                            tag_mentioned = tag.lower() in full_response.lower()
+                            if (
+                                "already exists" in full_response.lower()
+                                and tag_mentioned
+                            ):
+                                msg = f"Tag '{tag}': Already exists"
+                            elif (
+                                "pr" in full_response.lower()
+                                or "pull request" in full_response.lower()
+                            ):
+                                if tag_mentioned:
+                                    msg = f"Tag '{tag}': PR created successfully"
+                                else:
+                                    msg = (
+                                        f"Tag '{tag}': Processed "
+                                        "(PR may have been created)"
+                                    )
+                            elif "success" in full_response.lower() and tag_mentioned:
+                                msg = f"Tag '{tag}': Successfully processed"
+                            elif "error" in full_response.lower() and tag_mentioned:
+                                msg = f"Tag '{tag}': Error during processing"
+                            else:
+                                msg = f"Tag '{tag}': Processed by agent"
+                            print(f"✅ Result for tag '{tag}': {msg}")
+                            result_messages.append(msg)
+                    except Exception as agent_error:
+                        print(f"⚠️ Agent streaming failed: {str(agent_error)}")
+                        print("🔄 Falling back to direct MCP tool calls...")
+                        # Import the MCP server functions directly as fallback
+                        try:
+                            import sys
+                            import importlib.util
+                            # Load the MCP server module
+                            spec = importlib.util.spec_from_file_location(
+                                "mcp_server", "./mcp_server.py"
+                            )
+                            mcp_module = importlib.util.module_from_spec(spec)
+                            spec.loader.exec_module(mcp_module)
+                            # Use the MCP tools directly for each tag
+                            for tag in all_tags:
+                                try:
+                                    print(
+                                        f"🔧 Directly calling get_current_tags for '{tag}'"
+                                    )
+                                    current_tags_result = mcp_module.get_current_tags(
+                                        repo_name
+                                    )
+                                    print(
+                                        f"📄 Current tags result: {current_tags_result}"
+                                    )
+                                    # Parse the JSON result
+                                    import json
+                                    tags_data = json.loads(current_tags_result)
+                                    if tags_data.get("status") == "success":
+                                        current_tags = tags_data.get("current_tags", [])
+                                        if tag in current_tags:
+                                            msg = f"Tag '{tag}': Already exists"
+                                            print(f"✅ {msg}")
+                                        else:
+                                            print(
+                                                f"🔧 Directly calling add_new_tag for '{tag}'"
+                                            )
+                                            add_result = mcp_module.add_new_tag(
+                                                repo_name, tag
+                                            )
+                                            print(f"📄 Add tag result: {add_result}")
+                                            add_data = json.loads(add_result)
+                                            if add_data.get("status") == "success":
+                                                pr_url = add_data.get("pr_url", "")
+                                                msg = f"Tag '{tag}': PR created - {pr_url}"
+                                            elif (
+                                                add_data.get("status")
+                                                == "already_exists"
+                                            ):
+                                                msg = f"Tag '{tag}': Already exists"
+                                            else:
+                                                msg = f"Tag '{tag}': {add_data.get('message', 'Processed')}"
+                                            print(f"✅ {msg}")
+                                    else:
+                                        error_msg = tags_data.get(
+                                            "error", "Unknown error"
+                                        )
+                                        msg = f"Tag '{tag}': Error - {error_msg}"
+                                        print(f"❌ {msg}")
+                                    result_messages.append(msg)
+                                except Exception as direct_error:
+                                    error_msg = f"Tag '{tag}': Direct call error - {str(direct_error)}"
+                                    print(f"❌ {error_msg}")
+                                    result_messages.append(error_msg)
+                        except Exception as fallback_error:
+                            error_msg = (
+                                f"Fallback approach failed: {str(fallback_error)}"
+                            )
+                            print(f"❌ {error_msg}")
+                            result_messages.append(error_msg)
+                except Exception as e:
+                    error_msg = f"Error during agent processing: {str(e)}"
+                    print(f"❌ {error_msg}")
+                    result_messages.append(error_msg)
+        # Store the interaction
+        base_url = "https://huggingface.co"
+        discussion_url = f"{base_url}/{repo_name}/discussions/{discussion_num}"
+        interaction = {
+            "timestamp": datetime.now().isoformat(),
+            "repo": repo_name,
+            "discussion_title": discussion_title,
+            "discussion_num": discussion_num,
+            "discussion_url": discussion_url,
+            "original_comment": comment_content,
+            "comment_author": comment_author,
+            "detected_tags": all_tags,
+            "results": result_messages,
+        }
+        tag_operations_store.append(interaction)
+        final_result = " | ".join(result_messages)
+        print(f"💾 Stored interaction and returning result: {final_result}")
+        return final_result
+    except Exception as e:
+        error_msg = f"❌ Fatal error in process_webhook_comment: {str(e)}"
+        print(error_msg)
+        return error_msg
+@app.post("/webhook")
+async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
+    """Handle HF Hub webhooks"""
+    webhook_secret = request.headers.get("X-Webhook-Secret")
+    if webhook_secret != WEBHOOK_SECRET:
+        print("❌ Invalid webhook secret")
+        return {"error": "Invalid webhook secret"}
+    payload = await request.json()
+    print(f"📥 Received webhook payload: {json.dumps(payload, indent=2)}")
+    event = payload.get("event", {})
+    scope = event.get("scope")
+    action = event.get("action")
+    print(f"🔍 Event details - scope: {scope}, action: {action}")
+    # Check if this is a discussion comment creation
+    scope_check = scope == "discussion"
+    action_check = action == "create"
+    not_pr = not payload["discussion"]["isPullRequest"]
+    scope_check = scope_check and not_pr
+    print(f"✅ not_pr: {not_pr}")
+    print(f"✅ scope_check: {scope_check}")
+    print(f"✅ action_check: {action_check}")
+    if scope_check and action_check:
+        # Verify we have the required fields
+        required_fields = ["comment", "discussion", "repo"]
+        missing_fields = [field for field in required_fields if field not in payload]
+        if missing_fields:
+            error_msg = f"Missing required fields: {missing_fields}"
+            print(f"❌ {error_msg}")
+            return {"error": error_msg}
+        print(f"🚀 Processing webhook for repo: {payload['repo']['name']}")
+        background_tasks.add_task(process_webhook_comment, payload)
+        return {"status": "processing"}
+    print(f"⏭️ Ignoring webhook - scope: {scope}, action: {action}")
+    return {"status": "ignored"}
+async def simulate_webhook(
+    repo_name: str, discussion_title: str, comment_content: str
+) -> str:
+    """Simulate webhook for testing"""
+    if not all([repo_name, discussion_title, comment_content]):
+        return "Please fill in all fields."
+    mock_payload = {
+        "event": {"action": "create", "scope": "discussion"},
+        "comment": {
+            "content": comment_content,
+            "author": {"id": "test-user-id"},
+            "id": "mock-comment-id",
+            "hidden": False,
+        },
+        "discussion": {
+            "title": discussion_title,
+            "num": len(tag_operations_store) + 1,
+            "id": "mock-discussion-id",
+            "status": "open",
+            "isPullRequest": False,
+        },
+        "repo": {
+            "name": repo_name,
+            "type": "model",
+            "private": False,
+        },
+    }
+    response = await process_webhook_comment(mock_payload)
+    return f"✅ Processed! Results: {response}"
+def create_gradio_app():
+    """Create Gradio interface"""
+    with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as demo:
+        gr.Markdown("# 🏷️ HF Tagging Bot Dashboard")
+        gr.Markdown("*Automatically adds tags to models when mentioned in discussions*")
+        gr.Markdown("""
+        ## How it works:
+        - Monitors HuggingFace Hub discussions
+        - Detects tag mentions in comments (e.g., "tag: pytorch",
+          "#transformers")
+        - Automatically adds recognized tags to the model repository
+        - Supports common ML tags like: pytorch, tensorflow,
+          text-generation, etc.
+        """)
+        with gr.Column():
+            sim_repo = gr.Textbox(
+                label="Repository",
+                value="burtenshaw/play-mcp-repo-bot",
+                placeholder="username/model-name",
+            )
+            sim_title = gr.Textbox(
+                label="Discussion Title",
+                value="Add pytorch tag",
+                placeholder="Discussion title",
+            )
+            sim_comment = gr.Textbox(
+                label="Comment",
+                lines=3,
+                value="This model should have tags: pytorch, text-generation",
+                placeholder="Comment mentioning tags...",
+            )
+            sim_btn = gr.Button("🏷️ Test Tag Detection")
+        with gr.Column():
+            sim_result = gr.Textbox(label="Result", lines=8)
+        sim_btn.click(
+            fn=simulate_webhook,
+            inputs=[sim_repo, sim_title, sim_comment],
+            outputs=sim_result,
+        )
+        gr.Markdown(f"""
+        ## Recognized Tags:
+        {", ".join(sorted(RECOGNIZED_TAGS))}
+        """)
+    return demo
+# Mount Gradio app
+gradio_app = create_gradio_app()  # built once, at import time
+app = gr.mount_gradio_app(app, gradio_app, path="/gradio")  # dashboard is served under /gradio by the same app that handles /webhook
+if __name__ == "__main__":
+    print("🚀 Starting HF Tagging Bot...")
+    print("📊 Dashboard: http://localhost:7860/gradio")
+    print("🔗 Webhook: http://localhost:7860/webhook")
+    uvicorn.run("app:app", host="0.0.0.0", port=7860, reload=True)  # NOTE(review): reload=True is a development setting — confirm before deploying

env.example ADDED Viewed

	@@ -0,0 +1,12 @@

+# Webhook Configuration
+WEBHOOK_SECRET=your-webhook-secret-here
+# Hugging Face Configuration
+HF_TOKEN=your-huggingface-token-here
+# Model Configuration (optional)
+HF_MODEL=microsoft/DialoGPT-medium
+# Inference provider name; app.py defaults to "hf-inference"
+HF_PROVIDER=hf-inference
+# Optional: Custom bot username for mention detection
+BOT_USERNAME=discussion-bot

mcp_server.py ADDED Viewed

	@@ -0,0 +1,184 @@

+#!/usr/bin/env python3
+"""
+Simplified MCP Server for HuggingFace Hub Tagging Operations using FastMCP
+"""
+import os
+import json
+from fastmcp import FastMCP
+from huggingface_hub import HfApi, model_info, ModelCard, ModelCardData
+from huggingface_hub.utils import HfHubHTTPError
+from dotenv import load_dotenv
+load_dotenv()
+# Configuration
+HF_TOKEN = os.getenv("HF_TOKEN")  # Hub access token; both tools return an error result without it
+# Initialize HF API client
+hf_api = HfApi(token=HF_TOKEN) if HF_TOKEN else None  # None means "not configured"; the tools check this first
+# Create the FastMCP server
+mcp = FastMCP("hf-tagging-bot")  # tools are registered on it with @mcp.tool()
+@mcp.tool()
+def get_current_tags(repo_id: str) -> str:
+    """Get current tags from a HuggingFace model repository"""
+    print(f"🔧 get_current_tags called with repo_id: {repo_id}")
+    if not hf_api:
+        error_result = {"error": "HF token not configured"}
+        json_str = json.dumps(error_result)
+        print(f"❌ No HF API token - returning: {json_str}")
+        return json_str
+    try:
+        print(f"📡 Fetching model info for: {repo_id}")
+        info = model_info(repo_id=repo_id, token=HF_TOKEN)
+        current_tags = info.tags if info.tags else []
+        print(f"🏷️ Found {len(current_tags)} tags: {current_tags}")
+        result = {
+            "status": "success",
+            "repo_id": repo_id,
+            "current_tags": current_tags,
+            "count": len(current_tags),
+        }
+        json_str = json.dumps(result)
+        print(f"✅ get_current_tags returning: {json_str}")
+        return json_str
+    except Exception as e:
+        print(f"❌ Error in get_current_tags: {str(e)}")
+        error_result = {"status": "error", "repo_id": repo_id, "error": str(e)}
+        json_str = json.dumps(error_result)
+        print(f"❌ get_current_tags error returning: {json_str}")
+        return json_str
+@mcp.tool()
+def add_new_tag(repo_id: str, new_tag: str) -> str:
+    """Add a new tag to a HuggingFace model repository via PR"""
+    print(f"🔧 add_new_tag called with repo_id: {repo_id}, new_tag: {new_tag}")
+    if not hf_api:
+        error_result = {"error": "HF token not configured"}
+        json_str = json.dumps(error_result)
+        print(f"❌ No HF API token - returning: {json_str}")
+        return json_str
+    try:
+        # Get current model info and tags
+        print(f"📡 Fetching current model info for: {repo_id}")
+        info = model_info(repo_id=repo_id, token=HF_TOKEN)
+        current_tags = info.tags if info.tags else []
+        print(f"🏷️ Current tags: {current_tags}")
+        # Check if tag already exists
+        if new_tag in current_tags:
+            print(f"⚠️ Tag '{new_tag}' already exists in {current_tags}")
+            result = {
+                "status": "already_exists",
+                "repo_id": repo_id,
+                "tag": new_tag,
+                "message": f"Tag '{new_tag}' already exists",
+            }
+            json_str = json.dumps(result)
+            print(f"🏷️ add_new_tag (already exists) returning: {json_str}")
+            return json_str
+        # Add the new tag to existing tags
+        updated_tags = current_tags + [new_tag]
+        print(f"🆕 Will update tags from {current_tags} to {updated_tags}")
+        # Create model card content with updated tags
+        try:
+            # Load existing model card
+            print(f"📄 Loading existing model card...")
+            card = ModelCard.load(repo_id, token=HF_TOKEN)
+            if not hasattr(card, "data") or card.data is None:
+                card.data = ModelCardData()
+        except HfHubHTTPError:
+            # Create new model card if none exists
+            print(f"📄 Creating new model card (none exists)")
+            card = ModelCard("")
+            card.data = ModelCardData()
+        # Update tags - create new ModelCardData with updated tags
+        card_dict = card.data.to_dict()
+        card_dict["tags"] = updated_tags
+        card.data = ModelCardData(**card_dict)
+        # Create a pull request with the updated model card
+        pr_title = f"Add '{new_tag}' tag"
+        pr_description = f"""
+## Add tag: {new_tag}
+This PR adds the `{new_tag}` tag to the model repository.
+**Changes:**
+- Added `{new_tag}` to model tags
+- Updated from {len(current_tags)} to {len(updated_tags)} tags
+**Current tags:** {", ".join(current_tags) if current_tags else "None"}
+**New tags:** {", ".join(updated_tags)}
+"""
+        print(f"🚀 Creating PR with title: {pr_title}")
+        # Create commit with updated model card using CommitOperationAdd
+        from huggingface_hub import CommitOperationAdd
+        commit_info = hf_api.create_commit(
+            repo_id=repo_id,
+            operations=[
+                CommitOperationAdd(
+                    path_in_repo="README.md", path_or_fileobj=str(card).encode("utf-8")
+                )
+            ],
+            commit_message=pr_title,
+            commit_description=pr_description,
+            token=HF_TOKEN,
+            create_pr=True,
+        )
+        # Extract PR URL from commit info
+        pr_url_attr = commit_info.pr_url
+        pr_url = pr_url_attr if hasattr(commit_info, "pr_url") else str(commit_info)
+        print(f"✅ PR created successfully! URL: {pr_url}")
+        result = {
+            "status": "success",
+            "repo_id": repo_id,
+            "tag": new_tag,
+            "pr_url": pr_url,
+            "previous_tags": current_tags,
+            "new_tags": updated_tags,
+            "message": f"Created PR to add tag '{new_tag}'",
+        }
+        json_str = json.dumps(result)
+        print(f"✅ add_new_tag success returning: {json_str}")
+        return json_str
+    except Exception as e:
+        print(f"❌ Error in add_new_tag: {str(e)}")
+        print(f"❌ Error type: {type(e)}")
+        import traceback
+        print(f"❌ Traceback: {traceback.format_exc()}")
+        error_result = {
+            "status": "error",
+            "repo_id": repo_id,
+            "tag": new_tag,
+            "error": str(e),
+        }
+        json_str = json.dumps(error_result)
+        print(f"❌ add_new_tag error returning: {json_str}")
+        return json_str
+if __name__ == "__main__":
+    mcp.run()  # start serving the registered tools; app.py launches this script as a stdio subprocess

pyproject.toml ADDED Viewed

	@@ -0,0 +1,24 @@

+[project]
+name = "mcp-course-unit3-example"
+version = "0.1.0"
+description = "FastAPI and Gradio app for Hugging Face Hub discussion webhooks"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "fastapi>=0.104.0",
+    "uvicorn[standard]>=0.24.0",
+    "gradio>=4.0.0",
+    "huggingface-hub[mcp]>=0.32.0",
+    "pydantic>=2.0.0",
+    "python-multipart>=0.0.6",
+    "requests>=2.31.0",
+    "python-dotenv>=1.0.0",
+    "fastmcp>=2.0.0",
+]
+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[tool.hatch.build.targets.wheel]
+packages = ["src"]  # NOTE(review): this commit adds app.py and mcp_server.py at the repo root and no src/ directory — confirm this path

requirements.txt ADDED Viewed

	@@ -0,0 +1,77 @@

+# This file was autogenerated by uv via the following command:
+#    uv export --format requirements-txt --no-hashes
+aiofiles==24.1.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.2
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+audioop-lts==0.2.1 ; python_full_version >= '3.13'
+certifi==2025.4.26
+charset-normalizer==3.4.2
+click==8.2.1
+colorama==0.4.6 ; sys_platform == 'win32' or platform_system == 'Windows'
+exceptiongroup==1.3.0
+fastapi==0.115.12
+fastmcp==2.5.1
+ffmpy==0.5.0
+filelock==3.18.0
+frozenlist==1.6.0
+fsspec==2025.5.1
+gradio==5.31.0
+gradio-client==1.10.1
+groovy==0.1.2
+h11==0.16.0
+hf-xet==1.1.2 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+httpcore==1.0.9
+httptools==0.6.4
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.32.2
+idna==3.10
+jinja2==3.1.6
+markdown-it-py==3.0.0
+markupsafe==3.0.2
+mcp==1.9.1
+mdurl==0.1.2
+multidict==6.4.4
+numpy==2.2.6
+openapi-pydantic==0.5.1
+orjson==3.10.18
+packaging==25.0
+pandas==2.2.3
+pillow==11.2.1
+propcache==0.3.1
+pydantic==2.11.5
+pydantic-core==2.33.2
+pydantic-settings==2.9.1
+pydub==0.25.1
+pygments==2.19.1
+python-dateutil==2.9.0.post0
+python-dotenv==1.1.0
+python-multipart==0.0.20
+pytz==2025.2
+pyyaml==6.0.2
+requests==2.32.3
+rich==14.0.0
+ruff==0.11.11 ; sys_platform != 'emscripten'
+safehttpx==0.1.6
+semantic-version==2.10.0
+shellingham==1.5.4
+six==1.17.0
+sniffio==1.3.1
+sse-starlette==2.3.5
+starlette==0.46.2
+tomlkit==0.13.2
+tqdm==4.67.1
+typer==0.16.0
+typing-extensions==4.13.2
+typing-inspection==0.4.1
+tzdata==2025.2
+urllib3==2.4.0
+uvicorn==0.34.2
+uvloop==0.21.0 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
+watchfiles==1.0.5
+websockets==15.0.1
+yarl==1.20.0

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff