Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# app.py -
|
2 |
import os
|
3 |
import re
|
4 |
import json
|
@@ -6,18 +6,19 @@ from datetime import datetime
|
|
6 |
from typing import List, Dict, Any
|
7 |
|
8 |
import gradio as gr
|
9 |
-
import
|
|
|
|
|
10 |
import asyncio
|
11 |
-
from threading import Thread
|
12 |
|
13 |
-
# Configuration
|
14 |
WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")
|
15 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
16 |
|
17 |
-
#
|
18 |
tag_operations_store: List[Dict[str, Any]] = []
|
19 |
|
20 |
-
#
|
21 |
RECOGNIZED_TAGS = {
|
22 |
"pytorch", "tensorflow", "jax", "transformers", "diffusers",
|
23 |
"text-generation", "text-classification", "question-answering",
|
@@ -29,9 +30,15 @@ RECOGNIZED_TAGS = {
|
|
29 |
"video-classification", "reinforcement-learning", "tabular-classification",
|
30 |
"tabular-regression", "time-series-forecasting", "graph-ml", "robotics",
|
31 |
"computer-vision", "nlp", "cv", "multimodal", "gguf", "safetensors",
|
32 |
-
"llamacpp", "onnx", "mlx"
|
33 |
}
|
34 |
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
def extract_tags_from_text(text: str) -> List[str]:
|
36 |
"""Extract potential tags from discussion text"""
|
37 |
text_lower = text.lower()
|
@@ -66,9 +73,9 @@ def extract_tags_from_text(text: str) -> List[str]:
|
|
66 |
|
67 |
return valid_tags
|
68 |
|
69 |
-
def
|
70 |
-
"""Process tags using direct HuggingFace Hub API calls
|
71 |
-
print(
|
72 |
result_messages = []
|
73 |
|
74 |
if not HF_TOKEN:
|
@@ -188,7 +195,7 @@ This PR adds the `{tag}` tag to the {repo_type} repository.
|
|
188 |
error_msg = f"Processing failed: {str(e)}"
|
189 |
return [error_msg]
|
190 |
|
191 |
-
def process_webhook_comment(webhook_data: Dict[str, Any]):
|
192 |
"""Process webhook to detect and add tags"""
|
193 |
try:
|
194 |
comment_content = webhook_data["comment"]["content"]
|
@@ -206,11 +213,10 @@ def process_webhook_comment(webhook_data: Dict[str, Any]):
|
|
206 |
all_tags = list(set(comment_tags + title_tags))
|
207 |
|
208 |
if not all_tags:
|
209 |
-
print("β No tags found")
|
210 |
return "No recognizable tags found"
|
211 |
|
212 |
print(f"π·οΈ Found tags: {all_tags}")
|
213 |
-
result_messages =
|
214 |
|
215 |
# Store interaction
|
216 |
interaction = {
|
@@ -235,85 +241,90 @@ def process_webhook_comment(webhook_data: Dict[str, Any]):
|
|
235 |
print(f"β {error_msg}")
|
236 |
return error_msg
|
237 |
|
238 |
-
|
239 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
240 |
try:
|
241 |
-
|
242 |
-
|
243 |
-
if request.method != "POST":
|
244 |
-
return {"error": "Method not allowed"}
|
245 |
-
|
246 |
-
# Get request data
|
247 |
-
if hasattr(request, 'json') and callable(request.json):
|
248 |
-
payload = request.json()
|
249 |
-
else:
|
250 |
-
# Fallback for different request formats
|
251 |
-
return {"error": "Could not parse JSON"}
|
252 |
-
|
253 |
-
print(f"π₯ Webhook payload: {payload.get('event', {})}")
|
254 |
-
|
255 |
-
# Verify webhook secret if configured
|
256 |
-
if WEBHOOK_SECRET:
|
257 |
-
webhook_secret = request.headers.get("X-Webhook-Secret")
|
258 |
-
if webhook_secret != WEBHOOK_SECRET:
|
259 |
-
print("β Invalid webhook secret")
|
260 |
-
return {"error": "Invalid webhook secret"}
|
261 |
|
262 |
event = payload.get("event", {})
|
263 |
scope = event.get("scope")
|
264 |
action = event.get("action")
|
265 |
|
266 |
# Only process discussion comment creation (not PRs)
|
267 |
-
if (scope
|
268 |
action == "create" and
|
269 |
not payload.get("discussion", {}).get("isPullRequest", False)):
|
270 |
|
271 |
-
|
272 |
-
def process_in_background():
|
273 |
-
process_webhook_comment(payload)
|
274 |
-
|
275 |
-
thread = Thread(target=process_in_background)
|
276 |
-
thread.start()
|
277 |
-
|
278 |
return {"status": "processing"}
|
279 |
|
280 |
return {"status": "ignored"}
|
281 |
-
|
282 |
except Exception as e:
|
283 |
print(f"β Webhook error: {e}")
|
284 |
return {"error": str(e)}
|
285 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
286 |
def create_gradio_interface():
|
287 |
-
"""Create Gradio interface"""
|
288 |
with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as interface:
|
289 |
gr.Markdown("# π·οΈ HuggingFace Tagging Bot")
|
290 |
gr.Markdown("*Automatically adds tags to repositories when mentioned in discussions*")
|
291 |
|
292 |
with gr.Tab("π Status"):
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
310 |
-
|
|
|
|
|
|
|
|
|
|
|
311 |
""")
|
312 |
|
313 |
with gr.Tab("π Operations Log"):
|
314 |
def get_recent_operations():
|
315 |
if not tag_operations_store:
|
316 |
-
return "No operations yet.
|
317 |
|
318 |
recent = tag_operations_store[-10:]
|
319 |
output = []
|
@@ -336,31 +347,7 @@ def create_gradio_interface():
|
|
336 |
refresh_btn = gr.Button("π Refresh Log")
|
337 |
refresh_btn.click(fn=get_recent_operations, outputs=operations_display)
|
338 |
|
339 |
-
with gr.Tab("
|
340 |
-
gr.Markdown("### Test Webhook Processing")
|
341 |
-
test_repo = gr.Textbox(label="Repository", value="asmaa105/streamlitweb1")
|
342 |
-
test_comment = gr.Textbox(label="Test Comment", value="Please add tags: pytorch, transformers", lines=3)
|
343 |
-
test_btn = gr.Button("π§ Test Processing")
|
344 |
-
test_result = gr.Textbox(label="Result", lines=5, interactive=False)
|
345 |
-
|
346 |
-
def test_webhook_processing(repo, comment):
|
347 |
-
try:
|
348 |
-
# Create mock webhook data
|
349 |
-
mock_webhook = {
|
350 |
-
"event": {"action": "create", "scope": "discussion"},
|
351 |
-
"comment": {"content": comment, "author": {"id": "test-user"}},
|
352 |
-
"discussion": {"title": "Test", "num": 1, "isPullRequest": False},
|
353 |
-
"repo": {"name": repo}
|
354 |
-
}
|
355 |
-
|
356 |
-
result = process_webhook_comment(mock_webhook)
|
357 |
-
return f"β
Test completed!\n\nResult: {result}"
|
358 |
-
except Exception as e:
|
359 |
-
return f"β Test failed: {str(e)}"
|
360 |
-
|
361 |
-
test_btn.click(fn=test_webhook_processing, inputs=[test_repo, test_comment], outputs=test_result)
|
362 |
-
|
363 |
-
with gr.Tab("π·οΈ Supported Tags"):
|
364 |
gr.Markdown(f"""
|
365 |
## Supported Tags ({len(RECOGNIZED_TAGS)} total)
|
366 |
{', '.join(sorted(RECOGNIZED_TAGS))}
|
@@ -376,7 +363,7 @@ def create_gradio_interface():
|
|
376 |
label="Test Comment",
|
377 |
placeholder="Enter a comment to test tag detection...",
|
378 |
lines=3,
|
379 |
-
value="
|
380 |
)
|
381 |
test_output = gr.Textbox(
|
382 |
label="Detected Tags",
|
@@ -401,13 +388,13 @@ def create_gradio_interface():
|
|
401 |
# Create the Gradio interface
|
402 |
demo = create_gradio_interface()
|
403 |
|
404 |
-
#
|
405 |
-
|
406 |
-
demo.add_api_route("/webhook", handle_webhook_request, methods=["POST"])
|
407 |
|
|
|
408 |
if __name__ == "__main__":
|
409 |
-
print("π HF Tagging Bot -
|
410 |
print(f"π HF_TOKEN: {'β
Configured' if HF_TOKEN else 'β Missing'}")
|
411 |
print(f"π Webhook Secret: {'β
Configured' if WEBHOOK_SECRET else 'β Missing'}")
|
412 |
-
|
413 |
-
|
|
|
1 |
+
# app.py - HF Spaces compatible version (Fixed webhook handling)
|
2 |
import os
|
3 |
import re
|
4 |
import json
|
|
|
6 |
from typing import List, Dict, Any
|
7 |
|
8 |
import gradio as gr
|
9 |
+
from fastapi import FastAPI, Request, BackgroundTasks
|
10 |
+
from fastapi.middleware.cors import CORSMiddleware
|
11 |
+
from pydantic import BaseModel
|
12 |
import asyncio
|
|
|
13 |
|
14 |
+
# Configuration - Use HF Spaces secrets
|
15 |
WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")
|
16 |
HF_TOKEN = os.getenv("HF_TOKEN")
|
17 |
|
18 |
+
# Simple storage for processed tag operations
|
19 |
tag_operations_store: List[Dict[str, Any]] = []
|
20 |
|
21 |
+
# Common ML tags that we recognize for auto-tagging
|
22 |
RECOGNIZED_TAGS = {
|
23 |
"pytorch", "tensorflow", "jax", "transformers", "diffusers",
|
24 |
"text-generation", "text-classification", "question-answering",
|
|
|
30 |
"video-classification", "reinforcement-learning", "tabular-classification",
|
31 |
"tabular-regression", "time-series-forecasting", "graph-ml", "robotics",
|
32 |
"computer-vision", "nlp", "cv", "multimodal", "gguf", "safetensors",
|
33 |
+
"llamacpp", "onnx", "mlx"
|
34 |
}
|
35 |
|
36 |
+
class WebhookEvent(BaseModel):
    """Shape of the JSON body delivered by a Hugging Face Hub webhook.

    Only the four top-level sections this bot reads are declared.
    """

    # Event descriptor; the webhook handler reads its "scope" and "action".
    event: Dict[str, str]
    # Comment section; its "content" entry is scanned for tags.
    comment: Dict[str, Any]
    # Discussion section; "isPullRequest" is read to skip pull requests.
    discussion: Dict[str, Any]
    # Repository section. Hub payloads carry non-string values here (a
    # boolean "private" flag, a nested "url" object), so values must not be
    # restricted to ``str`` or validation of real payloads would fail.
    repo: Dict[str, Any]
|
41 |
+
|
42 |
def extract_tags_from_text(text: str) -> List[str]:
|
43 |
"""Extract potential tags from discussion text"""
|
44 |
text_lower = text.lower()
|
|
|
73 |
|
74 |
return valid_tags
|
75 |
|
76 |
+
async def process_tags_directly(all_tags: List[str], repo_name: str) -> List[str]:
|
77 |
+
"""Process tags using direct HuggingFace Hub API calls"""
|
78 |
+
print("π§ Using direct HuggingFace Hub API approach...")
|
79 |
result_messages = []
|
80 |
|
81 |
if not HF_TOKEN:
|
|
|
195 |
error_msg = f"Processing failed: {str(e)}"
|
196 |
return [error_msg]
|
197 |
|
198 |
+
async def process_webhook_comment(webhook_data: Dict[str, Any]):
|
199 |
"""Process webhook to detect and add tags"""
|
200 |
try:
|
201 |
comment_content = webhook_data["comment"]["content"]
|
|
|
213 |
all_tags = list(set(comment_tags + title_tags))
|
214 |
|
215 |
if not all_tags:
|
|
|
216 |
return "No recognizable tags found"
|
217 |
|
218 |
print(f"π·οΈ Found tags: {all_tags}")
|
219 |
+
result_messages = await process_tags_directly(all_tags, repo_name)
|
220 |
|
221 |
# Store interaction
|
222 |
interaction = {
|
|
|
241 |
print(f"β {error_msg}")
|
242 |
return error_msg
|
243 |
|
244 |
+
# Create FastAPI app for webhook handling
|
245 |
+
app = FastAPI(title="HF Tagging Bot API")
|
246 |
+
app.add_middleware(CORSMiddleware, allow_origins=["*"])
|
247 |
+
|
248 |
+
@app.post("/webhook")
async def webhook_handler(request: Request, background_tasks: BackgroundTasks):
    """Handle HF Hub webhooks.

    Verifies the shared secret (when one is configured), parses the JSON
    body, and schedules tag processing for newly created discussion comments
    that do not belong to a pull request.

    Args:
        request: Incoming HTTP request carrying the webhook JSON body.
        background_tasks: FastAPI background-task queue that receives the
            deferred call to ``process_webhook_comment``.

    Returns:
        ``{"status": "processing"}`` when work was scheduled,
        ``{"status": "ignored"}`` for any other event, or a JSON response
        with an ``"error"`` key and a 401 / 400 / 500 status on failure.
    """
    # Imported locally so the handler is self-contained with respect to the
    # module-level import block.
    import hmac

    from fastapi.responses import JSONResponse

    # Verify webhook secret if configured
    if WEBHOOK_SECRET:
        supplied_secret = request.headers.get("X-Webhook-Secret") or ""
        # Constant-time comparison avoids leaking the secret through timing.
        # Bytes are compared because compare_digest rejects non-ASCII str.
        secrets_match = hmac.compare_digest(
            supplied_secret.encode("utf-8"), WEBHOOK_SECRET.encode("utf-8")
        )
        if not secrets_match:
            print("❌ Invalid webhook secret")
            return JSONResponse(
                status_code=401, content={"error": "Invalid webhook secret"}
            )

    # A body that is not valid JSON is a client error, not a server failure.
    try:
        payload = await request.json()
    except ValueError as e:
        print(f"❌ Webhook error: {e}")
        return JSONResponse(status_code=400, content={"error": str(e)})

    if not isinstance(payload, dict):
        print("❌ Webhook error: payload is not a JSON object")
        return JSONResponse(
            status_code=400,
            content={"error": "Webhook payload must be a JSON object"},
        )

    try:
        print(f"📥 Received webhook: {payload.get('event', {})}")

        # ``or {}`` also covers sections that are present but null.
        event = payload.get("event") or {}
        scope = event.get("scope")
        action = event.get("action")
        discussion = payload.get("discussion") or {}

        # Only process discussion comment creation (not PRs)
        if (
            scope == "discussion"
            and action == "create"
            and not discussion.get("isPullRequest", False)
        ):
            background_tasks.add_task(process_webhook_comment, payload)
            return {"status": "processing"}

        return {"status": "ignored"}
    except Exception as e:
        # Top-level boundary: report the failure instead of masking it as a
        # successful delivery.
        print(f"❌ Webhook error: {e}")
        return JSONResponse(status_code=500, content={"error": str(e)})
|
278 |
|
279 |
+
@app.get("/health")
async def health_check():
    """Report liveness together with a summary of the bot's configuration."""
    report = {"status": "healthy"}
    # Only whether each secret is set is exposed, never its value.
    report["hf_token_configured"] = bool(HF_TOKEN)
    report["webhook_secret_configured"] = bool(WEBHOOK_SECRET)
    report["operations_processed"] = len(tag_operations_store)
    return report
|
287 |
+
|
288 |
+
@app.get("/")
async def root():
    """Return a short JSON greeting that points visitors at the web UI."""
    greeting = "HF Tagging Bot is running! Visit /gradio for the interface."
    return dict(message=greeting)
|
291 |
+
|
292 |
def create_gradio_interface():
|
293 |
+
"""Create Gradio interface for monitoring"""
|
294 |
with gr.Blocks(title="HF Tagging Bot", theme=gr.themes.Soft()) as interface:
|
295 |
gr.Markdown("# π·οΈ HuggingFace Tagging Bot")
|
296 |
gr.Markdown("*Automatically adds tags to repositories when mentioned in discussions*")
|
297 |
|
298 |
with gr.Tab("π Status"):
|
299 |
+
gr.Markdown(f"""
|
300 |
+
## Bot Configuration
|
301 |
+
- π **HF Token**: {'β
Configured' if HF_TOKEN else 'β Missing'}
|
302 |
+
- π **Webhook Secret**: {'β
Configured' if WEBHOOK_SECRET else 'β Missing'}
|
303 |
+
- π **Operations Processed**: {len(tag_operations_store)}
|
304 |
+
|
305 |
+
## Setup Instructions
|
306 |
+
1. **Add webhook to your repository**:
|
307 |
+
- Go to repository Settings β Webhooks
|
308 |
+
- Add webhook URL: `https://your-space-name.hf.space/webhook`
|
309 |
+
- Select "Discussion comments" events
|
310 |
+
- Add your webhook secret (optional)
|
311 |
+
|
312 |
+
2. **In discussions, mention tags**:
|
313 |
+
- "Please add tags: pytorch, transformers"
|
314 |
+
- "This needs #pytorch and #text-generation"
|
315 |
+
- "tag: computer-vision"
|
316 |
+
|
317 |
+
## Webhook Endpoint
|
318 |
+
`POST https://your-space-name.hf.space/webhook`
|
319 |
+
|
320 |
+
## Health Check
|
321 |
+
Visit: `https://your-space-name.hf.space/health`
|
322 |
""")
|
323 |
|
324 |
with gr.Tab("π Operations Log"):
|
325 |
def get_recent_operations():
|
326 |
if not tag_operations_store:
|
327 |
+
return "No operations yet. Configure webhooks and post comments with tags to see activity here."
|
328 |
|
329 |
recent = tag_operations_store[-10:]
|
330 |
output = []
|
|
|
347 |
refresh_btn = gr.Button("π Refresh Log")
|
348 |
refresh_btn.click(fn=get_recent_operations, outputs=operations_display)
|
349 |
|
350 |
+
with gr.Tab("π·οΈ Tags & Testing"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
351 |
gr.Markdown(f"""
|
352 |
## Supported Tags ({len(RECOGNIZED_TAGS)} total)
|
353 |
{', '.join(sorted(RECOGNIZED_TAGS))}
|
|
|
363 |
label="Test Comment",
|
364 |
placeholder="Enter a comment to test tag detection...",
|
365 |
lines=3,
|
366 |
+
value="This model should have tags: pytorch, text-generation"
|
367 |
)
|
368 |
test_output = gr.Textbox(
|
369 |
label="Detected Tags",
|
|
|
388 |
# Create the Gradio interface
demo = create_gradio_interface()

# Mount the Gradio app on the FastAPI app under /gradio. Mounting at "/"
# would be shadowed for the exact root URL by the API route already
# registered at "/" (routes are matched in registration order), leaving the
# UI index page unreachable; the root route's message also sends visitors
# to /gradio.
app = gr.mount_gradio_app(app, demo, path="/gradio")
|
|
|
393 |
|
394 |
+
# This is what HF Spaces will use
if __name__ == "__main__":
    # Startup banner: report whether each secret is set, never its value.
    print("🚀 HF Tagging Bot - Starting with FastAPI + Gradio")
    print(f"🔑 HF_TOKEN: {'✅ Configured' if HF_TOKEN else '❌ Missing'}")
    print(f"🔐 Webhook Secret: {'✅ Configured' if WEBHOOK_SECRET else '❌ Missing'}")

    # uvicorn is only needed when the file is executed directly.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)
|