Spaces:
Paused
Paused
Update src/main.py
Browse files- src/main.py +18 -1
src/main.py
CHANGED
@@ -88,7 +88,24 @@ def notify_success(project_id: str):
|
|
88 |
description=message,
|
89 |
token=HF_ACCESS_TOKEN,
|
90 |
)
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
|
93 |
NOTIFICATION_TEMPLATE = """\
|
94 |
🌸 Hello there!
|
|
|
88 |
description=message,
|
89 |
token=HF_ACCESS_TOKEN,
|
90 |
)
|
91 |
+
|
92 |
+
def deploy_model(id: str):
    """Request creation of an inference endpoint for the ``Platma/{id}`` repository.

    Sends a POST request to the Hugging Face Inference Endpoints API and
    prints the decoded JSON response body.

    Args:
        id: Name of the model repository under the ``Platma`` namespace; it is
            interpolated into the ``repository`` field of the request body.
            (The name shadows the ``id`` builtin but is kept for caller
            compatibility.)
    """
    url = "https://api.endpoints.huggingface.cloud/v2/endpoint/Platma"
    payload = {
        "compute": {
            "accelerator": "gpu",
            "instanceSize": "x1",
            "instanceType": "nvidia-l4",
            "scaling": {"maxReplica": 1, "minReplica": 1, "scaleToZeroTimeout": 15},
        },
        "model": {
            "framework": "pytorch",
            "image": {
                "custom": {
                    "health_route": "/health",
                    "url": "ghcr.io/huggingface/text-generation-inference:sha-f852190",
                    "env": {
                        "MAX_BATCH_PREFILL_TOKENS": "2048",
                        "MAX_INPUT_LENGTH": "1024",
                        "MAX_TOTAL_TOKENS": "1512",
                        "MODEL_ID": "/repository",
                    },
                },
            },
            "repository": f"Platma/{id}",
            "secrets": {},
            "task": "text-generation",
        },
        # NOTE(review): the endpoint name is hard-coded, so repeated calls
        # presumably collide on the same name — confirm whether it should be
        # derived from `id`.
        "name": "1726061674-dip",
        "provider": {"region": "us-east-1", "vendor": "aws"},
        "type": "protected",
    }
    # Must be a mapping of header name -> value. The previous literal
    # ({"Authorization:" f"Bearer ..."}) was a one-element set, so no
    # Authorization header could be sent.
    headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}"}
    # `json=` serializes the nested payload as a JSON body and sets the
    # Content-Type header; `data=` would form-encode it and mangle the nested
    # dicts. The timeout keeps a stalled connection from blocking forever.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    print(response.json())
|
109 |
|
110 |
NOTIFICATION_TEMPLATE = """\
|
111 |
🌸 Hello there!
|