platma-retrain

Paused

Platma committed on Sep 11, 2024

Commit

1a11765

verified ·

1 Parent(s): f8ae299

Update src/main.py

Files changed (1) hide show

src/main.py CHANGED Viewed

@@ -88,7 +88,24 @@ def notify_success(project_id: str):
         description=message,
         token=HF_ACCESS_TOKEN,
     )
 NOTIFICATION_TEMPLATE = """\
 🌸 Hello there!

         description=message,
         token=HF_ACCESS_TOKEN,
     )
+def deploy_model(id: str, *, endpoint_name: str = "1726061674-dip",
+                 timeout: float = 30.0) -> None:
+    """Create an Inference Endpoint for the ``Platma/{id}`` model repository.
+
+    Sends a POST request to the Hugging Face Inference Endpoints API asking
+    for a text-generation-inference container on a single AWS ``nvidia-l4``
+    GPU instance, then prints the decoded JSON response.
+
+    Args:
+        id: Repository name under the ``Platma`` namespace. The name shadows
+            the builtin but is kept so existing keyword callers still work.
+        endpoint_name: Name given to the new endpoint. Defaults to the value
+            that was previously hard-coded in the payload.
+        timeout: Seconds to wait for the API before ``requests`` gives up.
+
+    Raises:
+        requests.RequestException: If the request fails or times out.
+        ValueError: If the response body is not valid JSON.
+    """
+    url = "https://api.endpoints.huggingface.cloud/v2/endpoint/Platma"
+    compute = {
+        "accelerator": "gpu",
+        "instanceSize": "x1",
+        "instanceType": "nvidia-l4",
+        "scaling": {"maxReplica": 1, "minReplica": 1, "scaleToZeroTimeout": 15},
+    }
+    container = {
+        "health_route": "/health",
+        "url": "ghcr.io/huggingface/text-generation-inference:sha-f852190",
+        "env": {
+            "MAX_BATCH_PREFILL_TOKENS": "2048",
+            "MAX_INPUT_LENGTH": "1024",
+            "MAX_TOTAL_TOKENS": "1512",
+            "MODEL_ID": "/repository",
+        },
+    }
+    data = {
+        "compute": compute,
+        "model": {
+            "framework": "pytorch",
+            "image": {"custom": container},
+            "repository": f"Platma/{id}",
+            "secrets": {},
+            "task": "text-generation",
+        },
+        "name": endpoint_name,
+        "provider": {"region": "us-east-1", "vendor": "aws"},
+        "type": "protected",
+    }
+    # Must be a header-name -> value mapping; the previous literal was a set
+    # containing one concatenated string, which requests cannot use.
+    headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}"}
+    # The payload is nested, so it has to go out as a JSON body (json=).
+    # data= would form-encode it and lose the nested values.
+    response = requests.post(url, json=data, headers=headers, timeout=timeout)
+    # Printed without raise_for_status() so API error bodies stay visible,
+    # matching the original best-effort behavior.
+    print(response.json())
 NOTIFICATION_TEMPLATE = """\
 🌸 Hello there!