Update src/main.py
src/main.py (+38, -7)
@@ -90,23 +90,49 @@ def notify_success(project_id: str):
         description=message,
         token=HF_ACCESS_TOKEN,
     )
-
+
+def notify_url(url: str):
+    message = URL_TEMPLATE.format(
+        url=url,
+    )
+    return HfApi(token=HF_ACCESS_TOKEN).create_discussion(
+        repo_id=config.input_dataset,
+        repo_type="dataset",
+        title="✨ Endpoint is ready!",
+        description=message,
+        token=HF_ACCESS_TOKEN,
+    )
+
 def deploy_model(id: str):
+    api = HfApi(token=HF_ACCESS_TOKEN)
     url = "https://api.endpoints.huggingface.cloud/v2/endpoint/Platma"
     data = {"compute": {"accelerator": "gpu", "instanceSize": "x1", "instanceType": "nvidia-l4",
-                        "scaling": {"maxReplica": 1, "minReplica": 1, "scaleToZeroTimeout":15}},
+                        "scaling": {"maxReplica": 1, "minReplica": 1, "scaleToZeroTimeout": 15}},
             "model": {"framework": "pytorch", "image": {
                 "custom": {"health_route": "/health",
                            "url": "ghcr.io/huggingface/text-generation-inference:sha-f852190",
-                           "env": {"MAX_BATCH_PREFILL_TOKENS": "2048", "MAX_INPUT_LENGTH": "…
-                                   "MAX_TOTAL_TOKENS": "…
+                           "env": {"MAX_BATCH_PREFILL_TOKENS": "2048", "MAX_INPUT_LENGTH": "2048",
+                                   "MAX_TOTAL_TOKENS": "2512",
                                    "MODEL_ID": "/repository"}}},
                 "repository": f"Platma/{id}",
                 "secrets": {},
                 "task": "text-generation"},
-            "name": "…
-    headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}"}
-    r = requests.post(url,…
+            "name": f"platma-{id}", "provider": {"region": "us-east-1", "vendor": "aws"}, "type": "protected"}
+    headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}", "Content-Type": "application/json"}
+    r = requests.post(url, json=data, headers=headers)
+    print(r)
+    r = api.get_inference_endpoint(name=f"platma-{id}")
+    while True:
+        print("Fetching url")
+        if r.status == 'running':
+            print(r)
+            notify_url(r.url)
+            break
+        else:
+            if r.status == 'error':
+                break
+            time.sleep(10)
+            r = api.get_inference_endpoint(name=f"platma-{id}")
     print(r)
 
 NOTIFICATION_TEMPLATE = """\
@@ -117,5 +143,10 @@ Following an update of [{input_dataset}](https://huggingface.co/datasets/{input_
 (This is an automated message)
 """
 
+URL_TEMPLATE = """\
+Here is your endpoint: {url}
+(This is an automated message)
+"""
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=8000)
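For reference, the raw requests.post call plus the hand-rolled polling loop in this commit can also be written against huggingface_hub's Inference Endpoints client. The sketch below is not part of the commit: it assumes a recent huggingface_hub (>= 0.20, which ships create_inference_endpoint and InferenceEndpoint.wait), copies the instance, image, and env values from the diff, targets the "Platma" namespace taken from the URL in the diff, and leaves the endpoint type at its default ("protected").

# Sketch only (not the committed code): the same deployment expressed with
# huggingface_hub's Inference Endpoints client, assuming huggingface_hub >= 0.20.
from huggingface_hub import create_inference_endpoint

def deploy_model_sketch(id: str, token: str) -> str:
    # Wraps the same REST API that the diff calls directly with requests.post.
    endpoint = create_inference_endpoint(
        name=f"platma-{id}",           # same naming scheme as the commit
        namespace="Platma",            # the org from the /v2/endpoint/Platma URL
        repository=f"Platma/{id}",
        framework="pytorch",
        task="text-generation",
        accelerator="gpu",
        instance_size="x1",
        instance_type="nvidia-l4",
        region="us-east-1",
        vendor="aws",
        min_replica=1,
        max_replica=1,
        scale_to_zero_timeout=15,
        custom_image={
            "health_route": "/health",
            "url": "ghcr.io/huggingface/text-generation-inference:sha-f852190",
            "env": {
                "MAX_BATCH_PREFILL_TOKENS": "2048",
                "MAX_INPUT_LENGTH": "2048",
                "MAX_TOTAL_TOKENS": "2512",
                "MODEL_ID": "/repository",
            },
        },
        token=token,
    )
    # wait() re-fetches the endpoint on an interval until it reaches "running",
    # and raises on failure or timeout, replacing the manual while/time.sleep(10)
    # loop and the separate 'error' branch in the diff.
    endpoint.wait(timeout=600, refresh_every=10)
    return endpoint.url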