Platma committed on
Commit
1a11765
·
verified ·
1 Parent(s): f8ae299

Update src/main.py

Browse files
Files changed (1) hide show
  1. src/main.py +18 -1
src/main.py CHANGED
@@ -88,7 +88,24 @@ def notify_success(project_id: str):
88
  description=message,
89
  token=HF_ACCESS_TOKEN,
90
  )
91
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
 
93
  NOTIFICATION_TEMPLATE = """\
94
  🌸 Hello there!
 
88
  description=message,
89
  token=HF_ACCESS_TOKEN,
90
  )
91
+
92
def deploy_model(id: str, *, endpoint_name: str = "1726061674-dip"):
    """Request creation of an inference endpoint serving ``Platma/<id>``.

    Sends a POST request to the ``Platma`` endpoint collection of the
    Hugging Face Inference Endpoints API, asking for a single-replica GPU
    endpoint (``nvidia-l4`` / ``x1`` on AWS ``us-east-1``) that runs the
    text-generation-inference container against the given model repository.
    The decoded JSON response is printed; nothing is returned.

    Args:
        id: Name of the model repository under the ``Platma`` namespace.
            (The name shadows the ``id`` builtin; it is kept because it is
            part of the function's existing signature.)
        endpoint_name: Value sent as the endpoint ``name``. Defaults to the
            previously hard-coded name so existing callers are unaffected.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    url = "https://api.endpoints.huggingface.cloud/v2/endpoint/Platma"
    payload = {
        "compute": {
            "accelerator": "gpu",
            "instanceSize": "x1",
            "instanceType": "nvidia-l4",
            "scaling": {"maxReplica": 1, "minReplica": 1, "scaleToZeroTimeout": 15},
        },
        "model": {
            "framework": "pytorch",
            "image": {
                "custom": {
                    "health_route": "/health",
                    "url": "ghcr.io/huggingface/text-generation-inference:sha-f852190",
                    "env": {
                        "MAX_BATCH_PREFILL_TOKENS": "2048",
                        "MAX_INPUT_LENGTH": "1024",
                        "MAX_TOTAL_TOKENS": "1512",
                        "MODEL_ID": "/repository",
                    },
                },
            },
            "repository": f"Platma/{id}",
            "secrets": {},
            "task": "text-generation",
        },
        "name": endpoint_name,
        "provider": {"region": "us-east-1", "vendor": "aws"},
        "type": "protected",
    }
    # Must be a mapping: the previous `{"Authorization:" f"Bearer ..."}` was a
    # one-element set (adjacent string literals concatenate), so no valid
    # Authorization header was ever sent.
    headers = {"Authorization": f"Bearer {HF_ACCESS_TOKEN}"}
    # `json=` serialises the nested payload as a JSON body and sets the
    # Content-Type header; `data=` would form-encode it and mangle the nesting.
    # The timeout keeps a stalled connection from blocking the caller forever.
    response = requests.post(url, json=payload, headers=headers, timeout=60)
    r = response.json()
    print(r)
109
 
110
  NOTIFICATION_TEMPLATE = """\
111
  🌸 Hello there!