baryshych committed
Commit a075ab3 · 1 Parent(s): 981ad5c

add local autotrain

Files changed (4)
  1. requirements.txt +1 -0
  2. src/config.json +6 -0
  3. src/config.yaml +39 -0
  4. src/main.py +56 -35
requirements.txt CHANGED
@@ -2,3 +2,4 @@ fastapi==0.74.*
  requests==2.27.*
  huggingface_hub==0.11.*
  uvicorn[standard]==0.17.*
+ autotrain-advanced==0.8.12
src/config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "target_namespace": "baryshych",
+   "input_dataset": "huggingface-projects/auto-retrain-input-dataset",
+   "input_model": "microsoft/resnet-50",
+   "autotrain_project_prefix": "platma-retrain"
+ }
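
main.py reads these values via `from models import config, WebhookPayload`, but models.py itself is not part of this commit. Below is a minimal sketch of what such a loader could look like, assuming a pydantic BaseModel (pydantic ships with FastAPI) and that config.json sits next to models.py; the class name and file layout are illustrative, not the repository's actual models.py.

import json
from pathlib import Path

from pydantic import BaseModel


class Config(BaseModel):
    target_namespace: str
    input_dataset: str
    input_model: str
    autotrain_project_prefix: str


# Parse src/config.json (shown above) into a typed object importable as `config`
config = Config(**json.loads((Path(__file__).parent / "config.json").read_text()))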
src/config.yaml ADDED
@@ -0,0 +1,39 @@
+ task: llm-sft
+ base_model: meta-llama/Meta-Llama-3.1-8B-Instruct
+ project_name: llama
+ log: tensorboard
+ backend: local
+
+ data:
+   path: baryshych/platma
+   train_split: train
+   valid_split: null
+   chat_template: null
+   column_mapping:
+     text_column: text
+
+ params:
+   block_size: 1024
+   lr: 1e-4
+   warmup_ratio: 0.1
+   weight_decay: 0.01
+   epochs: 1
+   batch_size: 2
+   gradient_accumulation: 8
+   mixed_precision: fp16
+   peft: True
+   quantization: null
+   lora_r: 16
+   lora_alpha: 32
+   lora_dropout: 0.05
+   unsloth: False
+   optimizer: paged_adamw_8bit
+   target_modules: all-linear
+   padding: right
+   optimizer: paged_adamw_8bit
+   scheduler: cosine
+
+ hub:
+   username: baryshych
+   token: ${HF_ACCESS_TOKEN}
+   push_to_hub: True
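
main.py launches a run from this file by shelling out to the autotrain CLI (see the Popen call in the diff below), which returns immediately without waiting for training. A small sketch of the same launch that waits for completion and surfaces the exit code, assuming the autotrain-advanced CLI from requirements.txt is on PATH and HF_ACCESS_TOKEN is exported so the token placeholder in the hub section can be resolved.

import os
import subprocess

# Hypothetical standalone launch of the config above; autotrain reads
# src/config.yaml and pushes the result to the Hub when push_to_hub is True.
assert os.getenv("HF_ACCESS_TOKEN"), "export HF_ACCESS_TOKEN before launching"
result = subprocess.run(["autotrain", "--config", "src/config.yaml"])
print("autotrain exited with code", result.returncode)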
src/main.py CHANGED
@@ -1,21 +1,21 @@
  import os
  import requests
  from typing import Optional
+ import uvicorn
+ import subprocess
+ from subprocess import Popen

  from fastapi import FastAPI, Header, HTTPException, BackgroundTasks
  from fastapi.responses import FileResponse
  from huggingface_hub.hf_api import HfApi

- from .models import config, WebhookPayload
-
- WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")
- HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
- AUTOTRAIN_API_URL = "https://api.autotrain.huggingface.co"
- AUTOTRAIN_UI_URL = "https://ui.autotrain.huggingface.co"
-
+ from models import config, WebhookPayload

  app = FastAPI()

+
+ WEBHOOK_SECRET = os.getenv("WEBHOOK_SECRET")
+ HF_ACCESS_TOKEN = os.getenv("HF_ACCESS_TOKEN")
  @app.get("/")
  async def home():
      return FileResponse("home.html")
@@ -26,23 +26,23 @@ async def post_webhook(
      task_queue: BackgroundTasks,
      x_webhook_secret: Optional[str] = Header(default=None),
  ):
-     if x_webhook_secret is None:
-         raise HTTPException(401)
-     if x_webhook_secret != WEBHOOK_SECRET:
-         raise HTTPException(403)
-     if not (
-         payload.event.action == "update"
-         and payload.event.scope.startswith("repo.content")
-         and payload.repo.name == config.input_dataset
-         and payload.repo.type == "dataset"
-     ):
-         # no-op
-         return {"processed": False}
-
-     task_queue.add_task(
-         schedule_retrain,
-         payload
-     )
+     # if x_webhook_secret is None:
+     #     raise HTTPException(401)
+     # if x_webhook_secret != WEBHOOK_SECRET:
+     #     raise HTTPException(403)
+     # if not (
+     #     payload.event.action == "update"
+     #     and payload.event.scope.startswith("repo.content")
+     #     and payload.repo.name == config.input_dataset
+     #     and payload.repo.type == "dataset"
+     # ):
+     #     # no-op
+     #     return {"processed": False}
+     schedule_retrain(payload=payload)
+     # task_queue.add_task(
+     #     schedule_retrain,
+     #     payload
+     # )

      return {"processed": True}

@@ -50,17 +50,18 @@ async def post_webhook(
  def schedule_retrain(payload: WebhookPayload):
      # Create the autotrain project
      try:
-         project = AutoTrain.create_project(payload)
-         AutoTrain.add_data(project_id=project["id"])
-         AutoTrain.start_processing(project_id=project["id"])
+         result = Popen(['autotrain', '--config', 'config.yaml'])
+         # project = AutoTrain.create_project(payload)
+         # AutoTrain.add_data(project_id=project["id"])
+         # AutoTrain.start_processing(project_id=project["id"])
      except requests.HTTPError as err:
          print("ERROR while requesting AutoTrain API:")
          print(f" code: {err.response.status_code}")
          print(f" {err.response.json()}")
          raise
      # Notify in the community tab
-     notify_success(project["id"])
-
+     notify_success('vicuna')
+     print(result.returncode)
      return {"processed": True}


@@ -68,15 +69,32 @@ class AutoTrain:
      @staticmethod
      def create_project(payload: WebhookPayload) -> dict:
          project_resp = requests.post(
-             f"{AUTOTRAIN_API_URL}/projects/create",
+             f"{AUTOTRAIN_API_URL}/api/create_project",
              json={
                  "username": config.target_namespace,
                  "proj_name": f"{config.autotrain_project_prefix}-{payload.repo.headSha[:7]}",
-                 "task": 18,  # image-multi-class-classification
-                 "config": {
-                     "hub-model": config.input_model,
-                     "max_models": 1,
-                     "language": "unk",
+                 "task": 'llm:sft',
+                 "base_model": "meta-llama/Meta-Llama-3-8B-Instruct",
+                 "train_split": "train",
+                 "column_mapping": {
+                     "text_column": "text",
+                 },
+                 "params": {
+                     "block_size": 1024,
+                     "model_max_length": 4096,
+                     "max_prompt_length": 512,
+                     "epochs": 1,
+                     "batch_size": 2,
+                     "lr": 0.00003,
+                     "peft": True,
+                     "quantization": "int4",
+                     "target_modules": "all-linear",
+                     "padding": "right",
+                     "optimizer": "adamw_torch",
+                     "scheduler": "linear",
+                     "gradient_accumulation": 4,
+                     "mixed_precision": "fp16",
+                     "chat_template": "chatml"
                  }
              },
              headers={
@@ -140,3 +158,6 @@ Please review and approve the project [here]({ui_url}/{project_id}/trainings) to

  (This is an automated message)
  """
+
+ if __name__ == "__main__":
+     uvicorn.run(app, host="0.0.0.0", port=8000)
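
With the secret and payload checks commented out, any request that parses as a WebhookPayload now triggers a local retrain. A rough smoke test against the server started by the new `__main__` block; the route path and the payload fields are inferred from the hunk context and the commented-out checks (models.py is not in this diff), so treat them as assumptions rather than the actual schema.

import requests

# Field names inferred from the commented-out checks in post_webhook;
# the real schema lives in models.WebhookPayload, which this commit does not show.
payload = {
    "event": {"action": "update", "scope": "repo.content"},
    "repo": {
        "name": "huggingface-projects/auto-retrain-input-dataset",
        "type": "dataset",
        "headSha": "0000000000000000000000000000000000000000",  # placeholder sha
    },
}
# Assumes the endpoint is mounted at /webhook on the uvicorn server from __main__
resp = requests.post("http://localhost:8000/webhook", json=payload)
print(resp.status_code, resp.json())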