hirushanirmal301 committed on
Commit 2df1810 · 1 Parent(s): e51f92b

Add application file

Files changed (3)
  1. Dockerfile +9 -0
  2. requirements.txt +10 -0
  3. train.py +59 -0
Dockerfile ADDED
@@ -0,0 +1,9 @@
+ FROM python:3.8-slim
+
+ RUN pip install torch transformers datasets deepspeed vllm accelerate unsloth
+ RUN pip install huggingface_hub
+
+ WORKDIR /app
+ COPY . /app
+
+ CMD ["python", "train.py"]
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ fastapi
+ uvicorn[standard]
+ torch
+ transformers
+ datasets
+ deepspeed
+ vllm
+ accelerate
+ unsloth
+ huggingface_hub
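fastapi, uvicorn, and vllm are installed here but never imported by train.py; presumably they are meant for serving the fine-tuned model afterwards. A minimal offline-inference sketch with vLLM, assuming the output path used in train.py (illustrative, not part of this commit):

from vllm import LLM, SamplingParams

# Load the weights that train.py saves; the path is an assumption.
llm = LLM(model="/app/fine_tuned_model")
params = SamplingParams(temperature=0.7, max_tokens=128)

# Generate a completion for a single prompt.
outputs = llm.generate(["Summarize ZeRO-3 offloading in one sentence."], params)
print(outputs[0].outputs[0].text)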
train.py ADDED
@@ -0,0 +1,67 @@
+ import os
+
+ import torch
+ from unsloth import FastLanguageModel
+ from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling
+
+ # Write the DeepSpeed config before TrainingArguments is constructed,
+ # since TrainingArguments loads the file referenced by `deepspeed=`.
+ with open("/app/ds_config.json", "w") as f:
+     f.write('''
+ {
+     "fp16": {"enabled": false},
+     "bf16": {"enabled": true},
+     "zero_optimization": {
+         "stage": 3,
+         "offload_optimizer": {"device": "cpu"},
+         "offload_param": {"device": "cpu"}
+     },
+     "train_batch_size": "auto",
+     "gradient_accumulation_steps": 4
+ }
+ ''')
+
+ # Load quantized model
+ model, tokenizer = FastLanguageModel.from_pretrained(
+     model_name="deepseek-ai/DeepSeek-V3-0324",
+     dtype=torch.bfloat16,
+     load_in_4bit=True,  # Or 2.71-bit
+     token=os.environ["HF_TOKEN"]
+ )
+ FastLanguageModel.for_training(model)
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="/app/checkpoints",
+     per_device_train_batch_size=4,  # Adjust for A100 40GB/80GB
+     per_device_eval_batch_size=4,
+     gradient_accumulation_steps=4,  # Must match the DeepSpeed config
+     num_train_epochs=2,
+     learning_rate=2e-5,
+     save_steps=500,
+     save_total_limit=2,
+     evaluation_strategy="steps",
+     eval_steps=500,
+     logging_dir="/app/logs",
+     logging_steps=100,
+     fp16=False,  # bfloat16 for A100
+     deepspeed="/app/ds_config.json"
+ )
+
+ # Initialize trainer
+ # NOTE: `tokenized_dataset` is assumed to be a tokenized DatasetDict with
+ # "train" and "test" splits, prepared earlier in the script.
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_dataset["train"],
+     eval_dataset=tokenized_dataset["test"],
+     data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)  # adds causal-LM labels
+ )
+
+ # Train
+ trainer.train()
+
+ # Save model
+ model.save_pretrained("/app/fine_tuned_model")
+ tokenizer.save_pretrained("/app/fine_tuned_model")
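train.py uses tokenized_dataset without defining it, so the dataset has to be built earlier in the script. One way that could look with the datasets library already in the image; the dataset name and prompt format are illustrative assumptions, not part of this commit:

from datasets import load_dataset

# Illustrative dataset; swap in the real one. `tokenizer` is the one
# returned by FastLanguageModel.from_pretrained(...) in train.py.
raw = load_dataset("yahma/alpaca-cleaned")

def format_and_tokenize(batch):
    # Join each instruction/response pair into a single training text.
    texts = [
        f"### Instruction:\n{ins}\n\n### Response:\n{out}"
        for ins, out in zip(batch["instruction"], batch["output"])
    ]
    return tokenizer(texts, truncation=True, max_length=2048)

tokenized = raw["train"].map(
    format_and_tokenize, batched=True, remove_columns=raw["train"].column_names
)
# Produce the "train"/"test" splits that the Trainer call expects.
tokenized_dataset = tokenized.train_test_split(test_size=0.1)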