rararara9999 committed on
Commit 476e75f · verified · 1 Parent(s): 56abb62

Create model

Files changed (1)
  1. model +187 -0
model ADDED
@@ -0,0 +1,187 @@
# Install dependencies
!pip install -U git+https://github.com/huggingface/transformers.git
!pip install -U git+https://github.com/huggingface/accelerate.git
!pip install datasets
!pip install evaluate

# Set up the model checkpoint and batch size
model_checkpoint = "microsoft/resnet-50"
batch_size = 128

from datasets import load_dataset

# Mount Google Drive (the dataset lives under drive/MyDrive)
from google.colab import drive
drive.mount('/content/drive/')

from evaluate import load
metric = load("accuracy")
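
# Quick sanity check of the metric (a minimal sketch; the lists below are
# made-up values, not from the dataset): evaluate's accuracy metric returns
# a dict like {'accuracy': ...}.
print(metric.compute(predictions=[0, 1, 1, 0], references=[0, 1, 0, 0]))  # {'accuracy': 0.75}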

# Load and prepare the dataset
dataset = load_dataset("imagefolder", data_dir="drive/MyDrive/Face Mask Dataset")
labels = dataset["train"].features["label"].names
label2id, id2label = dict(), dict()
for i, label in enumerate(labels):
    label2id[label] = i
    id2label[i] = label
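
# For illustration: if the image folders are named e.g. WithMask/ and
# WithoutMask/ (assumed names, not confirmed by this script), this yields
# label2id == {'WithMask': 0, 'WithoutMask': 1} and the inverse in id2label.
print(label2id, id2label)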

# Image processing
from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained(model_checkpoint)
image_processor

# Data augmentation and normalization
from torchvision.transforms import (
    CenterCrop,
    ColorJitter,
    Compose,
    Normalize,
    RandomHorizontalFlip,
    RandomResizedCrop,
    RandomRotation,
    Resize,
    ToTensor,
)

normalize = Normalize(mean=image_processor.image_mean, std=image_processor.image_std)
if "height" in image_processor.size:
    size = (image_processor.size["height"], image_processor.size["width"])
    crop_size = size
    max_size = None
elif "shortest_edge" in image_processor.size:
    size = image_processor.size["shortest_edge"]
    crop_size = (size, size)
    max_size = image_processor.size.get("longest_edge")  # unused below; kept for completeness
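
# For microsoft/resnet-50 the processor config exposes "shortest_edge"
# (224 at the time of writing; worth double-checking via image_processor.size),
# so the elif branch runs and crop_size ends up as (224, 224).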

train_transforms = Compose(
    [
        RandomResizedCrop(crop_size),
        RandomHorizontalFlip(),
        RandomRotation(degrees=15),
        ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4, hue=0.1),
        ToTensor(),
        normalize,
    ]
)

# Validation must be deterministic, so it only resizes, center-crops, and
# normalizes; the random rotation and color jitter belong to training only.
val_transforms = Compose(
    [
        Resize(size),
        CenterCrop(crop_size),
        ToTensor(),
        normalize,
    ]
)

def preprocess_train(example_batch):
    """Apply the training transforms across a batch of images."""
    example_batch["pixel_values"] = [
        train_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

def preprocess_val(example_batch):
    """Apply the validation transforms across a batch of images."""
    example_batch["pixel_values"] = [
        val_transforms(image.convert("RGB")) for image in example_batch["image"]
    ]
    return example_batch

# Split the dataset: 70% train, 30% validation
splits = dataset["train"].train_test_split(test_size=0.3)
train_ds = splits['train']
val_ds = splits['test']

train_ds.set_transform(preprocess_train)
val_ds.set_transform(preprocess_val)
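
# set_transform applies the preprocessing lazily on access, so a one-sample
# sanity check is cheap (expects [3, 224, 224] assuming the 224 crop above):
print(train_ds[0]["pixel_values"].shape)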

# Model and training setup
from transformers import AutoModelForImageClassification, TrainingArguments, Trainer

model = AutoModelForImageClassification.from_pretrained(
    model_checkpoint,
    label2id=label2id,
    id2label=id2label,
    # The checkpoint ships a 1000-class ImageNet head; this swaps it for a
    # freshly initialized len(labels)-way classifier instead of erroring out.
    ignore_mismatched_sizes=True,
)
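
# Cheap sanity check that the replacement head matches the dataset's classes.
assert model.config.num_labels == len(labels)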

model_name = model_checkpoint.split("/")[-1]

args = TrainingArguments(
    f"{model_name}-finetuned",
    remove_unused_columns=False,
    eval_strategy="epoch",  # renamed from evaluation_strategy in recent transformers releases
    save_strategy="epoch",
    save_total_limit=5,
    learning_rate=1e-3,
    per_device_train_batch_size=batch_size,
    gradient_accumulation_steps=2,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=2,
    warmup_ratio=0.1,
    weight_decay=0.01,
    lr_scheduler_type="cosine",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
)
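
# With per_device_train_batch_size=128 and gradient_accumulation_steps=2,
# the effective batch size per optimizer step is 128 * 2 = 256 per device.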

# Metric and data collation
import numpy as np

def compute_metrics(eval_pred):
    """Computes accuracy on a batch of predictions."""
    predictions = np.argmax(eval_pred.predictions, axis=1)
    return metric.compute(predictions=predictions, references=eval_pred.label_ids)
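
# Minimal sketch of what compute_metrics receives, using made-up logits
# rather than real model output:
from transformers import EvalPrediction

dummy = EvalPrediction(
    predictions=np.array([[0.9, 0.1], [0.2, 0.8]]),  # two samples, two classes
    label_ids=np.array([0, 1]),
)
print(compute_metrics(dummy))  # {'accuracy': 1.0}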

import torch

def collate_fn(examples):
    """Stack image tensors and gather labels into the dict the Trainer expects."""
    pixel_values = torch.stack([example["pixel_values"] for example in examples])
    labels = torch.tensor([example["label"] for example in examples])
    return {"pixel_values": pixel_values, "labels": labels}
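
# Sketch of the collator's output on two real samples (shapes assume the
# 224x224 crops configured above):
batch = collate_fn([train_ds[0], train_ds[1]])
print(batch["pixel_values"].shape, batch["labels"].shape)  # [2, 3, 224, 224] and [2]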

# Baseline: evaluate the pre-trained model before fine-tuning
val_ds.set_transform(preprocess_val)

# Define evaluation-only arguments
eval_args = TrainingArguments(
    output_dir="./results",
    per_device_eval_batch_size=batch_size,
    remove_unused_columns=False,
)

# Initialize the Trainer
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=val_ds,
    processing_class=image_processor,  # the tokenizer= alias is deprecated in recent transformers
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

# Evaluate the pre-trained model
metrics = trainer.evaluate()
print(metrics)
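
# evaluate() returns prefixed metrics such as 'eval_loss', 'eval_accuracy',
# and 'eval_runtime'; with the freshly re-initialized head, accuracy here
# should sit near chance and serves only as a baseline.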

# Training and evaluation
trainer = Trainer(
    model,
    args,
    train_dataset=train_ds,
    eval_dataset=val_ds,
    processing_class=image_processor,
    compute_metrics=compute_metrics,
    data_collator=collate_fn,
)

train_results = trainer.train()
# Save the model
trainer.save_model()
trainer.log_metrics("train", train_results.metrics)
trainer.save_metrics("train", train_results.metrics)
trainer.save_state()

metrics = trainer.evaluate()
# Some nice-to-haves: log and persist the final eval metrics
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)
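
# A minimal inference sketch; "some_image.png" is a placeholder path, not a
# file from this repo:
from transformers import pipeline

classifier = pipeline("image-classification", model=f"{model_name}-finetuned")
print(classifier("some_image.png"))  # list of {'label': ..., 'score': ...} dicts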