Terry Zhang committed · Commit 03a22c3 · Parent(s): c0a2424

add moe classifier function and results

Files changed:
- results.txt +44 -0
- tasks/custom_classifiers.py +72 -1
- tasks/text.py +41 -3
results.txt
ADDED
@@ -0,0 +1,44 @@
+{
+    "username": "theterryzhang",
+    "space_url": "https://huggingface.co/spaces/theterryzhang/submission-template",
+    "submission_timestamp": "2025-01-31T12:01:09.396352",
+    "model_description": "Fine-tuned sentence transformer DistilRoBERTa",
+    "accuracy": 0.7015384615384616,
+    "energy_consumed_wh": 0.5669492253472,
+    "emissions_gco2eq": 0.2092804495941757,
+    "emissions_data": {
+        "run_id": "6cfd4617-a2f2-4e4a-8778-03dd45b162be",
+        "duration": 11.484101312999996,
+        "emissions": 0.0002092804495941757,
+        "emissions_rate": 0.000018223608660457374,
+        "cpu_power": 105,
+        "gpu_power": 66.98551893981808,
+        "ram_power": 5.74672794342041,
+        "cpu_energy": 0.00033494035373749984,
+        "gpu_energy": 0.00021367989316599975,
+        "ram_energy": 0.00001832897844370036,
+        "energy_consumed": 0.0005669492253472001,
+        "country_name": "United States",
+        "country_iso_code": "USA",
+        "region": "virginia",
+        "cloud_provider": "",
+        "cloud_region": "",
+        "os": "Linux-5.10.230-223.885.amzn2.x86_64-x86_64-with-glibc2.36",
+        "python_version": "3.9.21",
+        "codecarbon_version": "2.8.3",
+        "cpu_count": 4,
+        "cpu_model": "Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz",
+        "gpu_count": 1,
+        "gpu_model": "1 x Tesla T4",
+        "ram_total_size": 15.324607849121094,
+        "tracking_mode": "machine",
+        "on_cloud": "N",
+        "pue": 1
+    },
+    "api_route": "/text",
+    "dataset_config": {
+        "dataset_name": "QuotaClimat/frugalaichallenge-text-train",
+        "test_size": 0.2,
+        "test_seed": 42
+    }
+}
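The emissions block mirrors codecarbon's EmissionsData record (codecarbon 2.8.3 here): energy_consumed is in kWh, so multiplying by 1000 gives the energy_consumed_wh value above, and emissions is in kg CO2eq. A minimal sketch of how such a record is captured; the repo actually wraps its tracker in tasks/utils/emissions.py, so the names below are illustrative, not the project's code:

# Minimal sketch of codecarbon usage (illustrative, assuming the
# EmissionsTracker API; the repo uses its own wrapper module).
from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()
# ... run the tracked inference here ...
tracker.stop()

# After stop(), final_emissions_data carries the fields seen above:
# duration (s), energy_consumed (kWh), emissions (kg CO2eq), power draws, etc.
data = tracker.final_emissions_data
print(data.energy_consumed * 1000, data.emissions * 1000)  # Wh, g CO2eq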
tasks/custom_classifiers.py
CHANGED
@@ -2,6 +2,7 @@ from transformers import RobertaModel, AutoTokenizer
 from transformers.modeling_outputs import SequenceClassifierOutput
 from huggingface_hub import PyTorchModelHubMixin
 from torch.nn import CrossEntropyLoss
+import torch.nn.functional as F
 import torch.nn as nn
 import torch
 
@@ -25,4 +26,74 @@ class SentenceBERTClassifier(nn.Module, PyTorchModelHubMixin):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )
+        )
+
+
+class DenseBlock(nn.Module):
+    def __init__(self, input_size, output_size, dropout_rate):
+        super(DenseBlock, self).__init__()
+        self.linear = nn.Linear(input_size, output_size)
+        self.batch_norm = nn.BatchNorm1d(output_size)
+        self.activation = nn.ReLU()
+        self.dropout = nn.Dropout(dropout_rate)
+
+    def forward(self, input):
+        output = self.linear(input)
+        output = self.batch_norm(output)
+        output = self.activation(output)
+        output = self.dropout(output)
+        return output
+
+class FeedForwardExpert(nn.Module):
+    def __init__(self, dropout_rate, num_labels=8):
+        super(FeedForwardExpert, self).__init__()
+
+        # Define the dense blocks
+        self.block_1 = DenseBlock(768, 400, dropout_rate)
+        self.block_2 = DenseBlock(400, 200, dropout_rate)
+        self.final_layer = nn.Linear(200, num_labels)
+
+        self.initialize_weights()
+
+    def forward(self, input):
+        output = self.block_1(input)
+        output = self.block_2(output)
+        output = self.final_layer(output)
+
+        return output
+
+    def initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+
+
+class MoEClassifier(nn.Module):
+    def __init__(self, num_experts, dropout_rate=0.1, gate_hidden_size=128):
+        super(MoEClassifier, self).__init__()
+        self.dropout = dropout_rate
+        self.num_experts = num_experts
+        self.gate_hidden_size = gate_hidden_size
+
+        # Create a list of feedforward experts
+        self.experts = nn.ModuleList([FeedForwardExpert(self.dropout) for _ in range(self.num_experts)])
+
+        # A gating network over the 768-dim input embedding
+        self.gate_fc1 = nn.Linear(768, self.gate_hidden_size)
+        self.gate_fc2 = nn.Linear(self.gate_hidden_size, self.num_experts)
+
+    def forward(self, x):
+
+        # Calculate gating weights: (batch, num_experts, 1)
+        gate_hidden = F.relu(self.gate_fc1(x))
+        weights = F.softmax(self.gate_fc2(gate_hidden), dim=1).unsqueeze(2)
+
+        # Get outputs from all experts, stacked as (batch, num_labels, num_experts)
+        outputs = torch.stack([expert(x) for expert in self.experts], dim=2)
+
+        # Apply weights using a batch matrix multiplication -> (batch, num_labels)
+        weighted_outputs = torch.bmm(outputs, weights).squeeze(2)
+
+        return weighted_outputs
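This is a dense (soft) mixture of experts: every expert runs on every input and the gate mixes their logits, rather than routing to a top-k subset. A quick shape-check sketch of the new head, assuming a dummy batch of pooled 768-dim sentence embeddings (the tensor and batch size below are illustrative):

import torch
from tasks.custom_classifiers import MoEClassifier

# Same configuration that tasks/text.py instantiates: 3 experts, 5% dropout.
model = MoEClassifier(num_experts=3, dropout_rate=0.05)
model.eval()  # put BatchNorm/Dropout in inference mode

embeddings = torch.randn(16, 768)   # dummy pooled embeddings (illustrative)
with torch.no_grad():
    logits = model(embeddings)      # gate-weighted sum of expert logits
print(logits.shape)                 # torch.Size([16, 8]); num_labels defaults to 8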
tasks/text.py
CHANGED
@@ -15,7 +15,7 @@ from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 from .utils.text_preprocessor import preprocess
 from accelerate.test_utils.testing import get_backend
-from custom_classifiers import SentenceBERTClassifier
+from custom_classifiers import SentenceBERTClassifier, MoEClassifier
 
 router = APIRouter()
 
@@ -27,7 +27,8 @@ models_descriptions = {
     "tfidf_xgb": "TF-IDF vectorizer and XGBoost classifier", # Submitted
     "bert_base_pruned": "Pruned BERT base model", # Submitted
     'climate_bert_pruned': "Fine-tuned and pruned DistilRoBERTa pre-trained on climate texts", # Not working
-    "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa"
+    "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa",
+    "embedding_moe": "Mixture-of-experts classifier with DistilRoBERTa embeddings"
 }
 
 
@@ -75,7 +76,6 @@ class TextDataset(Dataset):
 
     def __len__(self) -> int:
         return len(self.texts)
-
 
 
 def bert_classifier(test_dataset: dict, model: str):
@@ -116,6 +116,42 @@ def bert_classifier(test_dataset: dict, model: str):
 
     return predictions
 
+def moe_classifier(test_dataset: dict, model: str):
+    print("Starting MoE run")
+    texts = test_dataset["quote"]
+    model_path = f"theterryzhang/frugal_ai_{model}/0131_MoE_final.pt"  # torch.load needs a local path; see the note below
+
+    embedding_model = AutoModel.from_pretrained("sentence-transformers/all-distilroberta-v1")
+    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-distilroberta-v1")
+
+    dataset = TextDataset(texts, tokenizer=tokenizer, max_length=512)
+    dataloader = DataLoader(dataset, batch_size=64, shuffle=False)
+
+    # Use CUDA if available
+    device, _, _ = get_backend()
+
+    model = MoEClassifier(3, 0.05)
+    model.load_state_dict(torch.load(model_path))
+    model = model.to(device)
+    embedding_model = embedding_model.to(device).eval()  # keep the frozen encoder on the same device
+    print("Starting MoE Classifier")
+
+    model.eval()
+    with torch.no_grad():
+        predictions = np.array([])
+        for batch in dataloader:
+            input_ids = batch['input_ids'].to(device)
+            attn_mask = batch['attention_mask'].to(device)
+            embedding_outputs = embedding_model(input_ids, attn_mask)
+            embeddings = embedding_outputs.last_hidden_state[:, 0, :]  # first-token (CLS) embedding
+
+            outputs = model(embeddings)
+            p = torch.argmax(outputs, dim=1)  # MoEClassifier returns raw logits, not a ModelOutput
+            predictions = np.append(predictions, p.cpu().numpy())
+
+    print("Finished running MoE Classifier")
+
+    return predictions
 
 @router.post(ROUTE, tags=["Text Task"])
 async def evaluate_text(request: TextEvaluationRequest,
@@ -170,6 +206,8 @@ async def evaluate_text(request: TextEvaluationRequest,
         predictions = tree_classifier(test_dataset, model='xgb_pipeline')
     elif 'bert' in model:
         predictions = bert_classifier(test_dataset, model)
+    elif 'moe' in model:
+        predictions = moe_classifier(test_dataset, model)
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
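One caveat on moe_classifier: torch.load expects a filesystem path, so the repo-style model_path only resolves if that directory already exists locally. If the checkpoint actually lives on the Hugging Face Hub, it would first need to be fetched along these lines; the repo id and filename split below is an assumption inferred from the string above, not code from this commit:

import torch
from huggingface_hub import hf_hub_download

# Assumed split of model_path into repo id + filename (illustrative).
local_path = hf_hub_download(
    repo_id="theterryzhang/frugal_ai_embedding_moe",
    filename="0131_MoE_final.pt",
)
state_dict = torch.load(local_path, map_location="cpu")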