Terry Zhang committed on
Commit
03a22c3
·
1 Parent(s): c0a2424

add moe classifier function and results

Browse files
Files changed (3) hide show
  1. results.txt +44 -0
  2. tasks/custom_classifiers.py +72 -1
  3. tasks/text.py +41 -3
results.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "username": "theterryzhang",
3
+ "space_url": "https://huggingface.co/spaces/theterryzhang/submission-template",
4
+ "submission_timestamp": "2025-01-31T12:01:09.396352",
5
+ "model_description": "Fine-tuned sentence transformer DistilRoBERTa",
6
+ "accuracy": 0.7015384615384616,
7
+ "energy_consumed_wh": 0.5669492253472,
8
+ "emissions_gco2eq": 0.2092804495941757,
9
+ "emissions_data": {
10
+ "run_id": "6cfd4617-a2f2-4e4a-8778-03dd45b162be",
11
+ "duration": 11.484101312999996,
12
+ "emissions": 0.0002092804495941757,
13
+ "emissions_rate": 0.000018223608660457374,
14
+ "cpu_power": 105,
15
+ "gpu_power": 66.98551893981808,
16
+ "ram_power": 5.74672794342041,
17
+ "cpu_energy": 0.00033494035373749984,
18
+ "gpu_energy": 0.00021367989316599975,
19
+ "ram_energy": 0.00001832897844370036,
20
+ "energy_consumed": 0.0005669492253472001,
21
+ "country_name": "United States",
22
+ "country_iso_code": "USA",
23
+ "region": "virginia",
24
+ "cloud_provider": "",
25
+ "cloud_region": "",
26
+ "os": "Linux-5.10.230-223.885.amzn2.x86_64-x86_64-with-glibc2.36",
27
+ "python_version": "3.9.21",
28
+ "codecarbon_version": "2.8.3",
29
+ "cpu_count": 4,
30
+ "cpu_model": "Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz",
31
+ "gpu_count": 1,
32
+ "gpu_model": "1 x Tesla T4",
33
+ "ram_total_size": 15.324607849121094,
34
+ "tracking_mode": "machine",
35
+ "on_cloud": "N",
36
+ "pue": 1
37
+ },
38
+ "api_route": "/text",
39
+ "dataset_config": {
40
+ "dataset_name": "QuotaClimat/frugalaichallenge-text-train",
41
+ "test_size": 0.2,
42
+ "test_seed": 42
43
+ }
44
+ }
tasks/custom_classifiers.py CHANGED
@@ -2,6 +2,7 @@ from transformers import RobertaModel, AutoTokenizer
2
  from transformers.modeling_outputs import SequenceClassifierOutput
3
  from huggingface_hub import PyTorchModelHubMixin
4
  from torch.nn import CrossEntropyLoss
 
5
  import torch.nn as nn
6
  import torch
7
 
@@ -25,4 +26,74 @@ class SentenceBERTClassifier(nn.Module, PyTorchModelHubMixin):
25
  logits=logits,
26
  hidden_states=outputs.hidden_states,
27
  attentions=outputs.attentions,
28
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  from transformers.modeling_outputs import SequenceClassifierOutput
3
  from huggingface_hub import PyTorchModelHubMixin
4
  from torch.nn import CrossEntropyLoss
5
+ import torch.nn.functional as F
6
  import torch.nn as nn
7
  import torch
8
 
 
26
  logits=logits,
27
  hidden_states=outputs.hidden_states,
28
  attentions=outputs.attentions,
29
+ )
30
+
31
+
32
class DenseBlock(nn.Module):
    """Linear projection followed by batch norm, ReLU and dropout.

    Args:
        input_size: Number of features in each incoming sample.
        output_size: Number of features produced by the linear layer.
        dropout_rate: Probability handed to ``nn.Dropout``.
    """

    def __init__(self, input_size, output_size, dropout_rate):
        super().__init__()
        # The attribute names below are the state-dict key prefixes.
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)
        self.batch_norm = nn.BatchNorm1d(num_features=output_size)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(p=dropout_rate)

    def forward(self, input):
        """Apply linear -> batch norm -> ReLU -> dropout and return the result."""
        projected = self.linear(input)
        normalized = self.batch_norm(projected)
        return self.dropout(self.activation(normalized))
46
+
47
class FeedForwardExpert(nn.Module):
    """Feed-forward classification head built from two dense blocks.

    The input is passed through ``input_size -> 400 -> 200`` dense blocks and
    a final linear layer that produces ``num_labels`` unnormalized scores.

    Args:
        dropout_rate: Dropout probability used inside both dense blocks.
        num_labels: Number of output scores per sample.
        input_size: Number of features in each incoming sample. Defaults to
            768, the width that was previously hard-coded.
    """

    def __init__(self, dropout_rate, num_labels=8, input_size=768):
        super().__init__()

        # Define the dense blocks (attribute names are the state-dict keys).
        self.block_1 = DenseBlock(input_size, 400, dropout_rate)
        self.block_2 = DenseBlock(400, 200, dropout_rate)
        self.final_layer = nn.Linear(200, num_labels)

        self.initialize_weights()

    def forward(self, input):
        """Return the ``num_labels`` scores computed for each sample in ``input``."""
        output = self.block_1(input)
        output = self.block_2(output)
        output = self.final_layer(output)

        return output

    def initialize_weights(self):
        """Xavier-initialize every linear weight and zero every linear bias."""
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_uniform_(m.weight)
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
71
+
72
+
73
class MoEClassifier(nn.Module):
    """Mixture-of-experts classifier over fixed 768-dimensional embeddings.

    A small gating network produces a softmax weight per expert, and the
    final scores are the weighted sum of every expert's output.

    Args:
        num_experts: Number of feed-forward experts; must be at least 1.
        dropout_rate: Dropout probability forwarded to each expert.
        gate_hidden_size: Width of the gating network's hidden layer.
        num_labels: Number of output scores per sample, forwarded to each
            expert.

    Raises:
        ValueError: If ``num_experts`` is smaller than 1.
    """

    def __init__(self, num_experts, dropout_rate=0.1, gate_hidden_size=128, num_labels=8):
        super().__init__()
        if num_experts < 1:
            # Without experts, forward() would fail later on torch.stack([]).
            raise ValueError(f"num_experts must be >= 1, got {num_experts}")

        # NOTE: ``dropout`` holds the dropout *rate* (a float), not a module.
        self.dropout = dropout_rate
        self.num_experts = num_experts
        self.gate_hidden_size = gate_hidden_size
        self.num_labels = num_labels

        # Create a list of feedforward experts
        self.experts = nn.ModuleList(
            [FeedForwardExpert(self.dropout, num_labels) for _ in range(self.num_experts)]
        )

        # A gating network
        self.gate_fc1 = nn.Linear(768, self.gate_hidden_size)
        self.gate_fc2 = nn.Linear(self.gate_hidden_size, self.num_experts)

    def forward(self, x):
        """Return the gate-weighted combination of all expert outputs.

        ``x`` is indexed as (batch, features) by the ``dim=1`` softmax and
        the ``dim=2`` stack below; the result is a plain tensor of shape
        (batch, num_labels), not a model-output object.
        """
        # Calculate gating weights: (batch, num_experts, 1)
        gate_hidden = F.relu(self.gate_fc1(x))
        weights = F.softmax(self.gate_fc2(gate_hidden), dim=1).unsqueeze(2)

        # Get outputs from all experts: (batch, num_labels, num_experts)
        outputs = torch.stack([expert(x) for expert in self.experts], dim=2)

        # apply weights using a batch matrix multiplication
        weighted_outputs = torch.bmm(outputs, weights).squeeze(2)

        return weighted_outputs
tasks/text.py CHANGED
@@ -15,7 +15,7 @@ from .utils.evaluation import TextEvaluationRequest
15
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
16
  from .utils.text_preprocessor import preprocess
17
  from accelerate.test_utils.testing import get_backend
18
- from custom_classifiers import SentenceBERTClassifier
19
 
20
  router = APIRouter()
21
 
@@ -27,7 +27,8 @@ models_descriptions = {
27
  "tfidf_xgb": "TF-IDF vectorizer and XGBoost classifier", # Submitted
28
  "bert_base_pruned": "Pruned BERT base model", # Submitted
29
  'climate_bert_pruned': "Fine-tuned and pruned DistilRoBERTa pre-trained on climate texts", # Not working
30
- "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa"
 
31
  }
32
 
33
 
@@ -75,7 +76,6 @@ class TextDataset(Dataset):
75
 
76
  def __len__(self) -> int:
77
  return len(self.texts)
78
-
79
 
80
 
81
  def bert_classifier(test_dataset: dict, model: str):
@@ -116,6 +116,42 @@ def bert_classifier(test_dataset: dict, model: str):
116
 
117
  return predictions
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
 
120
  @router.post(ROUTE, tags=["Text Task"])
121
  async def evaluate_text(request: TextEvaluationRequest,
@@ -170,6 +206,8 @@ async def evaluate_text(request: TextEvaluationRequest,
170
  predictions = tree_classifier(test_dataset, model='xgb_pipeline')
171
  elif 'bert' in model:
172
  predictions = bert_classifier(test_dataset, model)
 
 
173
 
174
  #--------------------------------------------------------------------------------------------
175
  # YOUR MODEL INFERENCE STOPS HERE
 
15
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
16
  from .utils.text_preprocessor import preprocess
17
  from accelerate.test_utils.testing import get_backend
18
+ from custom_classifiers import SentenceBERTClassifier, MoEClassifier
19
 
20
  router = APIRouter()
21
 
 
27
  "tfidf_xgb": "TF-IDF vectorizer and XGBoost classifier", # Submitted
28
  "bert_base_pruned": "Pruned BERT base model", # Submitted
29
  'climate_bert_pruned': "Fine-tuned and pruned DistilRoBERTa pre-trained on climate texts", # Not working
30
+ "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa",
31
+ "embedding_moe": "Mixture of expert classifier with DistilBERT Embeddings"
32
  }
33
 
34
 
 
76
 
77
  def __len__(self) -> int:
78
  return len(self.texts)
 
79
 
80
 
81
  def bert_classifier(test_dataset: dict, model: str):
 
116
 
117
  return predictions
118
 
119
def moe_classifier(test_dataset: dict, model: str):
    """Classify quotes with the mixture-of-experts head on sentence embeddings.

    Each quote is tokenized, embedded with
    ``sentence-transformers/all-distilroberta-v1`` (first-token hidden state),
    and scored by a 3-expert ``MoEClassifier``; the arg-max index per quote is
    returned.

    Args:
        test_dataset: Mapping whose ``"quote"`` entry holds the texts.
        model: Model key, interpolated into the checkpoint path.

    Returns:
        A 1-D float64 numpy array with one predicted class index per quote.
    """
    print("Starting MoE run")
    texts = test_dataset["quote"]
    # NOTE(review): this looks like a Hub repo id plus filename rather than a
    # local path; torch.load only reads local files / file-like objects, so
    # confirm the checkpoint really exists at this relative location.
    model_path = f"theterryzhang/frugal_ai_{model}/0131_MoE_final.pt"

    embedding_model = AutoModel.from_pretrained("sentence-transformers/all-distilroberta-v1")
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-distilroberta-v1")

    dataset = TextDataset(texts, tokenizer=tokenizer, max_length=512)
    dataloader = DataLoader(dataset, batch_size=64, shuffle=False)

    # Use the accelerator reported by accelerate (CUDA if available)
    device, _, _ = get_backend()

    # The encoder must live on the same device as the batches fed to it.
    embedding_model = embedding_model.to(device)
    embedding_model.eval()

    # Use a separate name so the ``model`` key argument is not shadowed.
    classifier = MoEClassifier(3, 0.05)
    # map_location lets a checkpoint saved on GPU load on a CPU-only host.
    classifier.load_state_dict(torch.load(model_path, map_location=device))
    classifier = classifier.to(device)

    print("Starting MoE Classifier")

    classifier.eval()
    # Seed with an empty float array so the result keeps the float64 dtype
    # (and is an empty array when there are no batches).
    batch_predictions = [np.array([])]
    with torch.no_grad():
        for batch in dataloader:
            input_ids = batch['input_ids'].to(device)
            attn_mask = batch['attention_mask'].to(device)
            embedding_outputs = embedding_model(input_ids=input_ids, attention_mask=attn_mask)
            # First-token hidden state is used as the sentence embedding.
            embeddings = embedding_outputs.last_hidden_state[:, 0, :]

            outputs = classifier(embeddings)
            # MoEClassifier.forward returns a plain tensor; fall back to it
            # when there is no ``.logits`` attribute.
            logits = getattr(outputs, "logits", outputs)
            p = torch.argmax(logits, dim=1)
            batch_predictions.append(p.cpu().numpy())

    # Single concatenation instead of a quadratic np.append per batch.
    predictions = np.concatenate(batch_predictions)

    print("Finished running MoE Classifier")

    return predictions
155
 
156
  @router.post(ROUTE, tags=["Text Task"])
157
  async def evaluate_text(request: TextEvaluationRequest,
 
206
  predictions = tree_classifier(test_dataset, model='xgb_pipeline')
207
  elif 'bert' in model:
208
  predictions = bert_classifier(test_dataset, model)
209
+ elif 'moe' in model:
210
+ predictions = moe_classifier(test_dataset, model)
211
 
212
  #--------------------------------------------------------------------------------------------
213
  # YOUR MODEL INFERENCE STOPS HERE