Terry Zhang committed · Commit 03a22c3 · Parent(s): c0a2424

add moe classifier function and results

Files changed:
- results.txt +44 -0
- tasks/custom_classifiers.py +72 -1
- tasks/text.py +41 -3
results.txt
ADDED
@@ -0,0 +1,44 @@
+{
+    "username": "theterryzhang",
+    "space_url": "https://huggingface.co/spaces/theterryzhang/submission-template",
+    "submission_timestamp": "2025-01-31T12:01:09.396352",
+    "model_description": "Fine-tuned sentence transformer DistilRoBERTa",
+    "accuracy": 0.7015384615384616,
+    "energy_consumed_wh": 0.5669492253472,
+    "emissions_gco2eq": 0.2092804495941757,
+    "emissions_data": {
+        "run_id": "6cfd4617-a2f2-4e4a-8778-03dd45b162be",
+        "duration": 11.484101312999996,
+        "emissions": 0.0002092804495941757,
+        "emissions_rate": 0.000018223608660457374,
+        "cpu_power": 105,
+        "gpu_power": 66.98551893981808,
+        "ram_power": 5.74672794342041,
+        "cpu_energy": 0.00033494035373749984,
+        "gpu_energy": 0.00021367989316599975,
+        "ram_energy": 0.00001832897844370036,
+        "energy_consumed": 0.0005669492253472001,
+        "country_name": "United States",
+        "country_iso_code": "USA",
+        "region": "virginia",
+        "cloud_provider": "",
+        "cloud_region": "",
+        "os": "Linux-5.10.230-223.885.amzn2.x86_64-x86_64-with-glibc2.36",
+        "python_version": "3.9.21",
+        "codecarbon_version": "2.8.3",
+        "cpu_count": 4,
+        "cpu_model": "Intel(R) Xeon(R) Platinum 8259CL CPU @ 2.50GHz",
+        "gpu_count": 1,
+        "gpu_model": "1 x Tesla T4",
+        "ram_total_size": 15.324607849121094,
+        "tracking_mode": "machine",
+        "on_cloud": "N",
+        "pue": 1
+    },
+    "api_route": "/text",
+    "dataset_config": {
+        "dataset_name": "QuotaClimat/frugalaichallenge-text-train",
+        "test_size": 0.2,
+        "test_seed": 42
+    }
+}
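The emissions block mirrors codecarbon's EmissionsData record (codecarbon 2.8.3 here): energy_consumed is in kWh, so multiplying by 1000 gives the energy_consumed_wh value above, and emissions is in kg CO2eq. A minimal sketch of how such a record is captured; the repo actually wraps its tracker in tasks/utils/emissions.py, so the names below are illustrative, not the project's code:

# Minimal sketch of codecarbon usage (illustrative, assuming the
# EmissionsTracker API; the repo uses its own wrapper module).
from codecarbon import EmissionsTracker

tracker = EmissionsTracker()
tracker.start()
# ... run the tracked inference here ...
tracker.stop()

# After stop(), final_emissions_data carries the fields seen above:
# duration (s), energy_consumed (kWh), emissions (kg CO2eq), power draws, etc.
data = tracker.final_emissions_data
print(data.energy_consumed * 1000, data.emissions * 1000)  # Wh, g CO2eq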
tasks/custom_classifiers.py
CHANGED
@@ -2,6 +2,7 @@ from transformers import RobertaModel, AutoTokenizer
 from transformers.modeling_outputs import SequenceClassifierOutput
 from huggingface_hub import PyTorchModelHubMixin
 from torch.nn import CrossEntropyLoss
+import torch.nn.functional as F
 import torch.nn as nn
 import torch
 
@@ -25,4 +26,74 @@ class SentenceBERTClassifier(nn.Module, PyTorchModelHubMixin):
             logits=logits,
             hidden_states=outputs.hidden_states,
             attentions=outputs.attentions,
-        )
+        )
+
+
+class DenseBlock(nn.Module):
+    def __init__(self, input_size, output_size, dropout_rate):
+        super(DenseBlock, self).__init__()
+        self.linear = nn.Linear(input_size, output_size)
+        self.batch_norm = nn.BatchNorm1d(output_size)
+        self.activation = nn.ReLU()
+        self.dropout = nn.Dropout(dropout_rate)
+
+    def forward(self, input):
+        output = self.linear(input)
+        output = self.batch_norm(output)
+        output = self.activation(output)
+        output = self.dropout(output)
+        return output
+
+class FeedForwardExpert(nn.Module):
+    def __init__(self, dropout_rate, num_labels=8):
+        super(FeedForwardExpert, self).__init__()
+
+        # Define the dense blocks
+        self.block_1 = DenseBlock(768, 400, dropout_rate)
+        self.block_2 = DenseBlock(400, 200, dropout_rate)
+        self.final_layer = nn.Linear(200, num_labels)
+
+        self.initialize_weights()
+
+    def forward(self, input):
+        output = self.block_1(input)
+        output = self.block_2(output)
+        output = self.final_layer(output)
+
+        return output
+
+    def initialize_weights(self):
+        for m in self.modules():
+            if isinstance(m, nn.Linear):
+                nn.init.xavier_uniform_(m.weight)
+                if m.bias is not None:
+                    nn.init.zeros_(m.bias)
+
+
+class MoEClassifier(nn.Module):
+    def __init__(self, num_experts, dropout_rate=0.1, gate_hidden_size=128):
+        super(MoEClassifier, self).__init__()
+        self.dropout = dropout_rate
+        self.num_experts = num_experts
+        self.gate_hidden_size = gate_hidden_size
+
+        # Create a list of feedforward experts
+        self.experts = nn.ModuleList([FeedForwardExpert(self.dropout) for _ in range(self.num_experts)])
+
+        # A gating network over the 768-dim input embedding
+        self.gate_fc1 = nn.Linear(768, self.gate_hidden_size)
+        self.gate_fc2 = nn.Linear(self.gate_hidden_size, self.num_experts)
+
+    def forward(self, x):
+
+        # Calculate gating weights: (batch, num_experts, 1)
+        gate_hidden = F.relu(self.gate_fc1(x))
+        weights = F.softmax(self.gate_fc2(gate_hidden), dim=1).unsqueeze(2)
+
+        # Get outputs from all experts, stacked as (batch, num_labels, num_experts)
+        outputs = torch.stack([expert(x) for expert in self.experts], dim=2)
+
+        # Apply weights using a batch matrix multiplication -> (batch, num_labels)
+        weighted_outputs = torch.bmm(outputs, weights).squeeze(2)
+
+        return weighted_outputs
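This is a dense (soft) mixture of experts: every expert runs on every input and the gate mixes their logits, rather than routing to a top-k subset. A quick shape-check sketch of the new head, assuming a dummy batch of pooled 768-dim sentence embeddings (the tensor and batch size below are illustrative):

import torch
from tasks.custom_classifiers import MoEClassifier

# Same configuration that tasks/text.py instantiates: 3 experts, 5% dropout.
model = MoEClassifier(num_experts=3, dropout_rate=0.05)
model.eval()  # put BatchNorm/Dropout in inference mode

embeddings = torch.randn(16, 768)   # dummy pooled embeddings (illustrative)
with torch.no_grad():
    logits = model(embeddings)      # gate-weighted sum of expert logits
print(logits.shape)                 # torch.Size([16, 8]); num_labels defaults to 8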
tasks/text.py
CHANGED
@@ -15,7 +15,7 @@ from .utils.evaluation import TextEvaluationRequest
 from .utils.emissions import tracker, clean_emissions_data, get_space_info
 from .utils.text_preprocessor import preprocess
 from accelerate.test_utils.testing import get_backend
-from custom_classifiers import SentenceBERTClassifier
+from custom_classifiers import SentenceBERTClassifier, MoEClassifier
 
 router = APIRouter()
 
@@ -27,7 +27,8 @@ models_descriptions = {
     "tfidf_xgb": "TF-IDF vectorizer and XGBoost classifier", # Submitted
     "bert_base_pruned": "Pruned BERT base model", # Submitted
     'climate_bert_pruned': "Fine-tuned and pruned DistilRoBERTa pre-trained on climate texts", # Not working
-    "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa"
+    "sbert_distilroberta": "Fine-tuned sentence transformer DistilRoBERTa",
+    "embedding_moe": "Mixture-of-experts classifier with DistilRoBERTa embeddings"
 }
 
 
@@ -75,7 +76,6 @@ class TextDataset(Dataset):
 
     def __len__(self) -> int:
         return len(self.texts)
-
 
 
 def bert_classifier(test_dataset: dict, model: str):
@@ -116,6 +116,42 @@ def bert_classifier(test_dataset: dict, model: str):
 
     return predictions
 
+def moe_classifier(test_dataset: dict, model: str):
+    print("Starting MoE run")
+    texts = test_dataset["quote"]
+    model_path = f"theterryzhang/frugal_ai_{model}/0131_MoE_final.pt"  # torch.load needs a local path; see the note below
+
+    embedding_model = AutoModel.from_pretrained("sentence-transformers/all-distilroberta-v1")
+    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-distilroberta-v1")
+
+    dataset = TextDataset(texts, tokenizer=tokenizer, max_length=512)
+    dataloader = DataLoader(dataset, batch_size=64, shuffle=False)
+
+    # Use CUDA if available
+    device, _, _ = get_backend()
+
+    model = MoEClassifier(3, 0.05)
+    model.load_state_dict(torch.load(model_path))
+    model = model.to(device)
+    embedding_model = embedding_model.to(device).eval()  # keep the frozen encoder on the same device
+    print("Starting MoE Classifier")
+
+    model.eval()
+    with torch.no_grad():
+        predictions = np.array([])
+        for batch in dataloader:
+            input_ids = batch['input_ids'].to(device)
+            attn_mask = batch['attention_mask'].to(device)
+            embedding_outputs = embedding_model(input_ids, attn_mask)
+            embeddings = embedding_outputs.last_hidden_state[:, 0, :]  # first-token (CLS) embedding
+
+            outputs = model(embeddings)
+            p = torch.argmax(outputs, dim=1)  # MoEClassifier returns raw logits, not a ModelOutput
+            predictions = np.append(predictions, p.cpu().numpy())
+
+    print("Finished running MoE Classifier")
+
+    return predictions
 
 @router.post(ROUTE, tags=["Text Task"])
 async def evaluate_text(request: TextEvaluationRequest,
@@ -170,6 +206,8 @@ async def evaluate_text(request: TextEvaluationRequest,
         predictions = tree_classifier(test_dataset, model='xgb_pipeline')
     elif 'bert' in model:
         predictions = bert_classifier(test_dataset, model)
+    elif 'moe' in model:
+        predictions = moe_classifier(test_dataset, model)
 
     #--------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
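One caveat on moe_classifier: torch.load expects a filesystem path, so the repo-style model_path only resolves if that directory already exists locally. If the checkpoint actually lives on the Hugging Face Hub, it would first need to be fetched along these lines; the repo id and filename split below is an assumption inferred from the string above, not code from this commit:

import torch
from huggingface_hub import hf_hub_download

# Assumed split of model_path into repo id + filename (illustrative).
local_path = hf_hub_download(
    repo_id="theterryzhang/frugal_ai_embedding_moe",
    filename="0131_MoE_final.pt",
)
state_dict = torch.load(local_path, map_location="cpu")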