frugal-ai-submission-template

Sleeping

App Files Files Community

Tonic committed on Feb 10

Commit

a8d5c7d

unverified ·

1 Parent(s): 889d09f

Set all essential configuration parameters without bias-related settings

Browse files

Files changed (1) hide show

tasks/text.py +20 -29

tasks/text.py CHANGED Viewed

@@ -15,31 +15,6 @@ router = APIRouter()
 DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
 ROUTE = "/text"
-class ModernBertConfig(PretrainedConfig):
-    model_type = "modernbert"
-    def __init__(
-        self,
-        vocab_size=50368,
-        hidden_size=768,
-        num_hidden_layers=22,
-        num_attention_heads=12,
-        intermediate_size=1152,
-        max_position_embeddings=8192,
-        layer_norm_eps=1e-5,
-        classifier_dropout=0.0,
-        **kwargs
-    ):
-        super().__init__(**kwargs)
-        self.vocab_size = vocab_size
-        self.hidden_size = hidden_size
-        self.num_hidden_layers = num_hidden_layers
-        self.num_attention_heads = num_attention_heads
-        self.intermediate_size = intermediate_size
-        self.max_position_embeddings = max_position_embeddings
-        self.layer_norm_eps = layer_norm_eps
-        self.classifier_dropout = classifier_dropout
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
@@ -79,24 +54,40 @@ async def evaluate_text(request: TextEvaluationRequest):
     # MODEL INFERENCE CODE
     #--------------------------------------------------------------------------------------------
     try:
         # Set device
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Model and tokenizer paths
         model_name = "Tonic/climate-guard-toxic-agent"
-        # Create custom config
         config = ModernBertConfig(
             num_labels=8,
             id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
-            label2id=LABEL_MAPPING
         )
         # Load tokenizer
-        tokenizer = AutoTokenizer.from_pretrained(model_name)
-        # Load model with custom config
         model = AutoModelForSequenceClassification.from_pretrained(
             model_name,
             config=config,

 DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
 ROUTE = "/text"
 @router.post(ROUTE, tags=["Text Task"],
              description=DESCRIPTION)
     # MODEL INFERENCE CODE
     #--------------------------------------------------------------------------------------------
     try:
         # Set device
         device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         # Model and tokenizer paths
         model_name = "Tonic/climate-guard-toxic-agent"
+        tokenizer_name = "answerdotai/ModernBERT-base"
+        # Create ModernBERT config
         config = ModernBertConfig(
+            vocab_size=50368,
+            hidden_size=768,
+            num_hidden_layers=22,
+            num_attention_heads=12,
+            intermediate_size=1152,
+            max_position_embeddings=8192,
+            layer_norm_eps=1e-5,
+            classifier_dropout=0.0,
             num_labels=8,
             id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
+            label2id=LABEL_MAPPING,
+            problem_type="single_label_classification",
+            classifier_activation="gelu",
+            classifier_pooling="mean",
+            attention_dropout=0.0,
+            embedding_dropout=0.0,
+            mlp_dropout=0.0
         )
         # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
+        # Load model with config
         model = AutoModelForSequenceClassification.from_pretrained(
             model_name,
             config=config,