Tonic committed on
Commit
a8d5c7d
·
unverified ·
1 Parent(s): 889d09f

Set all essential configuration parameters without bias-related settings

Browse files
Files changed (1) hide show
  1. tasks/text.py +20 -29
tasks/text.py CHANGED
@@ -15,31 +15,6 @@ router = APIRouter()
15
  DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
16
  ROUTE = "/text"
17
 
18
- class ModernBertConfig(PretrainedConfig):
19
- model_type = "modernbert"
20
-
21
- def __init__(
22
- self,
23
- vocab_size=50368,
24
- hidden_size=768,
25
- num_hidden_layers=22,
26
- num_attention_heads=12,
27
- intermediate_size=1152,
28
- max_position_embeddings=8192,
29
- layer_norm_eps=1e-5,
30
- classifier_dropout=0.0,
31
- **kwargs
32
- ):
33
- super().__init__(**kwargs)
34
- self.vocab_size = vocab_size
35
- self.hidden_size = hidden_size
36
- self.num_hidden_layers = num_hidden_layers
37
- self.num_attention_heads = num_attention_heads
38
- self.intermediate_size = intermediate_size
39
- self.max_position_embeddings = max_position_embeddings
40
- self.layer_norm_eps = layer_norm_eps
41
- self.classifier_dropout = classifier_dropout
42
-
43
 
44
  @router.post(ROUTE, tags=["Text Task"],
45
  description=DESCRIPTION)
@@ -79,24 +54,40 @@ async def evaluate_text(request: TextEvaluationRequest):
79
  # MODEL INFERENCE CODE
80
  #--------------------------------------------------------------------------------------------
81
 
 
82
  try:
83
  # Set device
84
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
85
 
86
  # Model and tokenizer paths
87
  model_name = "Tonic/climate-guard-toxic-agent"
 
88
 
89
- # Create custom config
90
  config = ModernBertConfig(
 
 
 
 
 
 
 
 
91
  num_labels=8,
92
  id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
93
- label2id=LABEL_MAPPING
 
 
 
 
 
 
94
  )
95
 
96
  # Load tokenizer
97
- tokenizer = AutoTokenizer.from_pretrained(model_name)
98
 
99
- # Load model with custom config
100
  model = AutoModelForSequenceClassification.from_pretrained(
101
  model_name,
102
  config=config,
 
15
  DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
16
  ROUTE = "/text"
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  @router.post(ROUTE, tags=["Text Task"],
20
  description=DESCRIPTION)
 
54
  # MODEL INFERENCE CODE
55
  #--------------------------------------------------------------------------------------------
56
 
57
+
58
  try:
59
  # Set device
60
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
61
 
62
  # Model and tokenizer paths
63
  model_name = "Tonic/climate-guard-toxic-agent"
64
+ tokenizer_name = "answerdotai/ModernBERT-base"
65
 
66
+ # Create ModernBERT config
67
  config = ModernBertConfig(
68
+ vocab_size=50368,
69
+ hidden_size=768,
70
+ num_hidden_layers=22,
71
+ num_attention_heads=12,
72
+ intermediate_size=1152,
73
+ max_position_embeddings=8192,
74
+ layer_norm_eps=1e-5,
75
+ classifier_dropout=0.0,
76
  num_labels=8,
77
  id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
78
+ label2id=LABEL_MAPPING,
79
+ problem_type="single_label_classification",
80
+ classifier_activation="gelu",
81
+ classifier_pooling="mean",
82
+ attention_dropout=0.0,
83
+ embedding_dropout=0.0,
84
+ mlp_dropout=0.0
85
  )
86
 
87
  # Load tokenizer
88
+ tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
89
 
90
+ # Load model with config
91
  model = AutoModelForSequenceClassification.from_pretrained(
92
  model_name,
93
  config=config,