Spaces:
Sleeping
Sleeping
Set all essential configuration parameters without bias-related settings
Browse files
- tasks/text.py +20 -29
tasks/text.py
CHANGED
@@ -15,31 +15,6 @@ router = APIRouter()
|
|
15 |
DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
|
16 |
ROUTE = "/text"
|
17 |
|
18 |
-
class ModernBertConfig(PretrainedConfig):
|
19 |
-
model_type = "modernbert"
|
20 |
-
|
21 |
-
def __init__(
|
22 |
-
self,
|
23 |
-
vocab_size=50368,
|
24 |
-
hidden_size=768,
|
25 |
-
num_hidden_layers=22,
|
26 |
-
num_attention_heads=12,
|
27 |
-
intermediate_size=1152,
|
28 |
-
max_position_embeddings=8192,
|
29 |
-
layer_norm_eps=1e-5,
|
30 |
-
classifier_dropout=0.0,
|
31 |
-
**kwargs
|
32 |
-
):
|
33 |
-
super().__init__(**kwargs)
|
34 |
-
self.vocab_size = vocab_size
|
35 |
-
self.hidden_size = hidden_size
|
36 |
-
self.num_hidden_layers = num_hidden_layers
|
37 |
-
self.num_attention_heads = num_attention_heads
|
38 |
-
self.intermediate_size = intermediate_size
|
39 |
-
self.max_position_embeddings = max_position_embeddings
|
40 |
-
self.layer_norm_eps = layer_norm_eps
|
41 |
-
self.classifier_dropout = classifier_dropout
|
42 |
-
|
43 |
|
44 |
@router.post(ROUTE, tags=["Text Task"],
|
45 |
description=DESCRIPTION)
|
@@ -79,24 +54,40 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
79 |
# MODEL INFERENCE CODE
|
80 |
#--------------------------------------------------------------------------------------------
|
81 |
|
|
|
82 |
try:
|
83 |
# Set device
|
84 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
85 |
|
86 |
# Model and tokenizer paths
|
87 |
model_name = "Tonic/climate-guard-toxic-agent"
|
|
|
88 |
|
89 |
-
# Create
|
90 |
config = ModernBertConfig(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
91 |
num_labels=8,
|
92 |
id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
|
93 |
-
label2id=LABEL_MAPPING
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
)
|
95 |
|
96 |
# Load tokenizer
|
97 |
-
tokenizer = AutoTokenizer.from_pretrained(
|
98 |
|
99 |
-
# Load model with
|
100 |
model = AutoModelForSequenceClassification.from_pretrained(
|
101 |
model_name,
|
102 |
config=config,
|
|
|
15 |
DESCRIPTION = "Climate Guard Toxic Agent is a ModernBERT for Climate Disinformation Detection"
|
16 |
ROUTE = "/text"
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
@router.post(ROUTE, tags=["Text Task"],
|
20 |
description=DESCRIPTION)
|
|
|
54 |
# MODEL INFERENCE CODE
|
55 |
#--------------------------------------------------------------------------------------------
|
56 |
|
57 |
+
|
58 |
try:
|
59 |
# Set device
|
60 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
61 |
|
62 |
# Model and tokenizer paths
|
63 |
model_name = "Tonic/climate-guard-toxic-agent"
|
64 |
+
tokenizer_name = "answerdotai/ModernBERT-base"
|
65 |
|
66 |
+
# Create ModernBERT config
|
67 |
config = ModernBertConfig(
|
68 |
+
vocab_size=50368,
|
69 |
+
hidden_size=768,
|
70 |
+
num_hidden_layers=22,
|
71 |
+
num_attention_heads=12,
|
72 |
+
intermediate_size=1152,
|
73 |
+
max_position_embeddings=8192,
|
74 |
+
layer_norm_eps=1e-5,
|
75 |
+
classifier_dropout=0.0,
|
76 |
num_labels=8,
|
77 |
id2label={str(i): label for i, label in enumerate(LABEL_MAPPING.keys())},
|
78 |
+
label2id=LABEL_MAPPING,
|
79 |
+
problem_type="single_label_classification",
|
80 |
+
classifier_activation="gelu",
|
81 |
+
classifier_pooling="mean",
|
82 |
+
attention_dropout=0.0,
|
83 |
+
embedding_dropout=0.0,
|
84 |
+
mlp_dropout=0.0
|
85 |
)
|
86 |
|
87 |
# Load tokenizer
|
88 |
+
tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
|
89 |
|
90 |
+
# Load model with config
|
91 |
model = AutoModelForSequenceClassification.from_pretrained(
|
92 |
model_name,
|
93 |
config=config,
|