Spaces:
Sleeping
Sleeping
Update tasks/text.py
Browse files - tasks/text.py +21 -3
tasks/text.py
CHANGED
@@ -18,13 +18,18 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
18 |
"""
|
19 |
Evaluate text classification for climate disinformation detection.
|
20 |
|
21 |
-
Current Model:
|
22 |
-
- Makes random predictions from the label space (0-7)
|
23 |
- Used as a baseline for comparison
|
24 |
"""
|
25 |
# Get space info
|
26 |
username, space_url = get_space_info()
|
27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
# Define the label mapping
|
29 |
LABEL_MAPPING = {
|
30 |
"0_not_relevant": 0,
|
@@ -44,9 +49,19 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
44 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
45 |
|
46 |
# Split dataset
|
|
|
47 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
48 |
test_dataset = train_test["test"]
|
|
|
|
|
|
|
|
|
|
|
49 |
|
|
|
|
|
|
|
|
|
50 |
# Start tracking emissions
|
51 |
tracker.start()
|
52 |
tracker.start_task("inference")
|
@@ -58,8 +73,11 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
58 |
|
59 |
# Make random predictions (placeholder for actual model inference)
|
60 |
true_labels = test_dataset["label"]
|
61 |
-
predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
|
62 |
|
|
|
|
|
|
|
|
|
63 |
#--------------------------------------------------------------------------------------------
|
64 |
# YOUR MODEL INFERENCE STOPS HERE
|
65 |
#--------------------------------------------------------------------------------------------
|
|
|
18 |
"""
|
19 |
Evaluate text classification for climate disinformation detection.
|
20 |
|
21 |
+
Current Model: Logistic regression
|
|
|
22 |
- Used as a baseline for comparison
|
23 |
"""
|
24 |
# Get space info
|
25 |
username, space_url = get_space_info()
|
26 |
|
27 |
+
from sklearn.linear_model import LogisticRegression
|
28 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
29 |
+
from sklearn.model_selection import train_test_split
|
30 |
+
from sklearn import metrics
|
31 |
+
from datetime import datetime
|
32 |
+
|
33 |
# Define the label mapping
|
34 |
LABEL_MAPPING = {
|
35 |
"0_not_relevant": 0,
|
|
|
49 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
50 |
|
51 |
# Split dataset
|
52 |
+
#train_test = dataset.train_test_split(test_size=.33, seed=42)
|
53 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
54 |
test_dataset = train_test["test"]
|
55 |
+
|
56 |
+
#test_dataset = train_test["test"]
|
57 |
+
#train_dataset = train_test["train"]
|
58 |
+
|
59 |
+
tfidf_vect = TfidfVectorizer(stop_words = 'english')
|
60 |
|
61 |
+
tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
|
62 |
+
tfidf_test = tfidf_vect.transform(test_dataset['quote'])
|
63 |
+
|
64 |
+
|
65 |
# Start tracking emissions
|
66 |
tracker.start()
|
67 |
tracker.start_task("inference")
|
|
|
73 |
|
74 |
# Collect the true labels, then fit the logistic regression model and predict on the test set
|
75 |
true_labels = test_dataset["label"]
|
|
|
76 |
|
77 |
+
LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
|
78 |
+
solver='liblinear')
|
79 |
+
LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(y_train_v))
|
80 |
+
predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
|
81 |
#--------------------------------------------------------------------------------------------
|
82 |
# YOUR MODEL INFERENCE STOPS HERE
|
83 |
#--------------------------------------------------------------------------------------------
|