Update tasks/text.py
Browse files - tasks/text.py +5 -2
tasks/text.py
CHANGED
@@ -45,10 +45,13 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
45 |
# Convert string labels to integers
|
46 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
47 |
|
|
|
|
|
48 |
# Split dataset
|
49 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
50 |
test_dataset = train_test["test"]
|
51 |
-
|
|
|
52 |
tfidf_vect = TfidfVectorizer(stop_words = 'english')
|
53 |
|
54 |
tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
|
@@ -68,7 +71,7 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
68 |
true_labels = test_dataset["label"]
|
69 |
LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
|
70 |
solver='liblinear')
|
71 |
-
LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(
|
72 |
predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
|
73 |
|
74 |
#--------------------------------------------------------------------------------------------
|
|
|
45 |
# Convert string labels to integers
|
46 |
dataset = dataset.map(lambda x: {"label": LABEL_MAPPING[x["label"]]})
|
47 |
|
48 |
+
|
49 |
+
|
50 |
# Split dataset
|
51 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
52 |
test_dataset = train_test["test"]
|
53 |
+
train_dataset = train_test["train"]
|
54 |
+
y_train=train_dataset['label']
|
55 |
tfidf_vect = TfidfVectorizer(stop_words = 'english')
|
56 |
|
57 |
tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
|
|
|
71 |
true_labels = test_dataset["label"]
|
72 |
LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
|
73 |
solver='liblinear')
|
74 |
+
LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(y_train))
|
75 |
predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
|
76 |
|
77 |
#--------------------------------------------------------------------------------------------
|