Update tasks/text.py
Browse files
- tasks/text.py +14 -3
tasks/text.py
CHANGED
@@ -2,7 +2,9 @@ from fastapi import APIRouter
|
|
2 |
from datetime import datetime
|
3 |
from datasets import load_dataset
|
4 |
from sklearn.metrics import accuracy_score
|
5 |
-
import
|
|
|
|
|
6 |
|
7 |
from .utils.evaluation import TextEvaluationRequest
|
8 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
@@ -47,6 +49,12 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
47 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
48 |
test_dataset = train_test["test"]
|
49 |
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
# Start tracking emissions
|
51 |
tracker.start()
|
52 |
tracker.start_task("inference")
|
@@ -58,8 +66,11 @@ async def evaluate_text(request: TextEvaluationRequest):
|
|
58 |
|
59 |
# Make random predictions (placeholder for actual model inference)
|
60 |
true_labels = test_dataset["label"]
|
61 |
-
|
62 |
-
|
|
|
|
|
|
|
63 |
#--------------------------------------------------------------------------------------------
|
64 |
# YOUR MODEL INFERENCE STOPS HERE
|
65 |
#--------------------------------------------------------------------------------------------
|
|
|
2 |
from datetime import datetime
|
3 |
from datasets import load_dataset
|
4 |
from sklearn.metrics import accuracy_score
|
5 |
+
from sklearn.linear_model import LogisticRegression
|
6 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
7 |
+
from sklearn.model_selection import train_test_split
|
8 |
|
9 |
from .utils.evaluation import TextEvaluationRequest
|
10 |
from .utils.emissions import tracker, clean_emissions_data, get_space_info
|
|
|
49 |
train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
|
50 |
test_dataset = train_test["test"]
|
51 |
|
52 |
+
tfidf_vect = TfidfVectorizer(stop_words = 'english')
|
53 |
+
|
54 |
+
tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
|
55 |
+
tfidf_test = tfidf_vect.transform(test_dataset['quote'])
|
56 |
+
|
57 |
+
|
58 |
# Start tracking emissions
|
59 |
tracker.start()
|
60 |
tracker.start_task("inference")
|
|
|
66 |
|
67 |
# Make random predictions (placeholder for actual model inference)
|
68 |
true_labels = test_dataset["label"]
|
69 |
+
LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
|
70 |
+
solver='liblinear')
|
71 |
+
LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(y_train_v))
|
72 |
+
predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
|
73 |
+
|
74 |
#--------------------------------------------------------------------------------------------
|
75 |
# YOUR MODEL INFERENCE STOPS HERE
|
76 |
#--------------------------------------------------------------------------------------------
|