laureBe committed on
Commit
945b47e
·
verified ·
1 Parent(s): 9685f7b

Update tasks/text.py

Browse files
Files changed (1) hide show
  1. tasks/text.py +14 -3
tasks/text.py CHANGED
@@ -2,7 +2,9 @@ from fastapi import APIRouter
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
- import random
 
 
6
 
7
  from .utils.evaluation import TextEvaluationRequest
8
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
@@ -47,6 +49,12 @@ async def evaluate_text(request: TextEvaluationRequest):
47
  train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
48
  test_dataset = train_test["test"]
49
 
 
 
 
 
 
 
50
  # Start tracking emissions
51
  tracker.start()
52
  tracker.start_task("inference")
@@ -58,8 +66,11 @@ async def evaluate_text(request: TextEvaluationRequest):
58
 
59
  # Make random predictions (placeholder for actual model inference)
60
  true_labels = test_dataset["label"]
61
- predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
62
-
 
 
 
63
  #--------------------------------------------------------------------------------------------
64
  # YOUR MODEL INFERENCE STOPS HERE
65
  #--------------------------------------------------------------------------------------------
 
2
  from datetime import datetime
3
  from datasets import load_dataset
4
  from sklearn.metrics import accuracy_score
5
+ from sklearn.linear_model import LogisticRegression
6
+ from sklearn.feature_extraction.text import TfidfVectorizer
7
+ from sklearn.model_selection import train_test_split
8
 
9
  from .utils.evaluation import TextEvaluationRequest
10
  from .utils.emissions import tracker, clean_emissions_data, get_space_info
 
49
  train_test = dataset["train"].train_test_split(test_size=request.test_size, seed=request.test_seed)
50
  test_dataset = train_test["test"]
51
 
52
+ tfidf_vect = TfidfVectorizer(stop_words = 'english')
53
+
54
+ tfidf_train = tfidf_vect.fit_transform(train_dataset['quote'])
55
+ tfidf_test = tfidf_vect.transform(test_dataset['quote'])
56
+
57
+
58
  # Start tracking emissions
59
  tracker.start()
60
  tracker.start_task("inference")
 
66
 
67
  # Make random predictions (placeholder for actual model inference)
68
  true_labels = test_dataset["label"]
69
+ LR = LogisticRegression(class_weight='balanced', max_iter=20, random_state=1234,
70
+ solver='liblinear')
71
+ LR.fit(pd.DataFrame.sparse.from_spmatrix(tfidf_train), pd.DataFrame(y_train_v))
72
+ predictions=LR.predict(pd.DataFrame.sparse.from_spmatrix(tfidf_test))
73
+
74
  #--------------------------------------------------------------------------------------------
75
  # YOUR MODEL INFERENCE STOPS HERE
76
  #--------------------------------------------------------------------------------------------