clemdesr committed
Commit ff046db · 1 Parent(s): 9a3fedf

feat: distilbert model

Files changed (2)
  1. requirements.txt +5 -1
  2. tasks/text.py +45 -1
requirements.txt CHANGED
@@ -7,4 +7,8 @@ pydantic>=1.10.0
 python-dotenv>=1.0.0
 gradio>=4.0.0
 requests>=2.31.0
-librosa==0.10.2.post1
+librosa==0.10.2.post1
+llvmlite == 0.43.0
+transformers
+torch
+transformers[torch]
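
To reproduce the environment locally, the updated pins install with the usual pip workflow (the Space itself rebuilds from this file on push):

    pip install -r requirements.txt

Note that the transformers[torch] extra already pulls in torch, so the standalone transformers and torch lines are redundant, though harmless.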
tasks/text.py CHANGED
@@ -59,7 +59,51 @@ async def evaluate_text(request: TextEvaluationRequest):
 
     # Make random predictions (placeholder for actual model inference)
     true_labels = test_dataset["label"]
-    predictions = [random.randint(0, 7) for _ in range(len(true_labels))]
+
+    import torch
+    from transformers import (
+        AutoModelForSequenceClassification,
+        AutoTokenizer,
+        Trainer,
+        TrainingArguments,
+    )
+
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(LABEL_MAPPING))
+
+    # Tokenize the datasets
+    def tokenize_function(examples):
+        return tokenizer(examples["quote"], padding="max_length", truncation=True)
+
+    tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)
+
+    # Set training arguments
+    training_args = TrainingArguments(
+        output_dir="./bert_classification_results",
+        evaluation_strategy="epoch",
+        save_strategy="epoch",
+        learning_rate=2e-5,
+        per_device_train_batch_size=8,
+        per_device_eval_batch_size=8,
+        num_train_epochs=30,
+        weight_decay=0.01,
+        load_best_model_at_end=True,
+        use_mps_device=True,
+    )
+
+    # Initialize the Trainer
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        eval_dataset=tokenized_test_dataset,
+        tokenizer=tokenizer,
+    )
+
+    import numpy as np
+
+    preds = trainer.predict(tokenized_test_dataset)
+
+    predictions = np.array([np.argmax(x) for x in preds[0]])
 
     # --------------------------------------------------------------------------------------------
     # YOUR MODEL INFERENCE STOPS HERE
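
Because no train_dataset is passed to the Trainer, trainer.predict only runs batched inference here; the TrainingArguments (learning rate, 30 epochs, MPS flag) are never exercised. For reference, a leaner sketch of the same inference path without a Trainer, assuming model_name and LABEL_MAPPING are defined earlier in tasks/text.py as in the committed code:

    import numpy as np
    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    # Sketch: plain batched inference, no Trainer/TrainingArguments needed.
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=len(LABEL_MAPPING))
    device = "cuda" if torch.cuda.is_available() else "cpu"  # device choice is an assumption
    model.to(device)
    model.eval()

    pred_list = []
    with torch.no_grad():
        for start in range(0, len(test_dataset), 32):  # batch size 32 is arbitrary
            batch = test_dataset["quote"][start : start + 32]
            inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device)
            pred_list.extend(model(**inputs).logits.argmax(dim=-1).cpu().tolist())
    predictions = np.array(pred_list)

Staying with the Trainer route, the final list comprehension can also be written vectorized as predictions = preds.predictions.argmax(axis=-1); preds[0] is simply the logits array that trainer.predict returns.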