UserAgentII committed
Commit 118e85e · verified · 1 Parent(s): d54fc41

Update app.py

Files changed (1): app.py +45 -0
app.py CHANGED
@@ -0,0 +1,45 @@
+ #made by gpt
+
+ from datasets import load_dataset
+ from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments
+ import torch
+
+ # Load a small dataset (IMDB with just a few samples for quick testing)
+ dataset = load_dataset("imdb", split='train[:2%]').train_test_split(test_size=0.2)
+
+ # Tokenizer and model
+ tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
+ model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)
+
+ # Tokenize the dataset
+ def tokenize(batch):
+     return tokenizer(batch['text'], padding=True, truncation=True)
+
+ tokenized_dataset = dataset.map(tokenize, batched=True)
+ tokenized_dataset = tokenized_dataset.rename_column("label", "labels")
+
+ # Training arguments
+ training_args = TrainingArguments(
+     output_dir="./results",
+     evaluation_strategy="epoch",
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=4,
+     num_train_epochs=1,
+     logging_steps=10,
+     save_steps=10,
+     report_to="none"
+ )
+
+ # Trainer
+ trainer = Trainer(
+     model=model,
+     args=training_args,
+     train_dataset=tokenized_dataset["train"],
+     eval_dataset=tokenized_dataset["test"]
+ )
+
+ # Train the model
+ trainer.train()
+
+ # Save model
+ trainer.save_model("my-simple-sentiment-model")
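
For reference, a minimal inference sketch for the model this script saves, assuming app.py has been run as written. Because no tokenizer is passed to the Trainer here, trainer.save_model() is expected to write only the model weights and config to "my-simple-sentiment-model", so the sketch reloads the tokenizer from the base distilbert-base-uncased checkpoint.

from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, pipeline

# Reload the fine-tuned weights saved by trainer.save_model(...)
model = DistilBertForSequenceClassification.from_pretrained("my-simple-sentiment-model")
# The tokenizer was not saved alongside the model, so reuse the base tokenizer
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")

classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("This movie was surprisingly good."))
# Prints LABEL_0 / LABEL_1 with scores, since the script does not set an id2label mapping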