Update app.py
app.py
CHANGED
@@ -103,6 +103,7 @@ tokenized_datasets = dataset_neu.map(tokenize_function, batched=True, num_proc=4
 #small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
 #small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
 
+#Look at a sample - to check...
 print (tokenized_datasets["train"][4])
 
 
@@ -134,19 +135,18 @@ metric = evaluate.load("accuracy") # 3 kinds of available metric: f1 or ro
 ####################################################
 #Training
 ####################################################
-
+print ("training args")
 #Training Args
 batch_size = 2
 
 training_args = TrainingArguments(
-
-    output_dir="model",
+    output_dir="alexkueck/li-tis-tuned-1",
     overwrite_output_dir = 'True',
     per_device_train_batch_size=batch_size, #batch_size = 2 for full training
    per_device_eval_batch_size=batch_size,
-    evaluation_strategy = "
-    logging_strategy="
-    logging_steps=10,
+    evaluation_strategy = "epoch", #or steps
+    logging_strategy="epoch", #or steps
+    #logging_steps=10,
     logging_dir='logs',
     learning_rate=2e-5,
     weight_decay=0.01,
@@ -165,10 +165,11 @@ training_args = TrainingArguments(
     load_best_model_at_end=False,
     #push_to_hub=True,
 )
-
+
 ############################################
 #def trainieren_neu(name):
 #Assemble the trainer
+print ("trainer")
 trainer = Trainer(
     model=model,
     args=training_args,
@@ -177,7 +178,25 @@ trainer = Trainer(
     #tokenizer=tokenizer,
     compute_metrics=compute_metrics,
 )
-
+
+###############################################
+#Special QA Trainer...#
+'''
+trainer = QuestionAnsweringTrainer(
+    model=model,
+    args=training_args,
+    train_dataset=train_dataset if training_args.do_train else None,
+    eval_dataset=eval_dataset if training_args.do_eval else None,
+    eval_examples=eval_examples if training_args.do_eval else None,
+    tokenizer=tokenizer,
+    data_collator=data_collator,
+    post_process_function=post_processing_function,
+    compute_metrics=compute_metrics,
+)
+'''
+#################################################
+
+
 #run the trainer
 trainer.train()
 #To resume training from the last checkpoint: trainer.train(resume_from_checkpoint=True)
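For reference, below is a minimal sketch of the training setup as it stands after this commit. It assumes the model, tokenized_datasets, and compute_metrics objects built earlier in app.py; the train_dataset/eval_dataset arguments to Trainer fall in the diff's hidden context, so the "train"/"test" split names here are an assumption. It is a sketch of the configuration, not the full app.

# Sketch of the post-commit training setup (not the complete app.py).
# Assumed from earlier in app.py: model, tokenized_datasets, compute_metrics.
from transformers import Trainer, TrainingArguments

batch_size = 2  # small per-device batch for full training on limited hardware

training_args = TrainingArguments(
    output_dir="alexkueck/li-tis-tuned-1",
    overwrite_output_dir=True,       # app.py passes the string 'True', which only works because it is truthy
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    evaluation_strategy="epoch",     # evaluate once per epoch (alternative: "steps")
    logging_strategy="epoch",        # log in lockstep with evaluation
    logging_dir="logs",
    learning_rate=2e-5,
    weight_decay=0.01,
    load_best_model_at_end=False,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],   # split names assumed
    eval_dataset=tokenized_datasets["test"],
    compute_metrics=compute_metrics,
)

trainer.train()
# To resume from the last saved checkpoint instead:
# trainer.train(resume_from_checkpoint=True)

Note that QuestionAnsweringTrainer in the commented-out block is not part of the core transformers API: it is the Trainer subclass from the question-answering example scripts in the transformers repository, which also define the post_processing_function, eval_examples, and data_collator objects that block refers to.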