alexkueck committed on
Commit
d7d2637
·
1 Parent(s): 78956b3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -8
app.py CHANGED
@@ -103,6 +103,7 @@ tokenized_datasets = dataset_neu.map(tokenize_function, batched=True, num_proc=4
103
  #small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
104
  #small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
105
 
 
106
  print (tokenized_datasets["train"][4])
107
 
108
 
@@ -134,19 +135,18 @@ metric = evaluate.load("accuracy") # 3 Arten von gegebener Metrik: f1 oder ro
134
  ####################################################
135
  #Training
136
  ####################################################
137
-
138
  #Training Args
139
  batch_size = 2
140
 
141
  training_args = TrainingArguments(
142
- #output_dir="alexkueck/test-tis-1",
143
- output_dir="model",
144
  overwrite_output_dir = 'True',
145
  per_device_train_batch_size=batch_size, #batch_size = 2 for full training
146
  per_device_eval_batch_size=batch_size,
147
- evaluation_strategy = "steps", #oder
148
- logging_strategy="steps", #oder epoch
149
- logging_steps=10,
150
  logging_dir='logs',
151
  learning_rate=2e-5,
152
  weight_decay=0.01,
@@ -165,10 +165,11 @@ training_args = TrainingArguments(
165
  load_best_model_at_end=False,
166
  #push_to_hub=True,
167
  )
168
- print ("training args")
169
  ############################################
170
  #def trainieren_neu(name):
171
  #Trainer zusammenstellen
 
172
  trainer = Trainer(
173
  model=model,
174
  args=training_args,
@@ -177,7 +178,25 @@ trainer = Trainer(
177
  #tokenizer=tokenizer,
178
  compute_metrics=compute_metrics,
179
  )
180
- print ("trainer")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  #trainer ausführen
182
  trainer.train()
183
  #Wenn man vom letzten checkpoint aus weiter trainieren möchte: trainer.train(resume_from_checkpoint=True)
 
103
  #small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
104
  #small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
105
 
106
+ #Probe ansehen - zum überprüfen...
107
  print (tokenized_datasets["train"][4])
108
 
109
 
 
135
  ####################################################
136
  #Training
137
  ####################################################
138
+ print ("training args")
139
  #Training Args
140
  batch_size = 2
141
 
142
  training_args = TrainingArguments(
143
+ output_dir="alexkueck/li-tis-tuned-1",
 
144
  overwrite_output_dir = 'True',
145
  per_device_train_batch_size=batch_size, #batch_size = 2 for full training
146
  per_device_eval_batch_size=batch_size,
147
+ evaluation_strategy = "epoch", #oder steps
148
+ logging_strategy="epoch", #oder steps
149
+ #logging_steps=10,
150
  logging_dir='logs',
151
  learning_rate=2e-5,
152
  weight_decay=0.01,
 
165
  load_best_model_at_end=False,
166
  #push_to_hub=True,
167
  )
168
+
169
  ############################################
170
  #def trainieren_neu(name):
171
  #Trainer zusammenstellen
172
+ print ("trainer")
173
  trainer = Trainer(
174
  model=model,
175
  args=training_args,
 
178
  #tokenizer=tokenizer,
179
  compute_metrics=compute_metrics,
180
  )
181
+
182
+ ###############################################
183
+ #Special QA Trainer...#
184
+ '''
185
+ trainer = QuestionAnsweringTrainer(
186
+ model=model,
187
+ args=training_args,
188
+ train_dataset=train_dataset if training_args.do_train else None,
189
+ eval_dataset=eval_dataset if training_args.do_eval else None,
190
+ eval_examples=eval_examples if training_args.do_eval else None,
191
+ tokenizer=tokenizer,
192
+ data_collator=data_collator,
193
+ post_process_function=post_processing_function,
194
+ compute_metrics=compute_metrics,
195
+ )
196
+ '''
197
+ #################################################
198
+
199
+
200
  #trainer ausführen
201
  trainer.train()
202
  #Wenn man vom letzten checkpoint aus weiter trainieren möchte: trainer.train(resume_from_checkpoint=True)