Update app.py
app.py CHANGED
@@ -8,6 +8,31 @@ from utils import *
 from presets import *
 from transformers import Trainer, TrainingArguments
 
+#####################################################
+# Helper functions for training
+#####################################################
+# Push the datasets into the tokenizer...
+def tokenize_function(examples):
+    return tokenizer(examples["text"])
+
+
+# Function that groups the given text from the dataset
+def group_texts(examples):
+    # Concatenate all texts.
+    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
+    total_length = len(concatenated_examples[list(examples.keys())[0]])
+    # We drop the small remainder; we could add padding instead of this drop if the model supported it.
+    # You can customize this part to your needs.
+    total_length = (total_length // block_size) * block_size
+    # Split by chunks of max_len.
+    result = {
+        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
+        for k, t in concatenated_examples.items()
+    }
+    result["labels"] = result["input_ids"].copy()
+    return result
+
+
 
 ######################################################################
 # Models and tokenizer
@@ -80,30 +105,6 @@ def trainieren_neu():
 
 
 
-#####################################################
-# Helper functions for training
-#####################################################
-# Push the datasets into the tokenizer...
-def tokenize_function(examples):
-    return tokenizer(examples["text"])
-
-
-# Function that groups the given text from the dataset
-def group_texts(examples):
-    # Concatenate all texts.
-    concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
-    total_length = len(concatenated_examples[list(examples.keys())[0]])
-    # We drop the small remainder; we could add padding instead of this drop if the model supported it.
-    # You can customize this part to your needs.
-    total_length = (total_length // block_size) * block_size
-    # Split by chunks of max_len.
-    result = {
-        k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
-        for k, t in concatenated_examples.items()
-    }
-    result["labels"] = result["input_ids"].copy()
-    return result
-
 
 
 
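
Note: the two helpers moved above are the standard causal-language-modeling preprocessing pair from the Hugging Face examples. A minimal usage sketch follows, assuming app.py applies them via datasets.map; the checkpoint name, dataset, and block_size value below are illustrative assumptions, not taken from this Space.

# Hypothetical usage sketch; tokenize_function and group_texts as defined above in app.py.
# The tokenizer, dataset, and block_size here are assumptions for illustration only.
from datasets import load_dataset
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")              # assumed checkpoint
block_size = 128                                               # assumed chunk length
raw_datasets = load_dataset("wikitext", "wikitext-2-raw-v1")   # assumed dataset

# Tokenize the raw text; drop the original column so only token ids remain.
tokenized = raw_datasets.map(tokenize_function, batched=True, remove_columns=["text"])

# Concatenate the token ids and re-chunk them into block_size pieces with labels for the Trainer.
lm_datasets = tokenized.map(group_texts, batched=True)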