Add debug print of the first tokenized example (handles both list and tensor values)
Browse files- train_llama.py +3 -1
train_llama.py
CHANGED
@@ -59,7 +59,9 @@ def tokenize_data(example):
|
|
59 |
}
|
60 |
|
61 |
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
|
62 |
-
|
|
|
|
|
63 |
|
64 |
# Data collator
|
65 |
def custom_data_collator(features):
|
|
|
59 |
}
|
60 |
|
61 |
tokenized_dataset = dataset["train"].map(tokenize_data, batched=False, remove_columns=dataset["train"].column_names)
|
62 |
+
# Fix print to handle potential list or tensor
|
63 |
+
first_example = tokenized_dataset[0]
|
64 |
+
print("First tokenized example:", {k: (type(v), v.shape if hasattr(v, 'shape') else len(v)) for k, v in first_example.items()})
|
65 |
|
66 |
# Data collator
|
67 |
def custom_data_collator(features):
|