Update app.py
Browse files
app.py
CHANGED
@@ -10,7 +10,7 @@ from transformers import (
|
|
10 |
AutoModelForSpeechSeq2Seq,
|
11 |
TrainingArguments,
|
12 |
Trainer,
|
13 |
-
|
14 |
)
|
15 |
|
16 |
# ================================
|
@@ -116,16 +116,15 @@ training_args = TrainingArguments(
|
|
116 |
hub_token=HF_TOKEN,
|
117 |
)
|
118 |
|
119 |
-
#
|
120 |
-
data_collator =
|
121 |
|
122 |
-
# Define Trainer
|
123 |
trainer = Trainer(
|
124 |
model=model,
|
125 |
args=training_args,
|
126 |
train_dataset=train_dataset,
|
127 |
eval_dataset=eval_dataset,
|
128 |
-
processing_class=processor, # Fix: Replacing deprecated `tokenizer`
|
129 |
data_collator=data_collator,
|
130 |
)
|
131 |
|
|
|
10 |
AutoModelForSpeechSeq2Seq,
|
11 |
TrainingArguments,
|
12 |
Trainer,
|
13 |
+
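DataCollatorSpeechSeq2Seq,  # NOTE(review): this name does not appear to be exported by `transformers` (the import would raise ImportError) — confirm; speech seq2seq fine-tuning guides define a custom padding collator locally instead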
|
14 |
)
|
15 |
|
16 |
# ================================
|
|
|
116 |
hub_token=HF_TOKEN,
|
117 |
)
|
118 |
|
119 |
+
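# Build the data collator that is passed to the Trainer below. NOTE(review): DataCollatorSpeechSeq2Seq does not appear to be a `transformers` export — confirm the name resolves before relying on this change.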
|
120 |
+
data_collator = DataCollatorSpeechSeq2Seq(processor=processor, model=model, return_tensors="pt")
|
121 |
|
122 |
+
# Define Trainer
|
123 |
trainer = Trainer(
|
124 |
model=model,
|
125 |
args=training_args,
|
126 |
train_dataset=train_dataset,
|
127 |
eval_dataset=eval_dataset,
|
|
|
128 |
data_collator=data_collator,
|
129 |
)
|
130 |
|