Spaces:
Runtime error
Runtime error
File size: 887 Bytes
f5ea7e7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 |
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, Trainer, TrainingArguments
# Checkpoint used for both the tokenizer and the question-answering model.
model_name = "ai4bharat/indic-bert"

# keep_accents=True stops the ALBERT-style sentencepiece normalisation from
# stripping combining marks; without it, vowel signs (matras) and other
# diacritics in Indic scripts are dropped before tokenization.
tokenizer = AutoTokenizer.from_pretrained(model_name, keep_accents=True)

# Load the encoder with a question-answering head on top.
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# A single local JSON file; the rest of the script reads it via the
# "train" split of the returned dataset dict.
dataset = load_dataset("json", data_files="qa_dataset.json")
def preprocess(examples, max_length=512):
    """Tokenize a batch of question/context pairs.

    Args:
        examples: Batch mapping that provides the 'question' and 'context'
            columns (lists of strings when used with ``map(batched=True)``).
        max_length: Sequence length every example is truncated/padded to.
            The default of 512 assumes the usual position limit of
            BERT/ALBERT-style encoders -- TODO confirm against the model
            config.

    Returns:
        The tokenizer output for the batch.
    """
    # An explicit max_length is required here: with padding='max_length' and
    # no length given, the tokenizer relies on its own predefined maximum, and
    # when the checkpoint does not define one it silently skips padding and
    # truncation, leaving ragged sequences that cannot be batched.
    return tokenizer(
        examples['question'],
        examples['context'],
        truncation=True,
        padding='max_length',
        max_length=max_length,
    )
# Tokenize every split in batches.
dataset = dataset.map(preprocess, batched=True)

# NOTE(review): a question-answering head only produces a training loss when
# batches carry start_positions / end_positions. The preprocess step visible
# in this file does not add them -- confirm qa_dataset.json provides them.

# The evaluation strategy is deliberately left at its default ("no"): the
# `evaluation_strategy` keyword was renamed to `eval_strategy` in newer
# transformers releases, so passing the old name fails there with a TypeError,
# while omitting it behaves the same on every version.
training_args = TrainingArguments(
    output_dir="./model",
    per_device_train_batch_size=4,
    num_train_epochs=3,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset['train'],
)
trainer.train()

# Persist the fine-tuned weights together with the tokenizer so the output
# directory can be reloaded with from_pretrained("./model").
model.save_pretrained("./model")
tokenizer.save_pretrained("./model")
|