# Test_Question_Ai / train.py
# Web-page header text captured together with the file (not part of the script):
#   Nurisslam's picture
#   Rename inference.py to train.py
#   f5ea7e7 verified
#   raw
#   history blame contribute delete
#   887 Bytes
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForQuestionAnswering, Trainer, TrainingArguments
# Checkpoint identifier shared by the tokenizer and the model below.
model_name = "ai4bharat/indic-bert"

# Both objects are created from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForQuestionAnswering.from_pretrained(model_name)

# Read the examples through the "json" loader from `qa_dataset.json`
# (a relative path, so it is resolved against the working directory).
dataset = load_dataset(
    "json",
    data_files="qa_dataset.json",
)
def _first_or_none(value):
    """Return the first item of a list/tuple, the value itself, or None if empty."""
    if isinstance(value, (list, tuple)):
        return value[0] if value else None
    return value


def _answer_token_span(offsets, sequence_ids, start_char, end_char):
    """Convert a character span in the context into (start, end) token indices.

    Args:
        offsets: per-token ``(char_start, char_end)`` pairs for one example.
        sequence_ids: per-token sequence id; tokens with id ``1`` are treated
            as belonging to the context (the second text of the pair).
        start_char: index of the first answer character in the context.
        end_char: index one past the last answer character.

    Returns:
        ``(start_token, end_token)``, or ``(0, 0)`` when the answer does not
        lie completely inside the context tokens that survived truncation.
    """
    context_tokens = [i for i, seq_id in enumerate(sequence_ids) if seq_id == 1]
    if not context_tokens:
        return 0, 0
    first, last = context_tokens[0], context_tokens[-1]

    # Answer (partly) cut off by truncation: fall back to the (0, 0) label.
    if offsets[first][0] > start_char or offsets[last][1] < end_char:
        return 0, 0

    # Last context token that starts at or before the answer start.
    start_token = first
    while start_token <= last and offsets[start_token][0] <= start_char:
        start_token += 1
    start_token -= 1

    # First context token (scanning backwards) that ends at or after the answer end.
    end_token = last
    while end_token >= first and offsets[end_token][1] >= end_char:
        end_token -= 1
    end_token += 1

    return start_token, end_token


def preprocess(examples):
    """Tokenize a batch of question/context pairs and attach answer labels.

    The batch is a mapping of column name to list of values; ``question`` and
    ``context`` are tokenized as a pair with truncation and ``max_length``
    padding, exactly as before.

    Question-answering heads are trained on ``start_positions`` and
    ``end_positions`` token labels, which plain tokenization does not
    produce. When the batch carries an ``answers`` column whose entries hold
    ``text`` and ``answer_start`` and the tokenizer can report character
    offsets, those labels are derived here.
    NOTE(review): the ``answers`` layout (SQuAD-style, ``answer_start`` being
    a character index into ``context``) is an assumption -- the dataset file
    is not visible from this script; confirm against ``qa_dataset.json``.

    In every other case (no ``answers`` column, a different layout, or a
    tokenizer without offset support) the result is the plain tokenizer
    output, i.e. the previous behavior, and no labels are added.

    Args:
        examples: batch mapping with at least ``question`` and ``context``.

    Returns:
        The tokenizer output, plus ``start_positions`` / ``end_positions``
        when answer spans were available.
    """
    answers = examples["answers"] if "answers" in examples else None
    can_label = (
        bool(answers)
        and getattr(tokenizer, "is_fast", False)  # offsets need a fast tokenizer
        and all(
            isinstance(answer, dict) and "text" in answer and "answer_start" in answer
            for answer in answers
        )
    )
    if not can_label:
        return tokenizer(
            examples['question'],
            examples['context'],
            truncation=True,
            padding='max_length',
        )

    encoded = tokenizer(
        examples['question'],
        examples['context'],
        truncation=True,
        padding='max_length',
        return_offsets_mapping=True,
    )
    # Offsets are only needed to build the labels; they are not a model input.
    offset_mapping = encoded.pop("offset_mapping")

    start_positions = []
    end_positions = []
    for index, (offsets, answer) in enumerate(zip(offset_mapping, answers)):
        text = _first_or_none(answer["text"])
        start_char = _first_or_none(answer["answer_start"])
        if not text or start_char is None:
            # No answer recorded for this row: use the (0, 0) label.
            start_token, end_token = 0, 0
        else:
            start_token, end_token = _answer_token_span(
                offsets,
                encoded.sequence_ids(index),
                start_char,
                start_char + len(text),
            )
        start_positions.append(start_token)
        end_positions.append(end_token)

    encoded["start_positions"] = start_positions
    encoded["end_positions"] = end_positions
    return encoded
# Run `preprocess` over the dataset in batches (each call receives a mapping
# of column name -> list of values).
dataset = dataset.map(preprocess, batched=True)

# No evaluation is configured. The former `evaluation_strategy="no"` argument
# is intentionally omitted: "no" is the default, and that keyword was renamed
# to `eval_strategy` (the old spelling is deprecated), so leaving it out keeps
# the same behavior on both older and newer transformers releases.
training_args = TrainingArguments(
    output_dir="./model",
    per_device_train_batch_size=4,
    num_train_epochs=3,
)

# Only a training split is passed; the script indexes the "train" split of
# the loaded dataset.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset["train"],
)
trainer.train()

# Write the model and the tokenizer into the same directory.
model.save_pretrained("./model")
tokenizer.save_pretrained("./model")