|
|
|
|
|
import json
from typing import List

import gradio as gr
from simpletransformers.question_answering import (
    QuestionAnsweringArgs,
    QuestionAnsweringModel,
)
|
|
|
|
|
# Load the held-out examples. JSON is decoded explicitly as UTF-8 so the
# result does not depend on the platform's locale encoding.
# NOTE(review): `test` is not referenced by the visible code below; it is
# kept because later code may rely on it - confirm or remove.
with open("test.json", "r", encoding="utf-8") as read_file:
    test = json.load(read_file)
|
|
|
|
|
# Load the raw training export. The encoding is pinned to UTF-8 because the
# records carry character offsets (`answer_start`); decoding with a
# different locale codec could change the text those offsets point into.
with open("konbert-export-a07a2fb8c3174.json", "r", encoding="utf-8") as json_file:
    train_data = json.load(json_file)
|
|
|
|
|
def _to_context_qas(records):
    """Convert flat QA records into ``{"context", "qas"}`` dictionaries.

    Each input record is expected to be a mapping with an ``"id"``, and
    optionally ``"question"``, ``"context"`` and an ``"answers"`` mapping
    holding the parallel lists ``"text"`` and ``"answer_start"``.

    One ``qas`` entry is produced per non-blank answer text; its id is
    ``"<record id>_<answer index>"``. Records that yield no usable answer
    are dropped entirely.

    Args:
        records: Iterable of record mappings as described above.

    Returns:
        A list of ``{"context": str, "qas": list}`` dictionaries.

    Raises:
        KeyError: If a record with usable answers has no ``"id"``.
    """
    converted = []
    for record in records:
        # A missing or null "answers" entry is treated as "no answers".
        answers = record.get("answers") or {}
        if "text" not in answers or "answer_start" not in answers:
            continue

        qas = []
        # zip() pairs each answer with its offset and stops at the shorter
        # list, so mismatched lengths cannot raise IndexError. enumerate()
        # keeps the positional index used in the generated id.
        pairs = zip(answers["text"], answers["answer_start"])
        for index, (raw_text, start) in enumerate(pairs):
            answer_text = raw_text.strip()
            if not answer_text:
                continue
            qas.append(
                {
                    "id": f"{record['id']}_{index}",
                    "question": record.get("question", ""),
                    "answers": [{"text": answer_text, "answer_start": start}],
                }
            )

        if qas:
            converted.append({"context": record.get("context", ""), "qas": qas})
    return converted


adapted_data = _to_context_qas(train_data)
|
|
|
|
|
# Single source of truth for the model configuration. The args object is
# handed to the model constructor so every setting below actually takes
# effect (an args object that is never passed to the model is ignored).
model_args = QuestionAnsweringArgs()
model_args.train_batch_size = 16
model_args.n_best_size = 3
# The epoch count was previously specified as both 5 and 20; 20 is the value
# that was actually in effect, so it is kept here.
model_args.num_train_epochs = 20
model_args.overwrite_output_dir = True
# NOTE(review): evaluate_during_training is left disabled. train_model()
# requires `eval_data` when it is enabled, and no evaluation set in the
# context/qas format is available at this point - confirm the format of
# `test` before turning this on and passing eval_data=...
model_args.evaluate_during_training = False

model = QuestionAnsweringModel(
    "bert",
    "bert-base-uncased",
    args=model_args,
    use_cuda=False,
)

# Epochs are configured through model_args; train_model() treats unknown
# keyword arguments as extra metric callables, not as training options.
model.train_model(adapted_data)

# save_model() only writes weights/tokenizer when the `model` argument is
# given; without it, it merely creates the output directory.
model.save_model("outputs/bert/final_model", model=model.model)
|
|
|
|
|
|
|
|
|
def chatbot(question: str) -> str:
    """Answer ``question`` against a fixed passage about the Normans.

    Args:
        question: The user's question as free text.

    Returns:
        The first (top-ranked) answer string returned by ``model.predict``,
        or an empty string when the question is blank or no answer
        candidate is returned.
    """
    # The UI may call this with an empty textbox; skip inference entirely
    # instead of querying the model with a blank question.
    if not question or not question.strip():
        return ""

    to_predict = [
        {
            "context": "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.",
            "qas": [
                {
                    "question": question,
                    "id": "user_question",
                }
            ],
        }
    ]

    # predict() returns (answers, probabilities); only the answers are used.
    answers, _ = model.predict(to_predict)

    # One input item was submitted, so its result sits at index 0; guard
    # against an empty candidate list before taking the top entry.
    candidates = answers[0]["answer"]
    return candidates[0] if candidates else ""
|
|
|
|
|
# Build the Gradio front end around `chatbot`: one text box in, one text
# box out.
iface = gr.Interface(
    # Presentation
    title="Chatbot Interface",
    description="Ask a question about the Normans",
    # Wiring
    fn=chatbot,
    inputs="text",
    outputs="text",
    # NOTE(review): per the Gradio docs, live=True re-runs `fn` whenever the
    # input changes - confirm this is wanted for a model-backed callback.
    live=True,
)

# Start serving the interface.
iface.launch()
|
|