File size: 3,064 Bytes
b61b37c
 
 
 
28420a0
b61b37c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4cbe3a9
 
b61b37c
 
 
28420a0
b61b37c
28420a0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b61b37c
28420a0
 
 
 
 
 
 
 
 
b61b37c
af030cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# app.py
#
# Fine-tunes a BERT question-answering model on a custom SQuAD-style
# dataset and serves it through a Gradio chat interface.

import json
from typing import List
import gradio as gr
from simpletransformers.question_answering import QuestionAnsweringModel, QuestionAnsweringArgs

# Load test data (SQuAD-format JSON; currently unused below — kept for evaluation work).
with open("test.json", "r") as read_file:
    test = json.load(read_file)

# Load train data exported from Konbert; expected to be a list of records,
# each with "context", "question", "id", and an "answers" dict holding
# parallel "text" / "answer_start" lists — TODO confirm against the export.
with open("konbert-export-a07a2fb8c3174.json", "r") as json_file:
    train_data = json.load(json_file)

# Adapt the training data into the simpletransformers SQuAD-style layout:
# [{"context": ..., "qas": [{"id", "question", "answers": [...]}, ...]}, ...]
adapted_data = []
for paragraph in train_data:
    answers = paragraph.get("answers")
    # Skip records missing the parallel text/answer_start lists entirely.
    if not (isinstance(answers, dict) and "text" in answers and "answer_start" in answers):
        continue

    qas_list = []
    # zip() truncates to the shorter list, so a mismatched export no longer
    # raises IndexError on answer_start (the original indexed it blindly).
    for i, (text, start) in enumerate(zip(answers["text"], answers["answer_start"])):
        answer_text = text.strip()
        if not answer_text:
            # Drop whitespace-only answers — they carry no training signal.
            continue
        qas_list.append({
            "id": f"{paragraph['id']}_{i}",
            "question": paragraph.get("question", ""),
            "answers": [{"text": answer_text, "answer_start": start}],
        })

    # Only keep paragraphs that retained at least one usable QA pair.
    if qas_list:
        adapted_data.append({
            "context": paragraph.get("context", ""),
            "qas": qas_list,
        })

# Model training arguments.
# NOTE(review): the original built a QuestionAnsweringArgs but never passed it
# to the model (the model got a separate ad-hoc dict), so none of these values
# took effect. They are now applied for real. evaluate_during_training is kept
# False because no eval set is handed to train_model — enabling it without
# eval_data would make training fail.
model_args = QuestionAnsweringArgs()
model_args.train_batch_size = 16
model_args.evaluate_during_training = False
model_args.n_best_size = 3
# 15 epochs matches what the original actually trained with (its args dict
# and train_model override both said 15; the unused 5 is dropped).
model_args.num_train_epochs = 15
model_args.overwrite_output_dir = True

# Model definition: BERT base, CPU-only.
model = QuestionAnsweringModel('bert', 'bert-base-uncased', use_cuda=False, args=model_args)

# Train on the adapted SQuAD-style data (epoch count comes from model_args).
model.train_model(adapted_data)
model.save_model("outputs/bert/final_model")



# Gradio interface function.
def chatbot(question: str) -> str:
    """Answer *question* against a fixed passage about the Normans.

    Wraps the single question in the SQuAD-style structure that
    ``model.predict`` expects and returns the top-ranked answer string.
    Returns a fallback message when the model produces no candidate,
    instead of raising IndexError as the original did.
    """
    to_predict = [
        {
            "context": "The Normans (Norman: Nourmands; French: Normands; Latin: Normanni) were the people who in the 10th and 11th centuries gave their name to Normandy, a region in France. They were descended from Norse (\"Norman\" comes from \"Norseman\") raiders and pirates from Denmark, Iceland and Norway who, under their leader Rollo, agreed to swear fealty to King Charles III of West Francia. Through generations of assimilation and mixing with the native Frankish and Roman-Gaulish populations, their descendants would gradually merge with the Carolingian-based cultures of West Francia. The distinct cultural and ethnic identity of the Normans emerged initially in the first half of the 10th century, and it continued to evolve over the succeeding centuries.",
            "qas": [
                {
                    "question": question,
                    "id": "user_question",
                }
            ],
        }
    ]

    answers, probabilities = model.predict(to_predict)
    # predict() returns one result per input; 'answer' holds the n-best
    # candidate strings, best first. Guard against an empty candidate list.
    candidates = answers[0].get('answer', []) if answers else []
    if not candidates:
        return "Sorry, I couldn't find an answer to that question."
    return candidates[0]

# Build the Gradio UI: a single text box in, a single text box out,
# answering live as the user types.
interface_config = dict(
    fn=chatbot,
    inputs="text",
    outputs="text",
    live=True,
    title="Chatbot Interface",
    description="Ask a question about the Normans",
)
iface = gr.Interface(**interface_config)

# Serve the interface with a public share link.
iface.launch(share=True)