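# Spanish POS-tagging quiz: a Spanish GPT-2 model generates a sentence,
# spaCy tags it, and the user tries to match each word's part-of-speech tag.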
import gradio as gr
import spacy
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# spaCy's Spanish pipeline; install with: python -m spacy download es_core_news_sm
nlp = spacy.load('es_core_news_sm')

# Load pre-trained model tokenizer (vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained('datificate/gpt-2-small-spanish')

# Load pre-trained model (weights)
model = GPT2LMHeadModel.from_pretrained('datificate/gpt-2-small-spanish')

# Universal Dependencies POS tags produced by spaCy (note: CCONJ, not the older CONJ)
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']

# Module-level state shared between the two callbacks
sentence = ""
tagged_words = []

def generate_sentence():
    global sentence, tagged_words
    # Seed generation with the BOS token; encoding an empty string would
    # give model.generate() nothing to continue from. Sampling keeps each
    # click from producing the same greedy sentence.
    input_ids = tokenizer.encode(tokenizer.bos_token, return_tensors='pt')
    output = model.generate(input_ids, max_length=50, do_sample=True,
                            pad_token_id=tokenizer.eos_token_id)
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    tagged_words = analyze_sentence(sentence)
    return sentence, ' '.join(word for word, _ in tagged_words)

def analyze_sentence(sentence):
    # Tag every token in the sentence with its coarse POS label
    doc = nlp(sentence)
    return [(token.text, token.pos_) for token in doc]

def check_answer(user_input):
    correct_answer = [tag for _, tag in tagged_words]
    # Parse the user's comma-separated tags, e.g. "DET, NOUN, VERB"
    user_answer = [tag.strip().upper() for tag in user_input.split(',')]
    if user_answer == correct_answer:
        return 'Correct!'
    return 'Incorrect. The correct answer is: ' + ', '.join(correct_answer)

# gr.Interface has no 'button' input or 'dynamic' output, and
# add_interface()/gr.inputs.Dynamic do not exist. gr.Blocks wires the two
# steps together instead, with the tags entered as comma-separated text.
with gr.Blocks() as iface:
    generate_btn = gr.Button('Generate sentence')
    sentence_box = gr.Textbox(label='Sentence')
    words_box = gr.Textbox(label='Words to tag')
    answer_box = gr.Textbox(label='Your tags, comma-separated (choices: ' + ', '.join(pos_tags) + ')')
    check_btn = gr.Button('Check answer')
    result_box = gr.Textbox(label='Result')

    generate_btn.click(generate_sentence, inputs=None, outputs=[sentence_box, words_box])
    check_btn.click(check_answer, inputs=answer_box, outputs=result_box)

iface.launch()