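"""POS-tagging quiz: generate a Spanish sentence with GPT-2
('datificate/gpt-2-small-spanish') and ask the user to label each word with
its part of speech, checked against spaCy's 'es_core_news_sm' pipeline.

Requires gradio, spacy (with es_core_news_sm downloaded), transformers and
torch; version pins are not given in the source.
"""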
import gradio as gr
import spacy
from transformers import GPT2Tokenizer, GPT2LMHeadModel

# Spanish spaCy pipeline used for POS tagging
nlp = spacy.load('es_core_news_sm')

# Load pre-trained model tokenizer (vocabulary)
tokenizer = GPT2Tokenizer.from_pretrained('datificate/gpt-2-small-spanish')
# Load pre-trained model (weights)
model = GPT2LMHeadModel.from_pretrained('datificate/gpt-2-small-spanish')

# Answer choices: the universal POS tags (spaCy emits CCONJ, not the older
# CONJ tag, so CCONJ is listed here)
pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CCONJ', 'DET', 'INTJ', 'NOUN', 'NUM',
            'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']

# State shared between the generate and check callbacks
sentence = ""
tagged_words = []

def generate_sentence():
    global sentence, tagged_words
    # Seed with the BOS token (encoding '' yields an empty tensor) so we
    # control the special tokens; sampling varies the sentence per click
    input_ids = tokenizer.encode(tokenizer.bos_token, return_tensors='pt')
    output = model.generate(input_ids, max_length=50, do_sample=True,
                            pad_token_id=tokenizer.eos_token_id)
    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
    tagged_words = analyze_sentence(sentence)
    return sentence, [word for word, _ in tagged_words]

def analyze_sentence(sentence):
    # POS-tag every token in the sentence with spaCy
    doc = nlp(sentence)
    return [(token.text, token.pos_) for token in doc]

def check_answer(*args):
    # Hidden dropdowns report None, so compare only the first
    # len(correct_answer) selections against spaCy's tags
    correct_answer = [tag for _, tag in tagged_words]
    user_answer = list(args)[:len(correct_answer)]
    if user_answer == correct_answer:
        return '¡Correcto!'
    return 'Incorrecto. La respuesta correcta es: ' + str(correct_answer)

# gr.Interface takes no 'button' input or 'dynamic' output, and
# iface.add_interface() / gr.inputs.Dynamic do not exist in Gradio, so the UI
# is rebuilt with gr.Blocks over a fixed pool of dropdowns (Gradio cannot add
# components on the fly; MAX_WORDS is an assumed upper bound on sentence length)
MAX_WORDS = 30

with gr.Blocks() as demo:
    generate_btn = gr.Button('Generar frase')
    sentence_box = gr.Textbox(label='Frase generada')
    dropdowns = [gr.Dropdown(choices=pos_tags, visible=False,
                             label=f'Palabra {i + 1}') for i in range(MAX_WORDS)]
    check_btn = gr.Button('Comprobar')
    result_box = gr.Textbox(label='Resultado')

    def on_generate():
        text, words = generate_sentence()
        # Show one labelled dropdown per word and hide the unused ones
        updates = [gr.update(visible=i < len(words), value=None,
                             label=words[i] if i < len(words) else '')
                   for i in range(MAX_WORDS)]
        return [text] + updates

    generate_btn.click(on_generate, inputs=None,
                       outputs=[sentence_box] + dropdowns)
    check_btn.click(check_answer, inputs=dropdowns, outputs=result_box)

demo.launch()