Merlintxu committed on
Commit
52d2c96
·
1 Parent(s): c95135b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -1,9 +1,14 @@
1
  import gradio as gr
2
  import spacy
3
- from transformers import pipeline
4
 
5
  nlp = spacy.load('es_core_news_sm')
6
- text_generator = pipeline('text-generation', model='datificate/gpt-2-small-spanish')
 
 
 
 
 
7
 
8
  pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']
9
 
@@ -12,8 +17,10 @@ tagged_words = []
12
 
13
  def generate_sentence():
14
  global sentence, tagged_words
15
- result = text_generator('', max_length=50)[0]
16
- sentence = result['generated_text']
 
 
17
  tagged_words = analyze_sentence(sentence)
18
  return sentence, [word for word, _ in tagged_words]
19
 
 
1
  import gradio as gr
2
  import spacy
3
+ from transformers import GPT2Tokenizer, GPT2LMHeadModel
4
 
5
  nlp = spacy.load('es_core_news_sm')
6
+
7
+ # Load pre-trained model tokenizer (vocabulary)
8
+ tokenizer = GPT2Tokenizer.from_pretrained('datificate/gpt-2-small-spanish')
9
+
10
+ # Load pre-trained model (weights)
11
+ model = GPT2LMHeadModel.from_pretrained('datificate/gpt-2-small-spanish')
12
 
13
  pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']
14
 
 
17
 
18
  def generate_sentence():
19
  global sentence, tagged_words
20
+ # We will generate the text manually to control the special tokens
21
+ input_ids = tokenizer.encode('', return_tensors='pt')
22
+ output = model.generate(input_ids, max_length=50)
23
+ sentence = tokenizer.decode(output[0], skip_special_tokens=True)
24
  tagged_words = analyze_sentence(sentence)
25
  return sentence, [word for word, _ in tagged_words]
26