Spaces:

Merlintxu
/

GramAPP

Runtime error

Merlintxu committed on May 16, 2023

Commit

52d2c96

1 Parent(s): c95135b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,9 +1,14 @@
 import gradio as gr
 import spacy
-from transformers import pipeline
 nlp = spacy.load('es_core_news_sm')
-text_generator = pipeline('text-generation', model='datificate/gpt-2-small-spanish')
 pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']
@@ -12,8 +17,10 @@ tagged_words = []
 def generate_sentence():
     global sentence, tagged_words
-    result = text_generator('', max_length=50)[0]
-    sentence = result['generated_text']
     tagged_words = analyze_sentence(sentence)
     return sentence, [word for word, _ in tagged_words]

 import gradio as gr
 import spacy
+from transformers import GPT2Tokenizer, GPT2LMHeadModel
 nlp = spacy.load('es_core_news_sm')
+# Load pre-trained model tokenizer (vocabulary)
+tokenizer = GPT2Tokenizer.from_pretrained('datificate/gpt-2-small-spanish')
+# Load pre-trained model (weights)
+model = GPT2LMHeadModel.from_pretrained('datificate/gpt-2-small-spanish')
 pos_tags = ['ADJ', 'ADP', 'ADV', 'AUX', 'CONJ', 'DET', 'INTJ', 'NOUN', 'NUM', 'PART', 'PRON', 'PROPN', 'PUNCT', 'SCONJ', 'SYM', 'VERB', 'X']
 def generate_sentence():
     global sentence, tagged_words
+    # We will generate the text manually to control the special tokens
+    input_ids = tokenizer.encode('', return_tensors='pt')
+    output = model.generate(input_ids, max_length=50)
+    sentence = tokenizer.decode(output[0], skip_special_tokens=True)
     tagged_words = analyze_sentence(sentence)
     return sentence, [word for word, _ in tagged_words]