import gradio as gr
from transformers import GPT2TokenizerFast, GPT2LMHeadModel
# Load the tokenizer and register the special tokens used during instruction tuning
tokenizer = GPT2TokenizerFast.from_pretrained("AlexWortega/instruct_rugptlarge")
special_tokens_dict = {'additional_special_tokens': ['<code>', '</code>', '<instructionS>', '<instructionE>', '<next>']}
tokenizer.add_special_tokens(special_tokens_dict)

device = 'cpu'  # meh, a GPU is expensive

model = GPT2LMHeadModel.from_pretrained("AlexWortega/instruct_rugptlarge")
# Resize the embedding matrix to cover the newly added special tokens and move the model to the target device
model.resize_token_embeddings(len(tokenizer))
model.to(device)
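
# Optional sanity check (not part of the original app): the registered special tokens
# should now resolve to dedicated ids beyond the base vocabulary, e.g.
#   tokenizer.convert_tokens_to_ids('<instructionS>')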

def generate_prompt(instruction, input=None):
    # Prompt helper; the Gradio demo below builds the prompt inline in generate_seqs instead
    if input:
        return f"{input}:"
    return f"{instruction}"

def generate_seqs(q, temp, topp, topk, nb, maxtok):
    # Number of sequences to return
    k = 1
    gen_kwargs = {
        "min_length": 20,
        "max_new_tokens": maxtok,
        "top_k": topk,
        "top_p": topp,
        "do_sample": True,
        "early_stopping": True,
        "no_repeat_ngram_size": 2,
        "temperature": temp,
        "eos_token_id": tokenizer.eos_token_id,
        "pad_token_id": tokenizer.eos_token_id,
        "use_cache": True,
        "repetition_penalty": 1.5,
        "length_penalty": 0.8,
        "num_beams": nb,
        "num_return_sequences": k
    }
    # Append the instruction-start token; fall back to a default prompt
    # ("How to make money with neural networks?") if the field is empty
    if len(q) > 0:
        q = q + '<instructionS>'
    else:
        q = 'Как зарабатывать денег на нейросетях ?' + '<instructionS>'
    t = tokenizer.encode(q, return_tensors='pt').to(device)
    g = model.generate(t, **gen_kwargs)
    generated_sequences = tokenizer.batch_decode(g, skip_special_tokens=False)
    # print(generated_sequences)
    # Optional reward-model reranking, disabled in this demo:
    # sequences = [f"H:{q}</s></s>A:{s.replace(q, '')}" for s in generated_sequences]
    # scores = [reward_model.reward_score(q, s.split('</s></s>A:')[-1]) for s in sequences]
    # results = [(s, score) for score, s in sorted(zip(scores, sequences), reverse=True)[:k]]
    # Strip the special tokens from the decoded text before returning it
    ans = generated_sequences[0].replace('<instructionS>', '\n').replace('<instructionE>', '').replace('<|endoftext|>', '')
    return ans
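
# Minimal usage sketch (assumes the model and tokenizer above are loaded):
# call the generator directly, outside the Gradio UI. Kept commented out so
# the Space only starts the web app.
# if __name__ == "__main__":
#     print(generate_seqs("Как зарабатывать денег на нейросетях ?", 1.0, 0.9, 50, 4, 100))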

description_html = '''
<p>Trained on 2 V100 GPUs by a team of authors:</p>
<ul>
<li><a href="https://t.me/YallenGusev" target="_blank">@YallenGusev</a></li>
<li><a href="https://t.me/lovedeathtransformers" target="_blank">@lovedeathtransformers</a></li>
<li><a href="https://t.me/alexkuk" target="_blank">@alexkuk</a></li>
<li><a href="https://t.me/chckdskeasfsd" target="_blank">@chckdskeasfsd</a></li>
<li><a href="https://t.me/dno5iq" target="_blank">@dno5iq</a></li>
</ul>
'''

g = gr.Interface(
    fn=generate_seqs,
    inputs=[
        # The placeholder is an example query in Russian ("How to make money with neural networks?")
        gr.components.Textbox(
            lines=2, label="Type your task here and I will try to solve it", placeholder="Как зарабатывать денег на нейросетях?"
        ),
        # gr.components.Textbox(lines=2, label="Input", placeholder="None"),
        gr.components.Slider(minimum=0.1, maximum=2, value=1.0, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=0.9, label="Top p"),
        gr.components.Slider(minimum=0, maximum=100, value=50, label="Top k"),
        # num_beams must be at least 1, so the slider starts at 1 instead of 0
        gr.components.Slider(minimum=1, maximum=5, step=1, value=4, label="Beams"),
        gr.components.Slider(
            minimum=1, maximum=256, step=1, value=100, label="Max tokens"
        ),
    ],
    outputs=[
        gr.components.Textbox(
            lines=5,
            label="Output",
        )
    ],
    title="ruInstructlarge",
    description=description_html,
)

g.queue(concurrency_count=5)
g.launch()