# Gradio demo: WizardLM-13B (GGML q4_0) running on CPU via the rustformers
# llm-rs bindings.  (Removed scraped Hugging Face Spaces page header:
# "Spaces: / Sleeping / Sleeping".)
import gradio as gr
from huggingface_hub import space_info
from llm_rs import AutoModel, GenerationConfig, KnownModels, Precision, SessionConfig
# Hub repository and 4-bit-quantized GGML weights file for WizardLM-13B.
repo_name = "svjack/ggml"
file_name = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

# Example prompts pre-filled in the Gradio examples widget.
# NOTE(review): the emoji characters below look mojibake'd in this copy of the
# file — verify against the original source before shipping.
examples = [
    "How to promote Chinese traditional culture ?",
    "Explain the meaning of word Ottoman",
    "Explain the meaning of π¨",
    "Use following emojis to generate a short description of a scene , the emojis are π¨π©π₯βοΈ",
    "Use following emojis to generate a short description of a scene , the emojis are π²π₯π¨π¦",
]

# Two threads / batch size 2: sized for the free 2-vCPU Space hardware.
session_config = SessionConfig(threads=2, batch_size=2)

# Download (or reuse a cached copy of) the weights from the Hub and load them
# with the rustformers Llama backend.  This runs at import time and may take
# a while on first start.
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
    model_type=KnownModels.Llama,
)
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Generate a streamed answer for *instruction*.

    Parameters mirror the UI controls: sampling ``temperature``, nucleus
    ``top_p``, ``top_k`` shortlist size, ``max_new_tokens`` budget, and the
    RNG ``seed``.  Yields the accumulated response text after every generated
    token so Gradio can render the output incrementally.
    """
    # Alpaca-style instruction template expected by WizardLM checkpoints.
    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    generation_config = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    response = ""
    streamer = model.stream(prompt=prompt, generation_config=generation_config)
    for new_text in streamer:
        response += new_text
        # Yield the full text so far (not just the delta): gr.Markdown
        # re-renders the whole value on each update.
        yield response
# ---------------------------------------------------------------------------
# Gradio UI: a question box, collapsible generation-parameter controls, and a
# streaming markdown output, all wired to `process_stream`.
# NOTE(review): indentation in the scraped source was mangled; the widget
# nesting below is reconstructed from the obvious layout — confirm against the
# original file.  A dead triple-quoted block (commented-out markdown about the
# svjack/chatglm3-few-shot Space) was removed.
# ---------------------------------------------------------------------------
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    gr.Markdown(
        """<h1><center> Wizardlm-13b on CPU in Rust π¦</center></h1>
This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) on 2 CPU cores.
"""
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            # Sampling controls, collapsed by default.
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens β 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
    with gr.Row():
        submit = gr.Button("Submit")
    with gr.Row():
        with gr.Box():
            gr.Markdown("**Wizardlm-13b**")
            output_7b = gr.Markdown()
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )
    # Both the Submit button and pressing Enter in the textbox trigger
    # generation; `process_stream` is a generator, so output streams.
    submit.click(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
        outputs=output_7b,
    )
# Embed a companion Space (ChatGLM3 few-shot emoji-to-prompt demo) below the
# main UI via an iframe.
with demo:
    gr.HTML(
        '''
<div style="justify-content: center; display: flex;">
<iframe
src="https://svjack-chatglm3-few-shot-demo.hf.space/?input_list_index=1"
frameborder="0"
width="1400"
height="768"
></iframe>
</div>
'''
    )

# Small queue with a single worker: the 13B model saturates both CPU cores,
# so requests are served one at a time.
demo.queue(max_size=4, concurrency_count=1).launch(debug=True)