Spaces:
Sleeping
Sleeping
File size: 6,903 Bytes
e1e1bf8 9c7ae16 e1e1bf8 cf8bc32 e1e1bf8 bbfa5db e1e1bf8 9c7ae16 e882568 d420d33 9c7ae16 d420d33 9c7ae16 e882568 9c7ae16 e1e1bf8 f93a3bd f876863 1074d25 f93a3bd e1e1bf8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 |
import gradio as gr
from llm_rs import AutoModel,SessionConfig,GenerationConfig,Precision,KnownModels
from huggingface_hub import space_info
# Repository id and weight file name passed to AutoModel.from_pretrained.
repo_name = "svjack/ggml"
file_name = "wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"

# Lead-in shared by the emoji-to-scene example prompts.
_EMOJI_SCENE_PREFIX = (
    "Use following emojis to generate a short description of a scene , "
    "the emojis are "
)

# Example prompts handed to gr.Examples in the UI.
# NOTE(review): the emoji sequences below look mis-encoded (mojibake); the
# intended emoji cannot be recovered with certainty from this file, so the
# strings are kept exactly as found -- confirm against the upstream source.
examples = [
    "How to promote Chinese traditional culture ?",
    "Explain the meaning of word Ottoman",
    "Explain the meaning of π¨",
    _EMOJI_SCENE_PREFIX + "π¨π©π₯βοΈ",
    _EMOJI_SCENE_PREFIX + "π²π₯π¨π¦",
]
# Inference session settings: two threads, batch size of two.
session_config = SessionConfig(threads=2, batch_size=2)

# Load the weights named by repo_name / file_name as a Llama-type model.
# verbose=True is forwarded to the loader unchanged.
model = AutoModel.from_pretrained(
    repo_name,
    model_file=file_name,
    session_config=session_config,
    verbose=True,
    model_type=KnownModels.Llama,
)

# Disabled alternative, kept as an inert string literal: load the same model
# from a local file path instead of by repository id.
'''
model_path = "/Users/svjack/Library/Application Support/nomic.ai/GPT4All/wizardlm-13b-v1.1-superhot-8k.ggmlv3.q4_0.bin"
model = AutoModel.from_pretrained(model_path,
model_type=KnownModels.Llama)
'''
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Stream the model's reply to ``instruction``, yielding the text so far.

    The instruction is wrapped in an instruction/response prompt template and
    passed to the module-level ``model``. Every chunk produced by the model's
    stream is appended to the running text, and the whole accumulated text is
    yielded after each chunk.

    Args:
        instruction: User question or instruction inserted into the prompt.
        temperature: Forwarded to ``GenerationConfig`` as ``temperature``.
        top_p: Forwarded to ``GenerationConfig`` as ``top_p``.
        top_k: Forwarded to ``GenerationConfig`` as ``top_k``.
        max_new_tokens: Forwarded to ``GenerationConfig`` as ``max_new_tokens``.
        seed: Forwarded to ``GenerationConfig`` as ``seed``.

    Yields:
        The cumulative response string after each streamed chunk.
    """
    prompt=f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.
### Instruction:
{instruction}
### Response:
Answer:"""
    sampling = GenerationConfig(
        seed=seed,
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_new_tokens=max_new_tokens,
    )
    text_so_far = ""
    for chunk in model.stream(prompt=prompt, generation_config=sampling):
        # Yield the full text each time rather than only the newest chunk.
        text_so_far = text_so_far + chunk
        yield text_so_far
# Build the demo UI: header, instruction box, sampling controls, output panel,
# example prompts, and the event wiring that streams process_stream's output.
# NOTE(review): the nesting below is reconstructed, since the file as found
# had no indentation -- confirm the layout against the upstream source.
with gr.Blocks(
    theme=gr.themes.Soft(),
    css=".disclaimer {font-variant-caps: all-small-caps;}",
) as demo:
    # Page header. The crab emoji replaces a mis-encoded byte sequence
    # ("π¦") that was present in the title.
    gr.Markdown(
        "<h1><center> Wizardlm-13b on CPU in Rust 🦀</center></h1>\n"
        "This demo uses the [rustformers/llm](https://github.com/rustformers/llm) library via [llm-rs](https://github.com/LLukas22/llm-rs-python) on 2 CPU cores.\n"
    )

    # Disabled banner linking to the ChatGLM3 few-shot Space, kept as an inert
    # string literal (it is the only visible use of `space_info`).
    '''
    markdown_exp_size = "##"
    lora_repo = "svjack/chatglm3-few-shot"
    lora_repo_link = "svjack/chatglm3-few-shot/?input_list_index=1"
    emoji_info = space_info(lora_repo).__dict__["cardData"]["emoji"]
    space_cnt = 1
    task_name = "[---Emojis to Image Prompt---]"
    gr.Markdown(
    value=f"{markdown_exp_size} {task_name} few shot prompt in ChatGLM3 Few Shot space repo (click submit to activate) : [{lora_repo_link}](https://huggingface.co/spaces/{lora_repo_link}) {emoji_info}",
    visible=True,
    elem_id="selected_space",
    )
    '''

    with gr.Row():
        with gr.Column():
            # Free-text prompt entered by the user.
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )

            # Sampling parameters, collapsed by default.
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.8,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            # NOTE(review): the help text mentions 0 to
                            # disable, but the slider minimum is 5, so 0
                            # cannot be selected here -- confirm intent.
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            # precision=0 requests an integer-valued number.
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )

            with gr.Row():
                submit = gr.Button("Submit")

            # Output panel. The `output_7b` name is historical; the model
            # shown is Wizardlm-13b.
            with gr.Row():
                with gr.Box():
                    gr.Markdown("**Wizardlm-13b**")
                    output_7b = gr.Markdown()

    # Example prompts. Only `instruction` is listed as an input and caching is
    # off; presumably selecting an example just fills the textbox -- verify
    # against the installed Gradio version.
    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )

    # Both the button click and pressing Enter in the textbox run the same
    # generator with the same inputs, so the input list is defined once.
    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens, seed]
    submit.click(
        process_stream,
        inputs=generation_inputs,
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=generation_inputs,
        outputs=output_7b,
    )
# Markup that embeds the svjack-chatglm3-few-shot-demo Space in an iframe.
_FEW_SHOT_IFRAME_HTML = '''
<div style="justify-content: center; display: flex;">
<iframe
src="https://svjack-chatglm3-few-shot-demo.hf.space/?input_list_index=1"
frameborder="0"
width="1400"
height="768"
></iframe>
</div>
'''

# Re-enter the existing Blocks context to add the iframe to the same app.
with demo:
    gr.HTML(_FEW_SHOT_IFRAME_HTML)

# Configure the request queue (max_size=4, concurrency_count=1), then start
# the app with debug=True.
queued_demo = demo.queue(max_size=4, concurrency_count=1)
queued_demo.launch(debug=True)
|