Spaces:
Sleeping
Sleeping
File size: 5,685 Bytes
4d15bb3 4cf98c1 4d15bb3 4cf98c1 4d15bb3 e028464 4d15bb3 206f410 e028464 4d15bb3 e028464 4d15bb3 dfecfdd 4d15bb3 8fff3f5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
import chatglm_cpp
import gradio as gr
from pathlib import Path
# Model weights file, given as a path relative to the working directory.
model_file_path = "chatglm3-ggml_q4_0.bin"

# One pipeline instance is built at import time and shared by every request.
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))

# Sample prompts offered to the user through the examples panel of the UI.
examples = [
    "哈利波特和赫敏是什么关系?",
    "如何学好历史?",
    "明朝内阁制度的特点是什么?",
    "如何进行经济建设?",
    "How to promote Chinese traditional culture ?",
    "你听说过马克思吗?",
]
def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Stream the model's answer to *instruction*, yielding the text so far.

    This is a generator: every yielded value is the complete response
    accumulated up to that point, so a UI can simply replace its output
    with the latest value.

    Args:
        instruction: The user's prompt. If it contains the literal marker
            ``[SEP]`` it is split on that marker and the pieces are sent to
            ``chatglm_llm.chat`` as the conversation history; otherwise the
            whole string is sent to ``chatglm_llm.generate`` as the prompt.
        temperature: Forwarded to the model as ``temperature``.
        top_p: Forwarded to the model as ``top_p``.
        top_k: Forwarded to the model as ``top_k``.
        max_new_tokens: Forwarded to the model as ``max_length``.
        seed: Accepted so the signature matches the UI inputs; it is not
            used by this function.

    Yields:
        The response text generated so far. For an empty or whitespace-only
        instruction a single empty string is yielded and the model is not
        called.
    """
    # Nothing to ask: clear the output instead of running the model on an
    # empty prompt.
    if not instruction or not instruction.strip():
        yield ""
        return

    # Options shared by both the single-prompt and the chat code paths.
    generation_kwargs = dict(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        max_length=max_new_tokens,
        stream=True,
    )

    if "[SEP]" not in instruction:
        streamer = chatglm_llm.generate(prompt=instruction, **generation_kwargs)
    else:
        history = instruction.split("[SEP]")
        # NOTE(review): do_sample=False is hard-coded for chat only, which
        # presumably makes temperature/top_p/top_k irrelevant on this path;
        # confirm against the chatglm_cpp version in use.
        streamer = chatglm_llm.chat(
            history=history,
            do_sample=False,
            **generation_kwargs,
        )

    response = ""
    for new_text in streamer:
        response += new_text
        yield response
# Page-level stylesheet passed to gr.Blocks.
# NOTE(review): `float: middle` is not a valid value for the CSS `float`
# property, so browsers ignore that declaration; left as-is to keep the
# current rendering unchanged.
_PAGE_CSS = '''
.header img {
float: middle;
width: 33px;
height: 33px;
}
.header h1 {
top: 18px;
left: 10px;
}
.disclaimer {font-variant-caps: all-small-caps;}
'''

# Banner shown at the top of the page. gr.HTML renders raw HTML, so the
# project link is written as an <a> element rather than Markdown link syntax
# (which would show up as literal brackets and parentheses).
_HEADER_HTML = """
<div class="header">
<h1> <center> <img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml/resolve/main/hanuman.png">
ChatGLM3 on CPU in CPP </center></h1>
</div>
This demo uses the <a href="https://github.com/li-plus/chatglm.cpp">chatglm.cpp</a> library on 2 CPU cores.
"""

with gr.Blocks(theme=gr.themes.Soft(), css=_PAGE_CSS) as demo:
    gr.HTML(_HEADER_HTML)

    # Prompt box plus a collapsed panel of generation settings.
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.5,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            # NOTE(review): the help text mentions 0, but the
                            # slider's minimum is 5, so 0 cannot be selected.
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of top-k tokens — 0 to disable and sample from all tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )
                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )

    with gr.Row():
        submit = gr.Button("Submit")

    # Area where the streamed model response is rendered as Markdown.
    with gr.Row():
        with gr.Tab():
            gr.Markdown("**ChatGLM3-6b**")
            output_7b = gr.Markdown()

    with gr.Row():
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_7b,
        )

    # Both the Submit button and pressing Enter in the textbox run the same
    # handler with the same inputs, in the order process_stream expects.
    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens, seed]
    submit.click(
        process_stream,
        inputs=generation_inputs,
        outputs=output_7b,
    )
    instruction.submit(
        process_stream,
        inputs=generation_inputs,
        outputs=output_7b,
    )
# Listen on all network interfaces. The address has to be given by keyword:
# the first positional parameter of Blocks.launch is `inline`, so a
# positional "0.0.0.0" would set that flag instead of the server address.
demo.launch(server_name="0.0.0.0", debug=True)