File size: 5,685 Bytes
4d15bb3
 
 
 
 
 
 
 
4cf98c1
 
 
 
 
 
4d15bb3
 
4cf98c1
4d15bb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e028464
 
 
 
 
 
 
 
 
 
 
 
4d15bb3
206f410
e028464
 
 
4d15bb3
e028464
4d15bb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dfecfdd
4d15bb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8fff3f5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import chatglm_cpp
import gradio as gr
from pathlib import Path

# Location of the GGML model weights. The path is relative, so it is resolved
# against the process's current working directory.
# NOTE(review): the file name suggests 4-bit (q4_0) quantized weights — confirm.
model_file_path = "chatglm3-ggml_q4_0.bin"
# Single chatglm.cpp pipeline, constructed once at import time and shared by
# every call to process_stream.
chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))

# Sample prompts offered as clickable examples beneath the input box.
examples = [
    "哈利波特和赫敏是什么关系?",
    "如何学好历史?",
    "明朝内阁制度的特点是什么?",
    "如何进行经济建设?",
    "How to promote Chinese traditional culture ?",
    "你听说过马克思吗?",
]


def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
    """Stream a ChatGLM3 reply for ``instruction``, yielding the text so far.

    A plain prompt is sent to ``chatglm_llm.generate``. A prompt containing
    the literal marker ``[SEP]`` is split on it and the resulting turns are
    sent to ``chatglm_llm.chat`` as the conversation history.

    Args:
        instruction: Prompt text; ``[SEP]`` separates conversation turns.
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        top_k: Top-k shortlist size forwarded to the model (coerced to int).
        max_new_tokens: Forwarded to the model as ``max_length`` (coerced to
            int). NOTE(review): ``max_length`` may count prompt tokens as
            well as generated ones — confirm against the chatglm_cpp API.
        seed: Accepted so the UI wiring keeps working; currently not
            forwarded to the model.

    Yields:
        The accumulated response after each streamed chunk. A blank
        instruction (or one holding only ``[SEP]`` markers) yields a single
        empty string without invoking the model.
    """
    # Nothing to ask: clear the output instead of running the model on "".
    if not instruction or not instruction.strip():
        yield ""
        return

    # Options shared by both entry points. The UI may hand numbers over as
    # floats, while the native binding expects integers for these two.
    generation_kwargs = dict(
        temperature=temperature,
        top_p=top_p,
        top_k=int(top_k),
        max_length=int(max_new_tokens),
        stream=True,
    )

    if "[SEP]" not in instruction:
        streamer = chatglm_llm.generate(prompt=instruction, **generation_kwargs)
    else:
        # Drop padding around the separators and discard empty turns (for
        # example the one produced by a trailing "[SEP]").
        turns = (part.strip() for part in instruction.split("[SEP]"))
        history = [turn for turn in turns if turn]
        if not history:
            yield ""
            return
        # NOTE(review): do_sample=False presumably makes multi-turn replies
        # greedy, so temperature/top_p/top_k would have no effect on this
        # path — confirm this is intended.
        # NOTE(review): chatglm_cpp may require alternating user/assistant
        # turns ending on a user turn — confirm and validate if so.
        streamer = chatglm_llm.chat(
            history=history,
            do_sample=False,
            **generation_kwargs,
        )

    response = ""
    for new_text in streamer:
        response += new_text
        # Yield the whole text so far so the output widget is replaced,
        # not appended to, on every update.
        yield response


# Gradio front end: a prompt box, collapsible sampling controls, a submit
# button, a Markdown pane that receives the streamed answer, and a row of
# example prompts.
with gr.Blocks(
    theme=gr.themes.Soft(),
    # "vertical-align: middle" replaces the invalid "float: middle"
    # declaration, which browsers ignore.
    css='''
    .header img {
          vertical-align: middle;
          width: 33px;
          height: 33px;
        }
    .header h1 {
          top: 18px;
          left: 10px;
        }
    .disclaimer {font-variant-caps: all-small-caps;}
    ''',
) as demo:
    # gr.HTML renders raw HTML, so the project link must be an <a> tag;
    # Markdown link syntax would be displayed literally.
    gr.HTML(
        """
        <div class="header">
        <h1> <center> <img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml/resolve/main/hanuman.png"> 
        ChatGLM3 on CPU in CPP </center></h1>
        </div>
        This demo uses the <a href="https://github.com/li-plus/chatglm.cpp">chatglm.cpp</a> library on 2 CPU cores.
        """
    )
    with gr.Row():
        with gr.Column():
            with gr.Row():
                instruction = gr.Textbox(
                    placeholder="Enter your question or instruction here",
                    label="Question/Instruction",
                    elem_id="q-input",
                )
            # Generation settings, collapsed by default.
            with gr.Accordion("Advanced Options:", open=False):
                with gr.Row():
                    with gr.Column():
                        with gr.Row():
                            temperature = gr.Slider(
                                label="Temperature",
                                value=0.5,
                                minimum=0.1,
                                maximum=1.0,
                                step=0.1,
                                interactive=True,
                                info="Higher values produce more diverse outputs",
                            )
                    with gr.Column():
                        with gr.Row():
                            top_p = gr.Slider(
                                label="Top-p (nucleus sampling)",
                                value=0.95,
                                minimum=0.0,
                                maximum=1.0,
                                step=0.01,
                                interactive=True,
                                info=(
                                    "Sample from the smallest possible set of tokens whose cumulative probability "
                                    "exceeds top_p. Set to 1 to disable and sample from all tokens."
                                ),
                            )
                    with gr.Column():
                        with gr.Row():
                            # The slider cannot go below 5, so the help text
                            # no longer advertises "0 to disable".
                            top_k = gr.Slider(
                                label="Top-k",
                                value=40,
                                minimum=5,
                                maximum=80,
                                step=1,
                                interactive=True,
                                info="Sample from a shortlist of the top-k most likely tokens.",
                            )
                    with gr.Column():
                        with gr.Row():
                            max_new_tokens = gr.Slider(
                                label="Maximum new tokens",
                                value=256,
                                minimum=0,
                                maximum=1024,
                                step=5,
                                interactive=True,
                                info="The maximum number of new tokens to generate",
                            )

                    with gr.Column():
                        with gr.Row():
                            seed = gr.Number(
                                label="Seed",
                                value=42,
                                interactive=True,
                                info="The seed to use for the generation",
                                precision=0,
                            )
    with gr.Row():
        submit = gr.Button("Submit")
    with gr.Row():
        with gr.Tab():
            gr.Markdown("**ChatGLM3-6b**")
            # Pane that receives the streamed model response.
            output_md = gr.Markdown()

    with gr.Row():
        # With caching off (and run_on_click left at its default), choosing
        # an example only fills the textbox; the user still presses Submit.
        gr.Examples(
            examples=examples,
            inputs=[instruction],
            cache_examples=False,
            fn=process_stream,
            outputs=output_md,
        )

    # Both triggers (button click and Enter in the textbox) run the same
    # handler with the same inputs, in process_stream's parameter order.
    generation_inputs = [instruction, temperature, top_p, top_k, max_new_tokens, seed]
    submit.click(
        process_stream,
        inputs=generation_inputs,
        outputs=output_md,
    )
    instruction.submit(
        process_stream,
        inputs=generation_inputs,
        outputs=output_md,
    )

# Listen on all interfaces. The host has to be given as server_name: the
# first positional parameter of launch() is `inline`, not the bind address.
demo.launch(server_name="0.0.0.0", debug=True)