svjack committed
Commit 4d15bb3 · 1 Parent(s): 40c0254

Create app.py

Files changed (1)
  1. app.py +142 -0
app.py ADDED
@@ -0,0 +1,142 @@
+ import chatglm_cpp
+ import gradio as gr
+ from pathlib import Path
+
+ # Load the quantized ChatGLM3 weights into a chatglm.cpp pipeline.
+ model_file_path = "chatglm3-ggml_q4_0.bin"
+ chatglm_llm = chatglm_cpp.Pipeline(Path(model_file_path))
+
+ examples = [
+     "如何弘扬中华传统文化?",  # How can Chinese traditional culture be promoted?
+     "How to promote Chinese traditional culture?",
+     "如何学好历史?",  # How can one study history well?
+     "写一段孔子与马克思的对话录。",  # Write a dialogue between Confucius and Marx.
+     "如何进行经济建设?",  # How should economic development be carried out?
+ ]
+
+ def process_stream(instruction, temperature, top_p, top_k, max_new_tokens, seed):
+     # Note: seed is passed in from the UI but is not used below.
+     if "[SEP]" not in instruction:
+         # Single-turn input: stream a plain text completion.
+         streamer = chatglm_llm.generate(
+             prompt=instruction,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=top_k,
+             max_length=max_new_tokens,
+             stream=True,
+         )
+     else:
+         # Multi-turn input: "[SEP]"-separated turns are treated as chat history.
+         history = instruction.split("[SEP]")
+         streamer = chatglm_llm.chat(
+             history=history,
+             temperature=temperature,
+             top_p=top_p,
+             top_k=top_k,
+             max_length=max_new_tokens,
+             do_sample=False,  # greedy decoding: the sampling knobs above are ignored here
+             stream=True,
+         )
+     # Accumulate streamed chunks and yield the growing response for live updates.
+     response = ""
+     for new_text in streamer:
+         response += new_text
+         yield response
+
+
+ with gr.Blocks(
+     theme=gr.themes.Soft(),
+     css=".disclaimer {font-variant-caps: all-small-caps;}",
+ ) as demo:
+     gr.Markdown(
+         """<h1> <center> <img src="https://huggingface.co/spaces/svjack/chatglm3-6b-ggml-v0/resolve/main/hanuman.png" alt="SD">
+ ChatGLM3 on CPU in CPP </center></h1>
+ This demo uses the [chatglm.cpp](https://github.com/li-plus/chatglm.cpp) library on 2 CPU cores.
+ """
+     )
+     with gr.Row():
+         with gr.Column():
+             with gr.Row():
+                 instruction = gr.Textbox(
+                     placeholder="Enter your question or instruction here",
+                     label="Question/Instruction",
+                     elem_id="q-input",
+                 )
+             with gr.Accordion("Advanced Options:", open=False):
+                 with gr.Row():
+                     with gr.Column():
+                         with gr.Row():
+                             temperature = gr.Slider(
+                                 label="Temperature",
+                                 value=0.5,
+                                 minimum=0.1,
+                                 maximum=1.0,
+                                 step=0.1,
+                                 interactive=True,
+                                 info="Higher values produce more diverse outputs",
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             top_p = gr.Slider(
+                                 label="Top-p (nucleus sampling)",
+                                 value=0.95,
+                                 minimum=0.0,
+                                 maximum=1.0,
+                                 step=0.01,
+                                 interactive=True,
+                                 info=(
+                                     "Sample from the smallest possible set of tokens whose cumulative probability "
+                                     "exceeds top_p. Set to 1 to disable and sample from all tokens."
+                                 ),
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             top_k = gr.Slider(
+                                 label="Top-k",
+                                 value=40,
+                                 minimum=5,
+                                 maximum=80,
+                                 step=1,
+                                 interactive=True,
+                                 info="Sample from a shortlist of top-k tokens. Set to 0 to disable and sample from all tokens.",
+                             )
+                     with gr.Column():
+                         with gr.Row():
+                             max_new_tokens = gr.Slider(
+                                 label="Maximum new tokens",
+                                 value=256,
+                                 minimum=0,
+                                 maximum=1024,
+                                 step=5,
+                                 interactive=True,
+                                 info="The maximum number of new tokens to generate",
+                             )
+
+                     with gr.Column():
+                         with gr.Row():
+                             seed = gr.Number(
+                                 label="Seed",
+                                 value=42,
+                                 interactive=True,
+                                 info="The seed to use for the generation",
+                                 precision=0,
+                             )
+             with gr.Row():
+                 submit = gr.Button("Submit")
+             with gr.Row():
+                 with gr.Box():
+                     gr.Markdown("**ChatGLM3-6b**")
+                     output_7b = gr.Markdown()
+
+     with gr.Row():
+         gr.Examples(
+             examples=examples,
+             inputs=[instruction],
+             cache_examples=False,
+             fn=process_stream,
+             outputs=output_7b,
+         )
+
+     submit.click(
+         process_stream,
+         inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
+         outputs=output_7b,
+     )
+     instruction.submit(
+         process_stream,
+         inputs=[instruction, temperature, top_p, top_k, max_new_tokens, seed],
+         outputs=output_7b,
+     )
+
+ demo.queue(max_size=4, concurrency_count=1).launch(debug=True)
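
For reference, the chatglm_cpp calls that app.py makes can be exercised without Gradio. A minimal sketch, assuming chatglm_cpp is installed and the same chatglm3-ggml_q4_0.bin file is in the working directory; it mirrors the generate(..., stream=True) usage in process_stream above:

    # Minimal standalone sketch of the streaming call used in process_stream.
    # Assumption: chatglm_cpp is installed and chatglm3-ggml_q4_0.bin exists locally.
    from pathlib import Path

    import chatglm_cpp

    pipeline = chatglm_cpp.Pipeline(Path("chatglm3-ggml_q4_0.bin"))

    # stream=True yields text chunks; print them as they arrive.
    for chunk in pipeline.generate(
        prompt="如何学好历史?",  # "How can one study history well?"
        temperature=0.5,
        top_p=0.95,
        top_k=40,
        max_length=256,
        stream=True,
    ):
        print(chunk, end="", flush=True)
    print()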
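
The "[SEP]" convention means multi-turn context goes through the same single textbox. A hedged illustration of how such an input maps onto the chat branch (the turn strings here are made up for the example):

    # Hypothetical textbox input carrying two prior turns plus a new question.
    instruction = (
        "How to study history well?"
        "[SEP]Read primary sources and take notes."
        "[SEP]Which primary sources suit beginners?"
    )

    # process_stream splits this into the history list handed to Pipeline.chat,
    # alternating user and assistant turns and ending with the new user message.
    history = instruction.split("[SEP]")
    assert history == [
        "How to study history well?",
        "Read primary sources and take notes.",
        "Which primary sources suit beginners?",
    ]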