Spaces: Running

zRzRzRzRzRzRzR committed
Commit 67199da · Parent(s): 2866134

update

Files changed:
- app.py (+212)
- requirements.txt (+4)
app.py
ADDED
@@ -0,0 +1,212 @@
import copy
import re
import time
import html
from openai import OpenAI
import gradio as gr

# Flag flipped by reset() to interrupt an in-flight streaming response.
stop_generation = False


def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
    # Streams chat deltas from an OpenAI-compatible endpoint; the client picks up
    # its API key and base URL from the standard OpenAI environment variables.
    global stop_generation
    client = OpenAI()

    response = client.chat.completions.create(
        model="glm-4.5",
        messages=messages,
        temperature=temperature,
        stream=True,
        max_tokens=32000,
        extra_body={
            "thinking": {
                "type": "enabled" if thinking_enabled else "disabled",
            }
        },
    )

    print(response)
    for chunk in response:
        if stop_generation:
            break
        if chunk.choices and chunk.choices[0].delta:
            delta = chunk.choices[0].delta
            yield delta


class GLM45Model:
    def _strip_html(self, text: str) -> str:
        return re.sub(r"<[^>]+>", "", text).strip()

    def _wrap_text(self, text: str):
        return [{"type": "text", "text": text}]

    def _stream_fragment(self, reasoning_content: str = "", content: str = "", skip_think: bool = False):
        # Render the reasoning trace (collapsible) and the answer as HTML for the Chatbot.
        think_html = ""
        if reasoning_content and not skip_think:
            think_content = html.escape(reasoning_content).replace("\n", "<br>")
            think_html = (
                "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
                "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
                + think_content
                + "</div></details>"
            )

        answer_html = ""
        if content:
            content_escaped = html.escape(content)
            content_formatted = content_escaped.replace("\n", "<br>")
            answer_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"

        return think_html + answer_html

    def _build_messages(self, raw_hist, sys_prompt):
        msgs = []
        if sys_prompt.strip():
            msgs.append({"role": "system", "content": [{"type": "text", "text": sys_prompt.strip()}]})
        for h in raw_hist:
            if h["role"] == "user":
                msgs.append({"role": "user", "content": self._wrap_text(h["content"])})
            else:
                # Drop the rendered "Thinking" block and any markup before resending history.
                raw = re.sub(r"<details.*?</details>", "", h["content"], flags=re.DOTALL)
                clean_content = self._strip_html(raw).strip()
                if clean_content:
                    msgs.append({"role": "assistant", "content": self._wrap_text(clean_content)})
        return msgs

    def stream_generate(self, raw_hist, sys_prompt: str, thinking_enabled: bool = True, temperature: float = 1.0):
        global stop_generation
        stop_generation = False
        msgs = self._build_messages(raw_hist, sys_prompt)
        reasoning_buffer = ""
        content_buffer = ""

        try:
            for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
                if stop_generation:
                    break

                # Deltas may arrive as objects or plain dicts; accumulate both streams.
                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
                    reasoning_buffer += delta.reasoning_content
                elif hasattr(delta, 'content') and delta.content:
                    content_buffer += delta.content
                elif isinstance(delta, dict):
                    if 'reasoning_content' in delta and delta['reasoning_content']:
                        reasoning_buffer += delta['reasoning_content']
                    if 'content' in delta and delta['content']:
                        content_buffer += delta['content']

                yield self._stream_fragment(reasoning_buffer, content_buffer, not thinking_enabled)

        except Exception as e:
            error_msg = f"Error during streaming: {str(e)}"
            yield self._stream_fragment("", error_msg)


glm45 = GLM45Model()


def chat(msg, raw_hist, sys_prompt, thinking_enabled, temperature):
    global stop_generation
    stop_generation = False

    if raw_hist is None:
        raw_hist = []

    if not msg.strip():
        # chat() is a generator, so the empty-message case must yield;
        # a plain return value would never reach Gradio.
        yield raw_hist, copy.deepcopy(raw_hist), ""
        return

    user_rec = {"role": "user", "content": msg.strip()}
    raw_hist.append(user_rec)
    place = {"role": "assistant", "content": ""}
    raw_hist.append(place)

    yield raw_hist, copy.deepcopy(raw_hist), ""

    try:
        for chunk in glm45.stream_generate(raw_hist[:-1], sys_prompt, thinking_enabled, temperature):
            if stop_generation:
                break
            place["content"] = chunk
            yield raw_hist, copy.deepcopy(raw_hist), ""
    except Exception as e:
        error_content = f"<div style='color: red;'>Error: {html.escape(str(e))}</div>"
        place["content"] = error_content
        yield raw_hist, copy.deepcopy(raw_hist), ""

    yield raw_hist, copy.deepcopy(raw_hist), ""


def reset():
    global stop_generation
    stop_generation = True
    time.sleep(0.1)
    return [], [], ""


demo = gr.Blocks(title="GLM-4.5 API Space", theme=gr.themes.Soft())

with demo:
    gr.Markdown(
        "<div style='text-align:center;font-size:32px;font-weight:bold;margin-bottom:10px;'>GLM-4.5 API Space</div>"
        "<div style='text-align:center;color:red;font-size:16px;margin-bottom:20px;'>"
        "This space uses the API version of the service for faster response.<br>"
        "Chat only. For tool use, MCP support, and web search, please refer to the API.</div>"
        "<div style='text-align:center;'><a href='https://huggingface.co/THUDM/GLM-4.5'>Model Hub</a> | "
        "<a href='https://github.com/THUDM/GLM-4.5'>Github</a> | "
        "<a href='https://www.bigmodel.cn'>API</a></div>"
    )
    raw_history = gr.State([])

    with gr.Row():
        with gr.Column(scale=2):
            chatbox = gr.Chatbot(
                label="Chat",
                type="messages",
                height=600,
                elem_classes="chatbot-container",
                sanitize_html=False,
                line_breaks=True,
            )
            textbox = gr.Textbox(label="Message", lines=3)
            with gr.Row():
                send = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
        with gr.Column(scale=1):
            thinking_toggle = gr.Checkbox(label="Enable Thinking", value=True)
            gr.Markdown(
                "<div style='color:red;font-size:12px;margin-top:5px;margin-bottom:15px;'>"
                "ON: Enable model thinking.<br>"
                "OFF: Disable model thinking; the model answers directly without a reasoning step."
                "</div>"
            )
            temperature_slider = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                value=1.0,
                step=0.01,
                label="Temperature",
            )
            sys = gr.Textbox(label="System Prompt", lines=8)

    send.click(
        chat,
        inputs=[textbox, raw_history, sys, thinking_toggle, temperature_slider],
        outputs=[chatbox, raw_history, textbox],
    )
    textbox.submit(
        chat,
        inputs=[textbox, raw_history, sys, thinking_toggle, temperature_slider],
        outputs=[chatbox, raw_history, textbox],
    )
    clear.click(
        reset,
        outputs=[chatbox, raw_history, textbox],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", share=True)
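Note: stream_from_vllm instantiates OpenAI() with its default configuration, so the endpoint and key must come from the environment. A minimal sketch of pointing the client at a self-hosted vLLM (or other OpenAI-compatible) server is shown below; the base URL and environment variable names are illustrative assumptions, not part of this commit.

# Sketch only: wiring the client to an explicit OpenAI-compatible endpoint.
# GLM_API_BASE / GLM_API_KEY are hypothetical placeholder names.
import os
from openai import OpenAI

client = OpenAI(
    base_url=os.environ.get("GLM_API_BASE", "http://localhost:8000/v1"),  # assumed vLLM server URL
    api_key=os.environ.get("GLM_API_KEY", "EMPTY"),  # vLLM typically accepts any placeholder key
)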
requirements.txt
ADDED
@@ -0,0 +1,4 @@
gradio==5.38.2
spaces>=0.37.1
torch==2.5.1
openai>=1.97.1
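For a quick check outside the Gradio UI, the chat generator can be driven directly. This is a sketch under the assumption that app.py is importable from the working directory and that the OpenAI client can reach a configured endpoint; otherwise the loop still runs and yields the rendered error fragment.

# Minimal smoke test of the streaming chat loop (no UI).
from app import chat

history = []
final = None
for rendered_history, state_copy, cleared_box in chat(
    "Hello! Introduce yourself briefly.",  # user message
    history,                               # mutable chat history (role/content dicts)
    "",                                    # system prompt
    True,                                  # thinking enabled
    0.7,                                   # temperature
):
    final = rendered_history  # each yield carries the latest rendered assistant HTML

if final:
    print(final[-1]["content"])  # HTML fragment for the assistant's reply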