zRzRzRzRzRzRzR committed · Commit 9ec8fec · 1 Parent(s): aa0c384

Files changed (1):
  1. app.py +53 -28

app.py CHANGED
@@ -35,30 +35,59 @@ def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
 
 
 class GLM45Model:
+    def __init__(self):
+        self.reset_state()
+
+    def reset_state(self):
+        self.accumulated_text = ""
+
     def _strip_html(self, text: str) -> str:
         return re.sub(r"<[^>]+>", "", text).strip()
 
     def _wrap_text(self, text: str):
         return [{"type": "text", "text": text}]
 
-    def _stream_fragment(self, reasoning_content: str = "", content: str = "", skip_think: bool = False):
-        think_html = ""
-        if reasoning_content and not skip_think:
-            think_content = html.escape(reasoning_content).replace("\n", "<br>")
+    def _parse_thinking_content(self, text: str):
+        thinking_content = ""
+        regular_content = ""
+
+        if "<think>" in text:
+            think_pattern = r'<think>(.*?)</think>'
+            think_match = re.search(think_pattern, text, re.DOTALL)
+
+            if think_match:
+                thinking_content = think_match.group(1).strip()
+                regular_content = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
+            else:
+                think_start = text.find("<think>")
+                if think_start != -1:
+                    thinking_content = text[think_start + 7:]
+                    regular_content = text[:think_start].strip()
+        else:
+            regular_content = text
+
+        return thinking_content, regular_content
+
+    def _render_response(self, thinking_content: str, regular_content: str, skip_think: bool = False):
+        html_parts = []
+
+        if thinking_content and not skip_think:
+            thinking_escaped = html.escape(thinking_content).replace("\n", "<br>")
             think_html = (
                 "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
                 "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
-                + think_content
-                + "</div></details>"
+                + thinking_escaped +
+                "</div></details>"
             )
+            html_parts.append(think_html)
 
-        answer_html = ""
-        if content:
-            content_escaped = html.escape(content)
+        if regular_content:
+            content_escaped = html.escape(regular_content)
             content_formatted = content_escaped.replace("\n", "<br>")
-            answer_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
+            content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
+            html_parts.append(content_html)
 
-        return think_html + answer_html
+        return "".join(html_parts)
 
     def _build_messages(self, raw_hist, sys_prompt):
         msgs = []
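
The hunk above replaces the single `_stream_fragment` renderer with a two-step pipeline: `_parse_thinking_content` splits the accumulated text on inline `<think>...</think>` tags, and `_render_response` turns the two parts into HTML. A minimal standalone sketch of the same splitting logic, using a hypothetical `parse_thinking` helper that is not part of app.py, shows the two cases that matter mid-stream: a closed tag yields both parts, while an unclosed tag treats everything after `<think>` as thinking-so-far.

```python
import re

# Hypothetical standalone version of the _parse_thinking_content logic.
def parse_thinking(text: str):
    if "<think>" not in text:
        return "", text
    pattern = r"<think>(.*?)</think>"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        # Closed tag: thinking and answer are both available.
        thinking = match.group(1).strip()
        answer = re.sub(pattern, "", text, flags=re.DOTALL).strip()
        return thinking, answer
    # Tag still open mid-stream: everything after <think> is thinking-so-far.
    start = text.find("<think>")
    return text[start + len("<think>"):], text[:start].strip()

assert parse_thinking("plain answer") == ("", "plain answer")
assert parse_thinking("<think>step 1") == ("step 1", "")
assert parse_thinking("<think>step 1</think>Answer") == ("step 1", "Answer")
```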
 
@@ -78,32 +107,28 @@ class GLM45Model:
         global stop_generation
         stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-        reasoning_buffer = ""
-        content_buffer = ""
+
+        self.reset_state()
 
         try:
             for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
                 if stop_generation:
                     break
 
-                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
-                    reasoning_buffer += delta.reasoning_content
-                elif hasattr(delta, 'content') and delta.content:
-                    content_buffer += delta.content
-                else:
-                    if isinstance(delta, dict):
-                        if 'reasoning_content' in delta and delta['reasoning_content']:
-                            reasoning_buffer += delta['reasoning_content']
-                        if 'content' in delta and delta['content']:
-                            content_buffer += delta['content']
-                    elif hasattr(delta, 'content') and delta.content:
-                        content_buffer += delta.content
-
-                yield self._stream_fragment(reasoning_buffer, content_buffer, not thinking_enabled)
+                delta_content = ""
+                if hasattr(delta, 'content') and delta.content:
+                    delta_content = delta.content
+                elif isinstance(delta, dict) and 'content' in delta and delta['content']:
+                    delta_content = delta['content']
+
+                if delta_content:
+                    self.accumulated_text += delta_content
+                    thinking_content, regular_content = self._parse_thinking_content(self.accumulated_text)
+                    yield self._render_response(thinking_content, regular_content, not thinking_enabled)
 
         except Exception as e:
             error_msg = f"Error during streaming: {str(e)}"
-            yield self._stream_fragment("", error_msg)
+            yield self._render_response("", error_msg)
 
 
 glm45 = GLM45Model()
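
The second hunk drops the dual `reasoning_buffer`/`content_buffer` handling: each delta's `content` is appended to `self.accumulated_text`, and the full buffer is re-parsed and re-rendered on every chunk. A small sketch with hypothetical fake deltas standing in for the real `stream_from_vllm` output illustrates the effect: the thinking block is open in the first frame and closes as soon as the terminator arrives, with no per-delta state beyond the accumulator.

```python
import re

# Hypothetical stand-in for the vLLM delta stream; the new handler only
# reads `content` chunks and expects the <think> markup to arrive inline.
fake_deltas = [
    {"content": "<think>check the "},
    {"content": "units</think>"},
    {"content": "42 km"},
]

accumulated = ""
for delta in fake_deltas:
    accumulated += delta["content"]
    # Same split as _parse_thinking_content, re-run on the full buffer.
    match = re.search(r"<think>(.*?)</think>", accumulated, re.DOTALL)
    if match:
        thinking, answer = match.group(1).strip(), accumulated[match.end():].strip()
    elif "<think>" in accumulated:
        thinking, answer = accumulated.split("<think>", 1)[1], ""
    else:
        thinking, answer = "", accumulated
    print((thinking, answer))
# ('check the ', '')            tag still open, no answer yet
# ('check the units', '')       terminator arrived, block closes
# ('check the units', '42 km')  answer streams after the block
```

Re-parsing the whole buffer on each delta is quadratic in the response length, but it keeps the streaming UI stateless with respect to tag boundaries: text can move from the thinking pane to the answer pane the moment `</think>` appears.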