Spaces:
Running
Running
zRzRzRzRzRzRzR
committed on
Commit
·
9ec8fec
1
Parent(s):
aa0c384
formt
Browse files
app.py
CHANGED
@@ -35,30 +35,59 @@ def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
|
|
35 |
|
36 |
|
37 |
class GLM45Model:
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
def _strip_html(self, text: str) -> str:
|
39 |
return re.sub(r"<[^>]+>", "", text).strip()
|
40 |
|
41 |
def _wrap_text(self, text: str):
|
42 |
return [{"type": "text", "text": text}]
|
43 |
|
44 |
-
def
|
45 |
-
|
46 |
-
|
47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
think_html = (
|
49 |
"<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
|
50 |
"<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
|
51 |
-
+
|
52 |
-
|
53 |
)
|
|
|
54 |
|
55 |
-
|
56 |
-
|
57 |
-
content_escaped = html.escape(content)
|
58 |
content_formatted = content_escaped.replace("\n", "<br>")
|
59 |
-
|
|
|
60 |
|
61 |
-
return
|
62 |
|
63 |
def _build_messages(self, raw_hist, sys_prompt):
|
64 |
msgs = []
|
@@ -78,32 +107,28 @@ class GLM45Model:
|
|
78 |
global stop_generation
|
79 |
stop_generation = False
|
80 |
msgs = self._build_messages(raw_hist, sys_prompt)
|
81 |
-
|
82 |
-
|
83 |
|
84 |
try:
|
85 |
for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
|
86 |
if stop_generation:
|
87 |
break
|
88 |
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
elif hasattr(delta, 'content') and delta.content:
|
100 |
-
content_buffer += delta.content
|
101 |
-
|
102 |
-
yield self._stream_fragment(reasoning_buffer, content_buffer, not thinking_enabled)
|
103 |
|
104 |
except Exception as e:
|
105 |
error_msg = f"Error during streaming: {str(e)}"
|
106 |
-
yield self.
|
107 |
|
108 |
|
109 |
glm45 = GLM45Model()
|
|
|
35 |
|
36 |
|
37 |
class GLM45Model:
|
38 |
+
def __init__(self):
|
39 |
+
self.reset_state()
|
40 |
+
|
41 |
+
def reset_state(self):
|
42 |
+
self.accumulated_text = ""
|
43 |
+
|
44 |
def _strip_html(self, text: str) -> str:
|
45 |
return re.sub(r"<[^>]+>", "", text).strip()
|
46 |
|
47 |
def _wrap_text(self, text: str):
|
48 |
return [{"type": "text", "text": text}]
|
49 |
|
50 |
+
def _parse_thinking_content(self, text: str):
|
51 |
+
thinking_content = ""
|
52 |
+
regular_content = ""
|
53 |
+
|
54 |
+
if "<think>" in text:
|
55 |
+
think_pattern = r'<think>(.*?)</think>'
|
56 |
+
think_match = re.search(think_pattern, text, re.DOTALL)
|
57 |
+
|
58 |
+
if think_match:
|
59 |
+
thinking_content = think_match.group(1).strip()
|
60 |
+
regular_content = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
|
61 |
+
else:
|
62 |
+
think_start = text.find("<think>")
|
63 |
+
if think_start != -1:
|
64 |
+
thinking_content = text[think_start + 7:]
|
65 |
+
regular_content = text[:think_start].strip()
|
66 |
+
else:
|
67 |
+
regular_content = text
|
68 |
+
|
69 |
+
return thinking_content, regular_content
|
70 |
+
|
71 |
+
def _render_response(self, thinking_content: str, regular_content: str, skip_think: bool = False):
|
72 |
+
html_parts = []
|
73 |
+
|
74 |
+
if thinking_content and not skip_think:
|
75 |
+
thinking_escaped = html.escape(thinking_content).replace("\n", "<br>")
|
76 |
think_html = (
|
77 |
"<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
|
78 |
"<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
|
79 |
+
+ thinking_escaped +
|
80 |
+
"</div></details>"
|
81 |
)
|
82 |
+
html_parts.append(think_html)
|
83 |
|
84 |
+
if regular_content:
|
85 |
+
content_escaped = html.escape(regular_content)
|
|
|
86 |
content_formatted = content_escaped.replace("\n", "<br>")
|
87 |
+
content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
|
88 |
+
html_parts.append(content_html)
|
89 |
|
90 |
+
return "".join(html_parts)
|
91 |
|
92 |
def _build_messages(self, raw_hist, sys_prompt):
|
93 |
msgs = []
|
|
|
107 |
global stop_generation
|
108 |
stop_generation = False
|
109 |
msgs = self._build_messages(raw_hist, sys_prompt)
|
110 |
+
|
111 |
+
self.reset_state()
|
112 |
|
113 |
try:
|
114 |
for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
|
115 |
if stop_generation:
|
116 |
break
|
117 |
|
118 |
+
delta_content = ""
|
119 |
+
if hasattr(delta, 'content') and delta.content:
|
120 |
+
delta_content = delta.content
|
121 |
+
elif isinstance(delta, dict) and 'content' in delta and delta['content']:
|
122 |
+
delta_content = delta['content']
|
123 |
+
|
124 |
+
if delta_content:
|
125 |
+
self.accumulated_text += delta_content
|
126 |
+
thinking_content, regular_content = self._parse_thinking_content(self.accumulated_text)
|
127 |
+
yield self._render_response(thinking_content, regular_content, not thinking_enabled)
|
|
|
|
|
|
|
|
|
128 |
|
129 |
except Exception as e:
|
130 |
error_msg = f"Error during streaming: {str(e)}"
|
131 |
+
yield self._render_response("", error_msg)
|
132 |
|
133 |
|
134 |
glm45 = GLM45Model()
|