zRzRzRzRzRzRzR committed · Commit 9ec8fec · 1 Parent(s): aa0c384

Files changed (1):
  1. app.py +53 -28

app.py CHANGED
@@ -35,30 +35,59 @@ def stream_from_vllm(messages, thinking_enabled=True, temperature=1.0):
 
 
 class GLM45Model:
+    def __init__(self):
+        self.reset_state()
+
+    def reset_state(self):
+        self.accumulated_text = ""
+
     def _strip_html(self, text: str) -> str:
         return re.sub(r"<[^>]+>", "", text).strip()
 
     def _wrap_text(self, text: str):
         return [{"type": "text", "text": text}]
 
-    def _stream_fragment(self, reasoning_content: str = "", content: str = "", skip_think: bool = False):
-        think_html = ""
-        if reasoning_content and not skip_think:
-            think_content = html.escape(reasoning_content).replace("\n", "<br>")
+    def _parse_thinking_content(self, text: str):
+        thinking_content = ""
+        regular_content = ""
+
+        if "<think>" in text:
+            think_pattern = r'<think>(.*?)</think>'
+            think_match = re.search(think_pattern, text, re.DOTALL)
+
+            if think_match:
+                thinking_content = think_match.group(1).strip()
+                regular_content = re.sub(think_pattern, '', text, flags=re.DOTALL).strip()
+            else:
+                think_start = text.find("<think>")
+                if think_start != -1:
+                    thinking_content = text[think_start + 7:]
+                    regular_content = text[:think_start].strip()
+        else:
+            regular_content = text
+
+        return thinking_content, regular_content
+
+    def _render_response(self, thinking_content: str, regular_content: str, skip_think: bool = False):
+        html_parts = []
+
+        if thinking_content and not skip_think:
+            thinking_escaped = html.escape(thinking_content).replace("\n", "<br>")
             think_html = (
                 "<details open><summary style='cursor:pointer;font-weight:bold;color:#007acc;'>💭 Thinking</summary>"
                 "<div style='color:#555555;line-height:1.6;padding:15px;border-left:4px solid #007acc;margin:10px 0;background-color:#f0f7ff;border-radius:4px;'>"
-                + think_content
-                + "</div></details>"
+                + thinking_escaped +
+                "</div></details>"
             )
+            html_parts.append(think_html)
 
-        answer_html = ""
-        if content:
-            content_escaped = html.escape(content)
+        if regular_content:
+            content_escaped = html.escape(regular_content)
             content_formatted = content_escaped.replace("\n", "<br>")
-            answer_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
+            content_html = f"<div style='margin:0.5em 0; white-space: pre-wrap; line-height:1.6;'>{content_formatted}</div>"
+            html_parts.append(content_html)
 
-        return think_html + answer_html
+        return "".join(html_parts)
 
     def _build_messages(self, raw_hist, sys_prompt):
         msgs = []
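
The hunk above replaces the single `_stream_fragment` renderer with a two-step pipeline: `_parse_thinking_content` splits the accumulated text on inline `<think>...</think>` tags, and `_render_response` turns the two parts into HTML. A minimal standalone sketch of the same splitting logic, using a hypothetical `parse_thinking` helper that is not part of app.py, shows the two cases that matter mid-stream: a closed tag yields both parts, while an unclosed tag treats everything after `<think>` as thinking-so-far.

```python
import re

# Hypothetical standalone version of the _parse_thinking_content logic.
def parse_thinking(text: str):
    if "<think>" not in text:
        return "", text
    pattern = r"<think>(.*?)</think>"
    match = re.search(pattern, text, re.DOTALL)
    if match:
        # Closed tag: thinking and answer are both available.
        thinking = match.group(1).strip()
        answer = re.sub(pattern, "", text, flags=re.DOTALL).strip()
        return thinking, answer
    # Tag still open mid-stream: everything after <think> is thinking-so-far.
    start = text.find("<think>")
    return text[start + len("<think>"):], text[:start].strip()

assert parse_thinking("plain answer") == ("", "plain answer")
assert parse_thinking("<think>step 1") == ("step 1", "")
assert parse_thinking("<think>step 1</think>Answer") == ("step 1", "Answer")
```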
 
@@ -78,32 +107,28 @@ class GLM45Model:
         global stop_generation
         stop_generation = False
         msgs = self._build_messages(raw_hist, sys_prompt)
-        reasoning_buffer = ""
-        content_buffer = ""
+
+        self.reset_state()
 
         try:
             for delta in stream_from_vllm(msgs, thinking_enabled, temperature):
                 if stop_generation:
                     break
 
-                if hasattr(delta, 'reasoning_content') and delta.reasoning_content:
-                    reasoning_buffer += delta.reasoning_content
-                elif hasattr(delta, 'content') and delta.content:
-                    content_buffer += delta.content
-                else:
-                    if isinstance(delta, dict):
-                        if 'reasoning_content' in delta and delta['reasoning_content']:
-                            reasoning_buffer += delta['reasoning_content']
-                        if 'content' in delta and delta['content']:
-                            content_buffer += delta['content']
-                    elif hasattr(delta, 'content') and delta.content:
-                        content_buffer += delta.content
-
-                yield self._stream_fragment(reasoning_buffer, content_buffer, not thinking_enabled)
+                delta_content = ""
+                if hasattr(delta, 'content') and delta.content:
+                    delta_content = delta.content
+                elif isinstance(delta, dict) and 'content' in delta and delta['content']:
+                    delta_content = delta['content']
+
+                if delta_content:
+                    self.accumulated_text += delta_content
+                    thinking_content, regular_content = self._parse_thinking_content(self.accumulated_text)
+                    yield self._render_response(thinking_content, regular_content, not thinking_enabled)
 
         except Exception as e:
             error_msg = f"Error during streaming: {str(e)}"
-            yield self._stream_fragment("", error_msg)
+            yield self._render_response("", error_msg)
 
 
 glm45 = GLM45Model()
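
The second hunk drops the dual `reasoning_buffer`/`content_buffer` handling: each delta's `content` is appended to `self.accumulated_text`, and the full buffer is re-parsed and re-rendered on every chunk. A small sketch with hypothetical fake deltas standing in for the real `stream_from_vllm` output illustrates the effect: the thinking block is open in the first frame and closes as soon as the terminator arrives, with no per-delta state beyond the accumulator.

```python
import re

# Hypothetical stand-in for the vLLM delta stream; the new handler only
# reads `content` chunks and expects the <think> markup to arrive inline.
fake_deltas = [
    {"content": "<think>check the "},
    {"content": "units</think>"},
    {"content": "42 km"},
]

accumulated = ""
for delta in fake_deltas:
    accumulated += delta["content"]
    # Same split as _parse_thinking_content, re-run on the full buffer.
    match = re.search(r"<think>(.*?)</think>", accumulated, re.DOTALL)
    if match:
        thinking, answer = match.group(1).strip(), accumulated[match.end():].strip()
    elif "<think>" in accumulated:
        thinking, answer = accumulated.split("<think>", 1)[1], ""
    else:
        thinking, answer = "", accumulated
    print((thinking, answer))
# ('check the ', '')            tag still open, no answer yet
# ('check the units', '')       terminator arrived, block closes
# ('check the units', '42 km')  answer streams after the block
```

Re-parsing the whole buffer on each delta is quadratic in the response length, but it keeps the streaming UI stateless with respect to tag boundaries: text can move from the thinking pane to the answer pane the moment `</think>` appears.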