Sachi Wagaarachchi committed
Commit c285061 · Parent(s): 60f0153
bugfixes, update response streaming, thought

Files changed:
- src/app.py (+2 -3)
- src/chat_logic.py (+72 -10)
src/app.py CHANGED
@@ -91,9 +91,8 @@ def respond(
 
         # Stream response tokens
         response = ""
-        for token in response_generator:
-            response += token
-            print(f"{response}")
+        for history, dbg in response_generator:
+            response = history[-1]['content']
             yield response  # Yield the accumulated response for streaming UI
 
     except Exception as e:
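The updated respond() consumes (history, debug) tuples from the chat generator instead of raw token strings. A minimal sketch of one yielded item, assuming Gradio-style message dicts; the values are illustrative, inferred from the chat_logic.py changes below:

# One (history, debug) pair yielded mid-stream, while the model is still
# inside a <think> block (illustrative values):
history = [
    {'role': 'user', 'content': 'Why is the sky blue?'},
    {'role': 'assistant',
     'content': 'Shorter wavelengths scatter more strongly...',
     'metadata': {'title': '💭 Thought'}},
]
debug = ''

# app.py surfaces only the text of the newest assistant message:
response = history[-1]['content']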
src/chat_logic.py CHANGED
@@ -40,6 +40,9 @@ class ChatProcessor:
         Yields:
             str: Response tokens as they are generated
         """
+
+        cancel_event = threading.Event()
+        debug = ''
         try:
             # 1. PRE-PROCESSING
             # Get model pipeline
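The cancel_event added above follows the standard threading.Event pattern: a thread-safe flag that the streaming loop below polls via is_set(). Nothing in this commit ever calls set(), so cancellation is wired but inert; a minimal sketch of how a caller could trigger it, with on_stop as a hypothetical UI callback (not part of this commit):

import threading

cancel_event = threading.Event()

def on_stop():
    # Hypothetical stop-button handler: signals the streaming loop to exit.
    cancel_event.set()

# In the consumer (as in the next hunk):
# for chunk in streamer:
#     if cancel_event.is_set():
#         break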
@@ -80,19 +83,78 @@ class ChatProcessor:
             # Start generation in a separate thread
             thread = threading.Thread(target=pipe.model.generate, kwargs=generate_kwargs)
             thread.start()
-
+            # Buffers for thought vs answer
+            thought_buf = ''
+            answer_buf = ''
+            in_thought = False
+
+            # Stream tokens
+            for chunk in streamer:
+                if cancel_event.is_set():
+                    break
+                text = chunk
+
+                # Detect start of thinking
+                if not in_thought and '<think>' in text:
+                    in_thought = True
+                    # Insert thought placeholder
+                    history.append({
+                        'role': 'assistant',
+                        'content': '',
+                        'metadata': {'title': '💭 Thought'}
+                    })
+                    # Capture after opening tag
+                    after = text.split('<think>', 1)[1]
+                    thought_buf += after
+                    # If closing tag in same chunk
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Continue thought streaming
+                if in_thought:
+                    thought_buf += text
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Stream answer
+                if not answer_buf:
+                    history.append({'role': 'assistant', 'content': ''})
+                answer_buf += text
+                history[-1]['content'] = answer_buf
+                yield history, debug
+
+            thread.join()
+            yield history, debug
             # 3. POST-PROCESSING
             # Stream response tokens
-            response = ""
-            for token in streamer:
-                response += token
-                # Yield each token for streaming UI
-                yield token
+            # response = ""
+            # for token in streamer:
+            #     response += token
+            #     # Yield each token for streaming UI
+            #     yield token
 
-            # Post-process the complete response
-            processed_response = postprocess_response(response)
-            # Yield the final processed response
-            yield processed_response
+            # # Post-process the complete response
+            # processed_response = postprocess_response(response)
+            # # Yield the final processed response
+            # yield processed_response
 
         except Exception as e:
             self.logger.error(f"Chat processing error: {str(e)}")
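The streaming loop above interleaves two concerns: splitting model output on <think>…</think> tags and mutating the Gradio-style history in place. A minimal standalone sketch of the same splitting technique, under the same assumption the diff makes (each tag arrives whole within a single chunk); split_thought_stream is a hypothetical helper, not part of this commit:

from typing import Iterable, Iterator, Tuple

def split_thought_stream(chunks: Iterable[str]) -> Iterator[Tuple[str, str]]:
    # Yield (thought_so_far, answer_so_far) after each chunk, mirroring the
    # buffering in ChatProcessor's streaming loop: text inside
    # <think>...</think> goes to the thought buffer, everything after the
    # closing tag to the answer buffer.
    thought_buf, answer_buf = '', ''
    in_thought = False
    for text in chunks:
        if not in_thought and '<think>' in text:
            in_thought = True
            text = text.split('<think>', 1)[1]  # drop the opening tag
        if in_thought:
            thought_buf += text
            if '</think>' in thought_buf:
                thought_buf, rest = thought_buf.split('</think>', 1)
                in_thought = False
                answer_buf += rest
        else:
            answer_buf += text
        yield thought_buf.strip(), answer_buf

# Usage with a fake token stream:
stream = ['<think>The user asks', ' about X.</think>', 'X is', ' ...']
for thought, answer in split_thought_stream(stream):
    print(repr(thought), '|', repr(answer))

One caveat worth noting: because chunks are matched with a plain substring test, a tag split across two chunks (e.g. '<th' followed by 'ink>') would be missed by both this sketch and the committed code.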