Sachi Wagaarachchi committed on
Commit c285061 · 1 Parent(s): 60f0153

bugfixes, update response streaming, thought

Files changed (2):
  1. src/app.py +2 -3
  2. src/chat_logic.py +72 -10
src/app.py CHANGED
@@ -91,9 +91,8 @@ def respond(

        # Stream response tokens
        response = ""
-        for token in response_generator:
-            response += token
-            print(f"{response}")
+        for history, dbg in response_generator:
+            response = history[-1]['content']
            yield response  # Yield the accumulated response for streaming UI

    except Exception as e:
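
On the app side, respond now consumes (history, debug) tuples and forwards only the text of the last history entry to the UI. Below is a minimal sketch of that consumer wired into a Gradio ChatInterface, with a stand-in generator in place of the real chat logic (whose setup is outside this diff); the stand-in and the ChatInterface wiring are illustrative assumptions, not part of the commit.

    # Minimal sketch, assuming the post-commit contract: the chat logic yields
    # (history, debug) tuples and the UI wants a growing plain string.
    import gradio as gr

    def fake_process_chat(message, chat_history):
        # Stand-in for the real process_chat generator after this commit
        for partial in ("Hel", "Hello", "Hello!"):
            yield [{'role': 'assistant', 'content': partial}], ''

    def respond(message, chat_history):
        response = ""
        for history, dbg in fake_process_chat(message, chat_history):
            # The entry currently being streamed is always last in history
            response = history[-1]['content']
            yield response  # accumulated text drives the streaming UI

    demo = gr.ChatInterface(respond, type="messages")
    if __name__ == "__main__":
        demo.launch()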
src/chat_logic.py CHANGED
@@ -40,6 +40,9 @@ class ChatProcessor:
        Yields:
            str: Response tokens as they are generated
        """
+
+        cancel_event = threading.Event()
+        debug = ''
        try:
            # 1. PRE-PROCESSING
            # Get model pipeline
@@ -80,19 +83,78 @@ class ChatProcessor:
            # Start generation in a separate thread
            thread = threading.Thread(target=pipe.model.generate, kwargs=generate_kwargs)
            thread.start()
-
+            # Buffers for thought vs answer
+            thought_buf = ''
+            answer_buf = ''
+            in_thought = False
+
+            # Stream tokens
+            for chunk in streamer:
+                if cancel_event.is_set():
+                    break
+                text = chunk
+
+                # Detect start of thinking
+                if not in_thought and '<think>' in text:
+                    in_thought = True
+                    # Insert thought placeholder
+                    history.append({
+                        'role': 'assistant',
+                        'content': '',
+                        'metadata': {'title': '💭 Thought'}
+                    })
+                    # Capture after opening tag
+                    after = text.split('<think>', 1)[1]
+                    thought_buf += after
+                    # If closing tag in same chunk
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Continue thought streaming
+                if in_thought:
+                    thought_buf += text
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Stream answer
+                if not answer_buf:
+                    history.append({'role': 'assistant', 'content': ''})
+                answer_buf += text
+                history[-1]['content'] = answer_buf
+                yield history, debug
+
+            thread.join()
+            yield history, debug
            # 3. POST-PROCESSING
            # Stream response tokens
-            response = ""
-            for token in streamer:
-                response += token
-                # Yield each token for streaming UI
-                yield token
+            # response = ""
+            # for token in streamer:
+            #     response += token
+            #     # Yield each token for streaming UI
+            #     yield token

-            # Post-process the complete response
-            processed_response = postprocess_response(response)
-            # Yield the final processed response
-            yield processed_response
+            # # Post-process the complete response
+            # processed_response = postprocess_response(response)
+            # # Yield the final processed response
+            # yield processed_response

        except Exception as e:
            self.logger.error(f"Chat processing error: {str(e)}")
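
The hunk above consumes pipe, streamer, and generate_kwargs that are set up earlier in the method and not shown in this diff. For context, a typical construction with transformers' TextIteratorStreamer looks roughly like the following; the model name and generation settings are illustrative assumptions, not taken from this repo.

    # For context only: a typical setup for the streamer/generate_kwargs pair
    # that the hunk above consumes. Model name and settings are assumptions.
    import threading
    from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

    model_name = "Qwen/Qwen3-0.6B"  # assumption: a chat model that emits <think> tags
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    input_ids = tokenizer.apply_chat_template(
        [{"role": "user", "content": "Hi!"}],
        add_generation_prompt=True,
        return_tensors="pt",
    )
    # skip_prompt drops the echoed input; <think> tags remain in the text stream
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(inputs=input_ids, streamer=streamer, max_new_tokens=512)

    # generate() blocks, so it runs in a thread while the caller drains the
    # streamer, exactly as in the diff above
    thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()
    for chunk in streamer:
        print(chunk, end="", flush=True)
    thread.join()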
 
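The two closing-tag branches in the loop are identical, so the splitting logic can be exercised on its own with a scripted streamer. Below is a self-contained sketch that folds the duplicated </think> handling into one place; stream_history is a hypothetical helper, not a function in the repo.

    # Self-contained sketch of the <think>/</think> splitting above, with the
    # duplicated closing-tag handling merged. The fake chunks exercise a
    # closing tag arriving mid-stream.
    def stream_history(streamer):
        history = []
        thought_buf, answer_buf, in_thought = '', '', False
        for text in streamer:
            if not in_thought and '<think>' in text:
                in_thought = True
                history.append({'role': 'assistant', 'content': '',
                                'metadata': {'title': '💭 Thought'}})
                thought_buf += text.split('<think>', 1)[1]
            elif in_thought:
                thought_buf += text
            else:
                # Plain answer text: append an entry on first sight, then grow it
                if not answer_buf:
                    history.append({'role': 'assistant', 'content': ''})
                answer_buf += text
                history[-1]['content'] = answer_buf
                yield history
                continue
            # Shared closing-tag handling for both thought branches
            if '</think>' in thought_buf:
                before, after2 = thought_buf.split('</think>', 1)
                history[-1]['content'] = before.strip()  # finalize the thought
                in_thought = False
                answer_buf = after2
                history.append({'role': 'assistant', 'content': answer_buf})
            else:
                history[-1]['content'] = thought_buf     # grow the thought
            yield history

    chunks = ['<think>plan the ', 'reply</think>Hello', ', world!']
    for history in stream_history(chunks):
        print(history[-1]['content'])  # 'plan the ' -> 'Hello' -> 'Hello, world!'

Two behaviors carry over from the diff as committed: cancel_event is created but never set anywhere in this commit, so the is_set() check is groundwork for a future stop control; and when </think> ends exactly at a chunk boundary, the answer entry appended at the split stays empty and the next chunk appends a second one, since answer_buf is still empty at that point.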