Sachi Wagaarachchi committed
Commit c285061 · Parent(s): 60f0153
bugfixes, update response streaming, thought

Files changed:
- src/app.py (+2 -3)
- src/chat_logic.py (+72 -10)
src/app.py CHANGED
@@ -91,9 +91,8 @@ def respond(
 
         # Stream response tokens
         response = ""
-        for token in response_generator:
-            response += token
-            print(f"{response}")
+        for history, dbg in response_generator:
+            response = history[-1]['content']
             yield response  # Yield the accumulated response for streaming UI
 
     except Exception as e:
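The updated respond() consumes (history, debug) tuples from the chat generator instead of raw token strings. A minimal sketch of one yielded item, assuming Gradio-style message dicts; the values are illustrative, inferred from the chat_logic.py changes below:

# One (history, debug) pair yielded mid-stream, while the model is still
# inside a <think> block (illustrative values):
history = [
    {'role': 'user', 'content': 'Why is the sky blue?'},
    {'role': 'assistant',
     'content': 'Shorter wavelengths scatter more strongly...',
     'metadata': {'title': '💭 Thought'}},
]
debug = ''

# app.py surfaces only the text of the newest assistant message:
response = history[-1]['content']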
src/chat_logic.py CHANGED
@@ -40,6 +40,9 @@ class ChatProcessor:
         Yields:
             str: Response tokens as they are generated
         """
+
+        cancel_event = threading.Event()
+        debug = ''
         try:
             # 1. PRE-PROCESSING
             # Get model pipeline
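The cancel_event added above follows the standard threading.Event pattern: a thread-safe flag that the streaming loop below polls via is_set(). Nothing in this commit ever calls set(), so cancellation is wired but inert; a minimal sketch of how a caller could trigger it, with on_stop as a hypothetical UI callback (not part of this commit):

import threading

cancel_event = threading.Event()

def on_stop():
    # Hypothetical stop-button handler: signals the streaming loop to exit.
    cancel_event.set()

# In the consumer (as in the next hunk):
# for chunk in streamer:
#     if cancel_event.is_set():
#         break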
@@ -80,19 +83,78 @@ class ChatProcessor:
             # Start generation in a separate thread
             thread = threading.Thread(target=pipe.model.generate, kwargs=generate_kwargs)
             thread.start()
-
+            # Buffers for thought vs answer
+            thought_buf = ''
+            answer_buf = ''
+            in_thought = False
+
+            # Stream tokens
+            for chunk in streamer:
+                if cancel_event.is_set():
+                    break
+                text = chunk
+
+                # Detect start of thinking
+                if not in_thought and '<think>' in text:
+                    in_thought = True
+                    # Insert thought placeholder
+                    history.append({
+                        'role': 'assistant',
+                        'content': '',
+                        'metadata': {'title': '💭 Thought'}
+                    })
+                    # Capture after opening tag
+                    after = text.split('<think>', 1)[1]
+                    thought_buf += after
+                    # If closing tag in same chunk
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Continue thought streaming
+                if in_thought:
+                    thought_buf += text
+                    if '</think>' in thought_buf:
+                        before, after2 = thought_buf.split('</think>', 1)
+                        history[-1]['content'] = before.strip()
+                        in_thought = False
+                        # Start answer buffer
+                        answer_buf = after2
+                        history.append({'role': 'assistant', 'content': answer_buf})
+                    else:
+                        history[-1]['content'] = thought_buf
+                    yield history, debug
+                    continue
+
+                # Stream answer
+                if not answer_buf:
+                    history.append({'role': 'assistant', 'content': ''})
+                answer_buf += text
+                history[-1]['content'] = answer_buf
+                yield history, debug
+
+            thread.join()
+            yield history, debug
             # 3. POST-PROCESSING
             # Stream response tokens
-            response = ""
-            for token in streamer:
-                response += token
-                # Yield each token for streaming UI
-                yield token
+            # response = ""
+            # for token in streamer:
+            #     response += token
+            #     # Yield each token for streaming UI
+            #     yield token
 
-            # Post-process the complete response
-            processed_response = postprocess_response(response)
-            # Yield the final processed response
-            yield processed_response
+            # # Post-process the complete response
+            # processed_response = postprocess_response(response)
+            # # Yield the final processed response
+            # yield processed_response
 
         except Exception as e:
             self.logger.error(f"Chat processing error: {str(e)}")
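The streaming loop above interleaves two concerns: splitting model output on <think>…</think> tags and mutating the Gradio-style history in place. A minimal standalone sketch of the same splitting technique, under the same assumption the diff makes (each tag arrives whole within a single chunk); split_thought_stream is a hypothetical helper, not part of this commit:

from typing import Iterable, Iterator, Tuple

def split_thought_stream(chunks: Iterable[str]) -> Iterator[Tuple[str, str]]:
    # Yield (thought_so_far, answer_so_far) after each chunk, mirroring the
    # buffering in ChatProcessor's streaming loop: text inside
    # <think>...</think> goes to the thought buffer, everything after the
    # closing tag to the answer buffer.
    thought_buf, answer_buf = '', ''
    in_thought = False
    for text in chunks:
        if not in_thought and '<think>' in text:
            in_thought = True
            text = text.split('<think>', 1)[1]  # drop the opening tag
        if in_thought:
            thought_buf += text
            if '</think>' in thought_buf:
                thought_buf, rest = thought_buf.split('</think>', 1)
                in_thought = False
                answer_buf += rest
        else:
            answer_buf += text
        yield thought_buf.strip(), answer_buf

# Usage with a fake token stream:
stream = ['<think>The user asks', ' about X.</think>', 'X is', ' ...']
for thought, answer in split_thought_stream(stream):
    print(repr(thought), '|', repr(answer))

One caveat worth noting: because chunks are matched with a plain substring test, a tag split across two chunks (e.g. '<th' followed by 'ink>') would be missed by both this sketch and the committed code.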