Update app.py
app.py CHANGED
@@ -33,7 +33,7 @@ class ConversationManager:
    def __init__(self):
        self.user_history = [] # For displaying to user (with markdown)
        self.model_history = [] # For feeding back to model (with original tags)
-
+
    def add_exchange(self, user_message, assistant_response, formatted_response):
        self.model_history.append((user_message, assistant_response))
        self.user_history.append((user_message, formatted_response))
@@ -42,13 +42,16 @@ class ConversationManager:
        print(f"User: {user_message}")
        print(f"Assistant (Original): {assistant_response}")
        print(f"Assistant (Formatted): {formatted_response}")
-
+
    def get_model_history(self):
        return self.model_history
-
+
    def get_user_history(self):
        return self.user_history

+    def clear(self):
+        self.user_history = []
+        self.model_history = []

device = "cuda" # for GPU usage or "cpu" for CPU usage

@@ -72,52 +75,49 @@ def format_response(response):
@spaces.GPU()
def stream_chat(
    message: str,
-
+    history_state: gr.State, # Access the internal history state
    system_prompt: str,
    temperature: float = 0.2,
    max_new_tokens: int = 4096,
    top_p: float = 1.0,
    top_k: int = 1,
    penalty: float = 1.1,
-    conversation_manager: ConversationManager = None # Pass the manager as argument
):
-
-    if conversation_manager is None:
-        conversation_manager = ConversationManager()
+    conversation_manager = history_state

    print(f'\nNew Chat Request:')
    print(f'Message: {message}')
-    print(f'History from UI: {
+    print(f'History from UI: {conversation_manager.get_user_history()}')
    print(f'System Prompt: {system_prompt}')
    print(f'Parameters: temp={temperature}, max_tokens={max_new_tokens}, top_p={top_p}, top_k={top_k}, penalty={penalty}')
-
+
    model_history = conversation_manager.get_model_history()
    print(f'Model History: {model_history}')
-
+
    conversation = []
    for prompt, answer in model_history:
        conversation.extend([
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ])
-
+
    conversation.append({"role": "user", "content": message})
    print(f'\nFormatted Conversation for Model:')
    print(conversation)
-
+
    input_ids = tokenizer.apply_chat_template(
-        conversation,
-        add_generation_prompt=True,
+        conversation,
+        add_generation_prompt=True,
        return_tensors="pt"
    ).to(model.device)
-
+
    streamer = TextIteratorStreamer(
-        tokenizer,
-        timeout=60.0,
-        skip_prompt=True,
+        tokenizer,
+        timeout=60.0,
+        skip_prompt=True,
        skip_special_tokens=True
    )
-
+
    generate_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_new_tokens,
@@ -129,43 +129,59 @@ def stream_chat(
        eos_token_id=[end_of_sentence],
        streamer=streamer,
    )
-
+
    buffer = ""
    original_response = ""
-
+
    with torch.no_grad():
        thread = Thread(target=model.generate, kwargs=generate_kwargs)
        thread.start()
-
+
        for new_text in streamer:
            buffer += new_text
            original_response += new_text
-
+
            formatted_buffer = format_response(buffer)
-
+
            if thread.is_alive() is False:
                print(f'\nGeneration Complete:')
                print(f'Original Response: {original_response}')
                print(f'Formatted Response: {formatted_buffer}')
-
+
                conversation_manager.add_exchange(
                    message,
                    original_response, # Original for model
-                    formatted_buffer
+                    formatted_buffer # Formatted for user
                )
-
-            yield formatted_buffer
+
+            yield formatted_buffer, conversation_manager
+
+def clear_chat(history_state: gr.State):
+    history_state.clear()
+    return None, history_state
+
+# Initialize the conversation manager outside of the function
+conversation_manager = ConversationManager()

chatbot = gr.Chatbot(height=600, placeholder=PLACEHOLDER)
-conversation_manager_session_state = gr.State(ConversationManager())

with gr.Blocks(css=CSS, theme="soft") as demo:
    gr.HTML(TITLE)
    gr.DuplicateButton(
-        value="Duplicate Space for private use",
+        value="Duplicate Space for private use",
        elem_classes="duplicate-button"
    )
-
+
+    # Pass the initial state to the ChatInterface
+    history_state = gr.State(conversation_manager)
+
+    clear_inputs_button = gr.ClearButton(
+        value="Clear Chat",
+        components=[chatbot],
+    )
+    clear_inputs_button.click(fn=clear_chat, inputs=[history_state], outputs=[chatbot, history_state])
+
+    chat_interface = gr.ChatInterface(
        fn=stream_chat,
        chatbot=chatbot,
        fill_height=True,
@@ -175,6 +191,7 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
            render=False
        ),
        additional_inputs=[
+            history_state, # Pass the state to the ChatInterface
            gr.Textbox(
                value="",
                label="System Prompt",
@@ -220,7 +237,6 @@ with gr.Blocks(css=CSS, theme="soft") as demo:
                label="Repetition penalty",
                render=False,
            ),
-            conversation_manager_session_state, # Add the state to the input
        ],
        examples=[
            ["Help me study vocabulary: write a sentence for me to fill in the blank, and I'll try to pick the correct option."],