FlameF0X committed
Commit 26ba426 · verified · 1 Parent(s): 57682ef

Update app.py

Files changed (1)
  1. app.py +8 -22
app.py CHANGED

@@ -2,7 +2,6 @@ import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
 import torch
 from threading import Thread
-import re # For cleaning unwanted tokens
 
 # Load model and tokenizer
 model_name = "GoofyLM/gonzalez-v1"
@@ -19,17 +18,8 @@ if tokenizer.pad_token is None:
 
 # Define a custom chat template if one is not available
 if tokenizer.chat_template is None:
-    tokenizer.chat_template = """{% for message in messages %}
-{% if message['role'] == 'system' %}<|system|>
-{{ message['content'] }}
-{% elif message['role'] == 'user' %}<|user|>
-{{ message['content'] }}
-{% elif message['role'] == 'assistant' %}<|assistant|>
-{{ message['content'] }}
-{% endif %}
-{% endfor %}
-{% if add_generation_prompt %}<|assistant|>
-{% endif %}"""
+    # Basic ChatML-style template
+    tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'system' %}<|system|>\n{{ message['content'] }}\n{% elif message['role'] == 'user' %}<|user|>\n{{ message['content'] }}\n{% elif message['role'] == 'assistant' %}<|assistant|>\n{{ message['content'] }}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
 
 def respond(
     message,
@@ -69,30 +59,26 @@ def respond(
         pad_token_id=tokenizer.pad_token_id
     )
 
-    # Start generation in a separate thread
+    # Start generation in separate thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-
-    # Stream response with filtering
+
+    # Stream response
     response = ""
     for token in streamer:
         response += token
-        # Remove angle-bracket tags (e.g. <|msg|...>, <username: ...>, <:...:...>)
-        cleaned_response = re.sub(r"<[^>]+>", "", response)
-        # Remove leading "Output:" if present (case-insensitive, line start)
-        cleaned_response = re.sub(r"(?i)^\s*output:\s*", "", cleaned_response)
-        yield cleaned_response.strip()
+        yield response
 
 # Create Gradio interface
 demo = gr.ChatInterface(
     respond,
     additional_inputs=[
         gr.Textbox(value="", label="System message"),
-        gr.Slider(1, 2048, value=72, label="Max new tokens"),
+        gr.Slider(1, 215, value=72, label="Max new tokens"),
         gr.Slider(0.1, 4.0, value=0.7, label="Temperature"),
         gr.Slider(0.1, 1.0, value=0.95, label="Top-p (nucleus sampling)"),
     ],
 )
 
 if __name__ == "__main__":
-    demo.launch()
+    demo. launch()
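
Note on the chat-template change: the commit swaps the triple-quoted Jinja template for a single-line string with explicit \n escapes. As a rough sketch of how such a template is consumed (the messages below are made up; apply_chat_template is the standard transformers call, not something added by this commit):

# Sketch: render the ChatML-style template with the tokenizer loaded in app.py.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,              # return the rendered string, not token IDs
    add_generation_prompt=True,  # append the trailing <|assistant|> marker
)
# Rendered prompt, roughly (extra blank lines come from the literal \n after
# each {% ... %} tag):
#   <|system|>
#   You are a helpful assistant.
#   <|user|>
#   Hello!
#   <|assistant|>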
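
On the generation side, respond() keeps the TextIteratorStreamer pattern and now yields the raw accumulated text instead of a regex-cleaned copy. A self-contained sketch of that pattern, using the same model name as app.py (the helper name stream_reply and the sampling values are illustrative, not part of the commit):

# Sketch of the background-thread streaming used by respond(): model.generate()
# runs on a worker thread and pushes decoded text into the streamer, which the
# generator loop consumes and re-yields incrementally.
from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_name = "GoofyLM/gonzalez-v1"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

def stream_reply(prompt, max_new_tokens=72, temperature=0.7, top_p=0.95):
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        **inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
        pad_token_id=tokenizer.pad_token_id,
    )
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    response = ""
    for token in streamer:   # blocks until the next decoded chunk arrives
        response += token
        yield response       # yield the full reply so far, as app.py now does

Each yielded string is the complete partial reply, which is the shape gr.ChatInterface expects from a streaming callback.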
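
Finally, the interface change only narrows the "Max new tokens" slider (from 2048 to 215); the wiring is unchanged. gr.ChatInterface passes each additional_inputs value to the callback after message and history, in the order listed, so the signature that matches the inputs in this diff would look like the sketch below (parameter names are illustrative):

# Sketch: how the additional_inputs in the diff map onto respond()'s parameters.
def respond(message, history, system_message, max_tokens, temperature, top_p):
    ...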