FlameF0X committed · verified
Commit: 1e560c3 · Parent: 776e30f

Update app.py
Files changed (1):
  app.py (+9, -4)
app.py CHANGED
@@ -16,6 +16,11 @@ tokenizer = AutoTokenizer.from_pretrained(model_name)
 if tokenizer.pad_token is None:
     tokenizer.pad_token = tokenizer.eos_token
 
+# Define a custom chat template if one is not available
+if tokenizer.chat_template is None:
+    # Basic ChatML-style template
+    tokenizer.chat_template = "{% for message in messages %}\n{% if message['role'] == 'system' %}<|system|>\n{{ message['content'] }}\n{% elif message['role'] == 'user' %}<|user|>\n{{ message['content'] }}\n{% elif message['role'] == 'assistant' %}<|assistant|>\n{{ message['content'] }}\n{% endif %}\n{% endfor %}\n{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -34,11 +39,11 @@ def respond(
         messages.append({"role": "assistant", "content": assistant_msg})
 
     messages.append({"role": "user", "content": message})
-
+
     # Format prompt using chat template
     prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+
     # Set up streaming
     streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
 
@@ -53,11 +58,11 @@ def respond(
         do_sample=do_sample,
         pad_token_id=tokenizer.pad_token_id
     )
-
+
     # Start generation in separate thread
     thread = Thread(target=model.generate, kwargs=generation_kwargs)
     thread.start()
-
+
     # Stream response
     response = ""
     for token in streamer:
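For reference, here is a minimal sketch of what the new fallback template produces once rendered. It only needs the tokenizer, so gpt2 is used as a stand-in checkpoint (an assumption; the actual model_name in app.py is not visible in this diff). gpt2's tokenizer ships with neither a pad token nor a chat template, so both fallback branches fire:

from transformers import AutoTokenizer

# Assumption: gpt2 is a stand-in; the real model_name is defined earlier
# in app.py and does not appear in this diff.
tokenizer = AutoTokenizer.from_pretrained("gpt2")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # gpt2 has no pad token

# The same ChatML-style fallback this commit adds, split across lines
# for readability (the concatenated string is identical).
if tokenizer.chat_template is None:
    tokenizer.chat_template = (
        "{% for message in messages %}\n"
        "{% if message['role'] == 'system' %}<|system|>\n{{ message['content'] }}\n"
        "{% elif message['role'] == 'user' %}<|user|>\n{{ message['content'] }}\n"
        "{% elif message['role'] == 'assistant' %}<|assistant|>\n{{ message['content'] }}\n"
        "{% endif %}\n{% endfor %}\n"
        "{% if add_generation_prompt %}<|assistant|>\n{% endif %}"
    )

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
# transformers renders chat templates with trim_blocks/lstrip_blocks
# enabled, so this prints roughly:
# <|system|>
# You are a helpful assistant.
# <|user|>
# Hello!
# <|assistant|>

The `{% if add_generation_prompt %}` tail is what makes apply_chat_template(..., add_generation_prompt=True) end the prompt with an open <|assistant|> turn for the model to complete.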
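The threaded streaming pattern in the second and third hunks can be exercised end to end with the same stand-in model. A sketch under the same assumptions (gpt2 as the checkpoint; literal generation parameters in place of the Gradio-supplied do_sample and sampling values the truncated hunks hide):

from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model = AutoModelForCausalLM.from_pretrained("gpt2")  # stand-in, see above
tokenizer = AutoTokenizer.from_pretrained("gpt2")
tokenizer.pad_token = tokenizer.eos_token

inputs = tokenizer("Hello, world", return_tensors="pt").to(model.device)

# skip_prompt=True drops the echoed prompt from the stream;
# skip_special_tokens=True drops EOS/pad markers from the decoded text.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    **inputs,
    streamer=streamer,
    max_new_tokens=32,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id,
)

# model.generate blocks until generation finishes, so it runs in a worker
# thread while the main thread consumes decoded chunks from the streamer.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

response = ""
for token in streamer:
    response += token  # app.py's respond() yields the growing string here
    print(token, end="", flush=True)
thread.join()

In app.py the loop body yields `response` back to Gradio instead of printing, which is what produces the incremental typing effect in the chat UI.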