sdafd committed on
Commit
174c85a
·
verified ·
1 Parent(s): 2c21cd0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -15
app.py CHANGED
@@ -62,7 +62,7 @@ def chat(message, history, temperature, max_new_tokens):
62
  def generate():
63
  stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
64
  nonlocal generated_tokens
65
- for response in model_pipeline(
66
  prompt,
67
  max_new_tokens=max_new_tokens,
68
  temperature=temperature,
@@ -71,20 +71,12 @@ def chat(message, history, temperature, max_new_tokens):
71
  pad_token_id=tokenizer.eos_token_id,
72
  streamer=streamer # Use the TextStreamer here
73
  ):
74
- bot_text = response[0]["generated_text"]
75
- bot_text = bot_text.split("Assistant:")[-1].strip()
76
- if "</think>" in bot_text:
77
- bot_text = bot_text.split("</think>")[-1].strip()
78
-
79
- generated_tokens += len(bot_text.split())
80
- elapsed_time = time.time() - start_time
81
- tokens_per_second = generated_tokens / elapsed_time if elapsed_time > 0 else 0
82
-
83
- yield bot_text, f"Generating... Tokens/s: {tokens_per_second:.2f}"
84
-
85
- for partial_response, status in generate():
86
- yield partial_response, status
87
-
88
  def reload_model_button():
89
  """Reload the model manually via a button."""
90
  global model_loaded
 
62
  def generate():
63
  stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
64
  nonlocal generated_tokens
65
+ response = model_pipeline(
66
  prompt,
67
  max_new_tokens=max_new_tokens,
68
  temperature=temperature,
 
71
  pad_token_id=tokenizer.eos_token_id,
72
  streamer=streamer # Use the TextStreamer here
73
  ):
74
+ for new_token in streamer:
75
+ outputs.append(new_token)
76
+ if new_token in stop_tokens:
77
+
78
+ break
79
+ yield "".join(outputs), "not implemented"
 
 
 
 
 
 
 
 
80
  def reload_model_button():
81
  """Reload the model manually via a button."""
82
  global model_loaded