sdafd committed on
Commit
b576940
·
verified ·
1 Parent(s): ed6968c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -20
app.py CHANGED
@@ -43,7 +43,8 @@ def check_model_status():
43
 
44
  def chat(message, history, temperature, max_new_tokens):
45
  global model_pipeline
46
-
 
47
  # Ensure the model is loaded before proceeding
48
  if not check_model_status():
49
  yield "Model is not ready. Please try again later."
@@ -53,30 +54,34 @@ def chat(message, history, temperature, max_new_tokens):
53
 
54
  # Stream the response
55
  start_time = time.time()
56
- generated_tokens = 0
57
 
58
  # Create a TextStreamer for token streaming
59
  tokenizer = model_pipeline.tokenizer
60
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
61
 
62
- def generate():
63
- stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
64
- nonlocal generated_tokens
65
- response = model_pipeline(
66
- prompt,
67
- max_new_tokens=max_new_tokens,
68
- temperature=temperature,
69
- do_sample=True,
70
- truncation=True,
71
- pad_token_id=tokenizer.eos_token_id,
72
- streamer=streamer # Use the TextStreamer here
73
- )
74
- for new_token in streamer:
75
- outputs.append(new_token)
76
- if new_token in stop_tokens:
77
-
78
- break
79
- yield "".join(outputs), "not implemented"
 
 
 
 
 
80
  def reload_model_button():
81
  """Reload the model manually via a button."""
82
  global model_loaded
 
43
 
44
  def chat(message, history, temperature, max_new_tokens):
45
  global model_pipeline
46
+ stop_tokens = ["<|endoftext|>", "<|im_end|>","|im_end|"]
47
+
48
  # Ensure the model is loaded before proceeding
49
  if not check_model_status():
50
  yield "Model is not ready. Please try again later."
 
54
 
55
  # Stream the response
56
  start_time = time.time()
 
57
 
58
  # Create a TextStreamer for token streaming
59
  tokenizer = model_pipeline.tokenizer
60
  streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
61
 
62
+
63
+
64
+ pipeline_kwargs = dict(
65
+ prompt=prompt,
66
+ max_new_tokens=max_new_tokens,
67
+ temperature=temperature,
68
+ do_sample=True,
69
+ truncation=True,
70
+ pad_token_id=tokenizer.eos_token_id,
71
+ streamer=streamer # Use the TextStreamer here
72
+ )
73
+
74
+ # Create and start the thread with the model_pipeline function
75
+ t = threading.Thread(target=lambda: model_pipeline(**pipeline_kwargs))
76
+ t.start()
77
+
78
+ for new_token in streamer:
79
+ print(new_token)
80
+ outputs.append(new_token)
81
+ if new_token in stop_tokens:
82
+
83
+ break
84
+ yield "".join(outputs), "not implemented"
85
  def reload_model_button():
86
  """Reload the model manually via a button."""
87
  global model_loaded