sdafd committed · verified
Commit 53639d5 · 1 Parent(s): 8858101

Update app.py

Files changed (1):
  app.py +7 -4
app.py CHANGED
@@ -1,5 +1,5 @@
 import torch
-from transformers import pipeline
+from transformers import pipeline, TextStreamer
 import gradio as gr
 import threading
 import time
@@ -55,6 +55,10 @@ def chat(message, history, temperature, max_new_tokens):
     start_time = time.time()
     generated_tokens = 0
 
+    # Create a TextStreamer for token streaming
+    tokenizer = model_pipeline.tokenizer
+    streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
+
     def generate():
         nonlocal generated_tokens
         for response in model_pipeline(
@@ -63,9 +67,8 @@ def chat(message, history, temperature, max_new_tokens):
             temperature=temperature,
             do_sample=True,
             truncation=True,
-            pad_token_id=50256,
-            return_full_text=False,
-            streamer=True
+            pad_token_id=tokenizer.eos_token_id,
+            streamer=streamer  # Use the TextStreamer here
         ):
             bot_text = response[0]["generated_text"]
             bot_text = bot_text.split("Assistant:")[-1].strip()
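For context, a minimal self-contained sketch of the pattern this commit switches to: passing a TextStreamer into a transformers text-generation pipeline call. The model name ("gpt2"), prompt, and sampling values below are placeholders for illustration and are not taken from app.py.

import torch
from transformers import pipeline, TextStreamer

# Placeholder model for illustration; the Space's actual model is configured elsewhere in app.py.
model_pipeline = pipeline("text-generation", model="gpt2", torch_dtype=torch.float32)
tokenizer = model_pipeline.tokenizer

# TextStreamer decodes tokens and prints them to stdout as they are generated.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

output = model_pipeline(
    "User: Hello!\nAssistant:",
    max_new_tokens=64,
    do_sample=True,
    temperature=0.7,
    truncation=True,
    pad_token_id=tokenizer.eos_token_id,  # models like GPT-2 have no pad token; reuse EOS
    streamer=streamer,                    # forwarded to generate(), which streams as it decodes
)
print(output[0]["generated_text"])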