hsuwill000 committed on
Commit
01ad3ce
·
verified ·
1 Parent(s): b18b8b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -1
app.py CHANGED
@@ -32,7 +32,13 @@ InUsed_model_name = "Qwen3-0.6B-int4-ov" # Choose a default model
32
  pipe = ov_genai.LLMPipeline(InUsed_model_name, device)
33
  tokenizer = pipe.get_tokenizer()
34
  tokenizer.set_chat_template(tokenizer.chat_template)
 
 
35
 
 
 
 
 
36
  def generate_response(prompt, model_name):
37
  global pipe, tokenizer, InUsed_model_name
38
  if(InUsed_model_name!=model_name):
@@ -46,7 +52,7 @@ def generate_response(prompt, model_name):
46
  InUsed_model_name=model_name
47
 
48
  try:
49
- generated = pipe.generate([prompt], max_length=1024)
50
  tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
51
 
52
  return tokenpersec, generated
 
32
  pipe = ov_genai.LLMPipeline(InUsed_model_name, device)
33
  tokenizer = pipe.get_tokenizer()
34
  tokenizer.set_chat_template(tokenizer.chat_template)
35
+ config = openvino_genai.GenerationConfig()
36
+ config.max_new_tokens = 1024
37
 
38
+ def streamer(subword):
39
+ print(subword, end='', flush=True)
40
+ return False
41
+
42
  def generate_response(prompt, model_name):
43
  global pipe, tokenizer, InUsed_model_name
44
  if(InUsed_model_name!=model_name):
 
52
  InUsed_model_name=model_name
53
 
54
  try:
55
+ generated = pipe.generate([prompt], config, streamer)
56
  tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
57
 
58
  return tokenpersec, generated