Update app.py
app.py CHANGED
@@ -32,7 +32,13 @@ InUsed_model_name = "Qwen3-0.6B-int4-ov" # Choose a default model
 pipe = ov_genai.LLMPipeline(InUsed_model_name, device)
 tokenizer = pipe.get_tokenizer()
 tokenizer.set_chat_template(tokenizer.chat_template)
+config = ov_genai.GenerationConfig()
+config.max_new_tokens = 1024
 
+def streamer(subword):
+    print(subword, end='', flush=True)
+    return False
+
 def generate_response(prompt, model_name):
     global pipe, tokenizer, InUsed_model_name
     if(InUsed_model_name!=model_name):
@@ -46,7 +52,7 @@ def generate_response(prompt, model_name):
         InUsed_model_name=model_name
 
     try:
-        generated = pipe.generate([prompt],
+        generated = pipe.generate([prompt], config, streamer)
         tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
 
         return tokenpersec, generated
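Taken together, the commit defines a generation config capped at 1024 new tokens plus a console streaming callback, and passes both to pipe.generate. Below is a minimal standalone sketch of the same pattern with OpenVINO GenAI; the model directory, device, and prompt are illustrative placeholders, not values taken from this Space.

import openvino_genai as ov_genai

# Placeholder model directory and device; the Space resolves these elsewhere in app.py.
pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

# Cap the response length, mirroring the committed change.
config = ov_genai.GenerationConfig()
config.max_new_tokens = 1024

# Streaming callback: receives each decoded subword as it is produced.
# Returning False lets generation continue; returning True would stop it early.
def streamer(subword):
    print(subword, end='', flush=True)
    return False

# Batch of one prompt; the result carries perf metrics alongside the decoded text.
generated = pipe.generate(["Why is the sky blue?"], config, streamer)
print(f"\n{generated.perf_metrics.get_throughput().mean:.2f} tokens/sec")

The streamer's boolean return value is the stop signal: returning False on every call lets generation run until max_new_tokens is reached, which is why the commit returns False unconditionally.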