hsuwill000 committed on
Commit
63924d6
·
verified ·
1 Parent(s): 2574109

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -59
app.py CHANGED
@@ -1,64 +1,18 @@
1
- import huggingface_hub as hf_hub
2
- import time
3
- import openvino_genai as ov_genai
4
- import numpy as np
5
  import gradio as gr
6
- import re
7
-
8
- # 下載模型
9
- model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
10
- model_path = "Qwen3-0.6B-int4-ov"
11
-
12
- hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False)
13
-
14
- # 建立推理管線
15
- device = "CPU"
16
- pipe = ov_genai.LLMPipeline(model_path, device)
17
- tokenizer = pipe.get_tokenizer()
18
- tokenizer.set_chat_template(tokenizer.chat_template)
19
-
20
-
21
- def generate_response(prompt):
22
- full_response = ""
23
- tokenpersec = "" # 初始化
24
-
25
- def streamer(subword):
26
- nonlocal full_response
27
- nonlocal tokenpersec # 声明 nonlocal
28
- full_response += subword
29
- yield tokenpersec, full_response # 使用 yield 逐步更新
30
-
31
- try:
32
- pipe.start_chat()
33
- generator = pipe.generate(prompt, streamer=streamer, max_new_tokens=1024) # 建立生成器
34
-
35
- # 迭代生成器,產生流式更新
36
- for tokenpersec, response_chunk in generator:
37
- yield tokenpersec, response_chunk # 產生中間更新
38
-
39
- pipe.finish_chat()
40
-
41
- generated = pipe.generate([prompt], max_length=1024) # 為了得到 perf_metrics
42
- tokenpersec=f'{generated.perf_metrics.get_throughput().mean:.2f}'
43
-
44
- yield tokenpersec, full_response # 產生最終完整更新
45
 
46
- except Exception as e:
47
- yield "發生錯誤", f"生成回應時發生錯誤:{e}"
 
 
 
 
48
 
 
 
 
49
 
50
- # 建立 Gradio 介面
51
- demo = gr.Interface(
52
- fn=generate_response,
53
- inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
54
- outputs=[
55
- gr.Textbox(label="tokens/sec"),
56
- gr.Markdown(label="回應") # 使用 Markdown
57
- ],
58
- title="Qwen3-0.6B-int4-ov ",
59
- description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI。 使用Markdown顯示"
60
- )
61
 
62
- if __name__ == "__main__":
63
- demo.queue()
64
- demo.launch()
 
 
 
 
 
1
  import gradio as gr
2
+ import time
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
+ def generate_markdown():
5
+ response = ""
6
+ for word in ["Hello", "World", "Gradio", "Markdown", "!"]:
7
+ response += word + " "
8
+ time.sleep(0.5)
9
+ yield response
10
 
11
+ with gr.Blocks() as demo:
12
+ markdown_output = gr.Markdown(label="回應")
13
+ button = gr.Button("生成")
14
 
15
+ button.click(generate_markdown, outputs=markdown_output)
 
 
 
 
 
 
 
 
 
 
16
 
17
+ demo.queue()
18
+ demo.launch()