hsuwill000 committed on
Commit
04d3fa9
·
verified ·
1 Parent(s): 8b31668

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +33 -27
app.py CHANGED
@@ -18,43 +18,49 @@ tokenizer = pipe.get_tokenizer()
18
  tokenizer.set_chat_template(tokenizer.chat_template)
19
 
20
 
21
- def streamer(subword):
22
- yield subword
23
- return ov_genai.StreamingStatus.RUNNING
 
 
 
24
 
25
-
26
- def generate_response(prompt):
27
- try:
28
  full_response = ""
29
- token_count = 0
30
  start_time = time.time()
 
31
 
32
- for text in pipe.generate(prompt, streamer=streamer, max_new_tokens=1024):
33
- full_response += text
 
34
  token_count += 1
35
- yield (None, full_response) # 每次 yield 都会刷新界面
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- end_time = time.time()
38
- elapsed_time = end_time - start_time
39
- tokens_per_sec = token_count / elapsed_time if elapsed_time > 0 else 0
40
- tokenpersec=f'{tokens_per_sec:.2f}'
41
-
42
- yield (tokenpersec, full_response) # 最终 yield, 保证输出完整.
43
-
44
- except Exception as e:
45
- yield ("發生錯誤", f"生成回應時發生錯誤:{e}") # 使用 yield 错误信息
46
 
47
  # 建立 Gradio 介面
48
- demo = gr.Interface(
49
  fn=generate_response,
50
- inputs=gr.Textbox(lines=5, label="輸入提示 (Prompt)"),
51
- outputs=[
52
- gr.Textbox(label="tokens/sec"),
53
- gr.Textbox(label="回應", streaming=True)
54
- ],
55
  title="Qwen3-0.6B-int4-ov ",
56
- description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI"
 
 
 
57
  )
58
 
 
59
  if __name__ == "__main__":
60
- demo.launch()
 
18
  tokenizer.set_chat_template(tokenizer.chat_template)
19
 
20
 
21
def generate_response(prompt, history=None):
    """Generate a chat reply for *prompt* and stream progress to the UI.

    Parameters:
        prompt: the user's message text.
        history: prior chat turns as a list of {"role", "content"} dicts;
            defaults to a fresh empty list per call.

    Returns:
        A generator yielding 2-tuples (response_text, tokens_per_sec_str);
        on failure it yields an error marker and message instead.
    """
    # Fresh list per call — a mutable default (`history=[]`) would be shared
    # across every invocation and grow forever.
    if history is None:
        history = []

    full_prompt = tokenizer.apply_chat_template(
        messages=history + [{"role": "user", "content": prompt}],
        tokenize=False,
        add_generation_prompt=True,
    )

    # Inner generator so the outer function can return a ready-to-iterate stream.
    def stream_generate():
        full_response = ""
        token_count = 0
        start_time = time.time()

        def streamer(subword):
            # Must be a plain callback: pipe.generate() invokes it once per
            # subword and inspects the returned status. The original version
            # contained a `yield`, which turned it into a generator function —
            # its body never executed, so nothing was ever accumulated.
            nonlocal full_response, token_count
            full_response += subword
            token_count += 1
            return ov_genai.StreamingStatus.RUNNING

        try:
            pipe.generate(full_prompt, streamer=streamer, max_new_tokens=1024)
            # Persist the exchange into the caller-supplied history.
            history.append({"role": "user", "content": prompt})
            history.append({"role": "assistant", "content": full_response})
        except Exception as e:
            yield "發生錯誤", f"生成回應時發生錯誤:{e}"
            # Stop here — falling through would clobber the error message
            # with a stale/empty final yield.
            return

        # Guard against a zero elapsed interval before dividing.
        elapsed = time.time() - start_time
        tokens_per_sec = token_count / elapsed if elapsed > 0 else 0.0
        yield full_response, f'{tokens_per_sec:.2f}'

    return stream_generate()
 
 
 
 
 
 
 
 
53
 
54
# 建立 Gradio 介面 (build the Gradio chat UI).
demo = gr.ChatInterface(
    fn=generate_response,
    title="Qwen3-0.6B-int4-ov ",
    description="基於 Qwen3-0.6B-int4-ov 推理應用,支援思考過程分離與 GUI.",
    # NOTE(review): the previous version passed a tokens/sec Textbox via
    # `additional_inputs`. Additional inputs are forwarded to `fn` as extra
    # positional arguments, which generate_response() does not accept — every
    # submit raised a TypeError — and an *input* widget cannot display output
    # anyway. The tokens/sec figure is already part of the values the handler
    # yields, so the widget is removed.
)

if __name__ == "__main__":
    # queue() is required for generator (streaming) handlers to update the UI
    # incrementally rather than blocking until completion.
    demo.queue().launch()