hsuwill000 committed on
Commit 7d7759a · verified · 1 Parent(s): 3592035

Update app.py

Files changed (1)
  1. app.py +20 -34
app.py CHANGED
@@ -18,43 +18,29 @@ tokenizer = pipe.get_tokenizer()
  tokenizer.set_chat_template(tokenizer.chat_template)


- def generate_response(prompt, history=[]):
-     full_response = ""
-     token_count = 0
-     start_time = time.time()
-
-     def streamer(subword):
-         nonlocal full_response, token_count
-         full_response += subword
-         token_count += 1
-         return ov_genai.StreamingStatus.RUNNING
-
-     pipe.start_chat(history)  # Pass the history to start_chat
-     pipe.generate(prompt, streamer=streamer, max_new_tokens=1024)  # Adjust max_new_tokens as needed
-     pipe.finish_chat()  # Finish the chat
-
-     end_time = time.time()
-     elapsed_time = end_time - start_time
-     tokenpersec = f'{token_count / elapsed_time:.2f}' if elapsed_time > 0 else "0.00"
-
-     history.append((prompt, full_response))  # Store the conversation history
-     return tokenpersec, history
+ def generate_response(prompt):
+     try:
+         generated = pipe.generate([prompt], max_length=1024)
+         tokenpersec = f'{generated.perf_metrics.get_throughput().mean:.2f}'
+         return tokenpersec, generated
+     except Exception as e:
+         return "An error occurred", f"An error occurred while generating the response: {e}"

  # Build the Gradio interface
- with gr.Blocks() as demo:
-     chatbot = gr.Chatbot()
-     state = gr.State([])
-     msg = gr.Textbox(label="Prompt")
-
-     def respond(message, chat_history):
-         tokenpersec, chat_history = generate_response(message, chat_history)
-         response = chat_history[-1][1]
-         return "", chat_history, tokenpersec
-
-     msg.submit(respond, [msg, state], [msg, chatbot, gr.Textbox(label="tokens/sec")])
-
-     demo.title = "Qwen3-0.6B-int4-ov Streaming Demo"
-     demo.description = "Inference app based on Qwen3-0.6B-int4-ov, with streaming output and a GUI."
+ demo = gr.Interface(
+     fn=generate_response,
+     inputs=gr.Textbox(lines=5, label="Prompt"),
+     outputs=[
+         gr.Textbox(label="tokens/sec"),
+         # gr.Textbox(label="Thinking process"),
+         # gr.Textbox(label="Final response"),
+         gr.Textbox(label="Response")
+     ],
+     title="Qwen3-0.6B-int4-ov",
+     description="Inference app based on Qwen3-0.6B-int4-ov, with thinking/response separation and a GUI."
+ )

  if __name__ == "__main__":
      demo.launch()
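
For context on the change: the commit drops the streaming gr.Blocks chat UI and instead reports the mean generation throughput that OpenVINO GenAI collects in perf_metrics. A minimal sketch of that new path, assuming the pipeline is created the same way as earlier in app.py (the model directory and device below are illustrative assumptions, not taken from the diff):

import openvino_genai as ov_genai

# Assumed setup; app.py builds `pipe` earlier with its own model path and device.
pipe = ov_genai.LLMPipeline("Qwen3-0.6B-int4-ov", "CPU")

# generate() returns a results object whose perf_metrics carries timing data;
# get_throughput().mean is the average tokens/sec, as read in the committed code.
result = pipe.generate(["Hello, who are you?"], max_length=1024)
print(f"{result.perf_metrics.get_throughput().mean:.2f} tokens/sec")
print(result)  # the app passes this object straight to a gr.Textbox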