qwen3_test / app.py
hsuwill000's picture
Update app.py
6e96eae verified
raw
history blame
2.3 kB
import huggingface_hub as hf_hub
import time
import openvino_genai as ov_genai
import numpy as np
import gradio as gr
# Download the model snapshot from the Hugging Face Hub into a local directory.
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path, local_dir_use_symlinks=False) # local_dir_use_symlinks=False was added to avoid potential issues
# Build the inference pipeline on the selected device.
device = "CPU"
pipe = ov_genai.LLMPipeline(model_path, device)
tokenizer = pipe.get_tokenizer()
# Re-sets the tokenizer's chat template to its own current value.
# NOTE(review): this looks like a no-op — confirm whether it is actually needed.
tokenizer.set_chat_template(tokenizer.chat_template)
def generate_response(prompt):
    """
    Generate a response using the OpenVINO LLM pipeline.

    Args:
        prompt (str): The input prompt.

    Returns:
        tuple[str, str]: The generated text, and a two-line summary of the
        pipeline's own performance metrics (mean generate duration and
        mean throughput).
    """
    # The prompt is passed as a one-element batch, so the result holds one
    # decoded string per prompt; we read the first (and only) entry.
    output = pipe.generate([prompt], max_length=1024)

    # DecodedResults exposes the decoded strings as ``texts`` (plural);
    # there is no ``text`` attribute.
    generated_text = output.texts[0]

    # Timing comes from the pipeline's perf metrics rather than wall-clock
    # measurements taken here.
    perf = output.perf_metrics
    performance_metrics = (
        f"Generate duration: {perf.get_generate_duration().mean:.2f}ms\n"
        f"Throughput: {perf.get_throughput().mean:.2f} tokens/s"
    )
    return generated_text, performance_metrics
def main():
    """
    Create and launch the Gradio interface.

    The UI has a prompt textbox, a textbox for the generated response, a
    performance-metrics textbox that starts hidden, and a button that toggles
    the visibility of the metrics textbox.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# OpenVINO Qwen3-0.6B Demo")  # Title matches the loaded model

        prompt_input = gr.Textbox(lines=3, label="Enter your prompt:")
        output_text = gr.Textbox(label="Generated Response")
        performance_text = gr.Textbox(label="Performance Metrics", visible=False)  # Initially hidden

        # A component's ``visible`` attribute is a plain bool and cannot be
        # used as an event input/output, so the current visibility is tracked
        # in a State value instead.
        metrics_visible = gr.State(False)

        # NOTE: ``change`` fires whenever the textbox value changes, so a
        # generation is started for each edit of the prompt.
        prompt_input.change(
            fn=generate_response,
            inputs=prompt_input,
            outputs=[output_text, performance_text],  # Response and metrics
        )

        def toggle_metrics(visible):
            """Flip the visibility flag and apply it to the metrics textbox."""
            now_visible = not visible
            return now_visible, gr.update(visible=now_visible)

        # Button to show/hide performance metrics
        show_metrics_button = gr.Button("Show/Hide Performance Metrics")
        show_metrics_button.click(
            fn=toggle_metrics,
            inputs=metrics_visible,
            outputs=[metrics_visible, performance_text],
        )

    demo.launch()


if __name__ == "__main__":
    main()