import huggingface_hub as hf_hub
import time
import openvino_genai as ov_genai
import numpy as np
import gradio as gr

# Download the model
model_id = "OpenVINO/Qwen3-0.6B-int4-ov"
model_path = "Qwen3-0.6B-int4-ov"
hf_hub.snapshot_download(model_id, local_dir=model_path)

# Build the inference pipeline
device = "CPU"
pipe = ov_genai.LLMPipeline(model_path, device)
tokenizer = pipe.get_tokenizer()
tokenizer.set_chat_template(tokenizer.chat_template)
def generate_response(prompt):
    """
    Generates a response using the OpenVINO LLM pipeline.

    Args:
        prompt (str): The input prompt.

    Returns:
        tuple[str, str]: The generated text and a summary of the performance metrics.
    """
    start_time = time.time()
    output = pipe.generate([prompt], max_length=1024)
    end_time = time.time()
    generated_text = output.texts[0]  # DecodedResults exposes the generated strings via .texts
    performance_metrics = f"Generate duration: {output.perf_metrics.get_generate_duration().mean:.2f} ms\n"
    performance_metrics += f"Throughput: {output.perf_metrics.get_throughput().mean:.2f} tokens/s\n"
    performance_metrics += f"Wall-clock time: {end_time - start_time:.2f} s"
    return generated_text, performance_metrics
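
# Optional sketch, not part of the original demo: openvino_genai's generate()
# also accepts a streamer callback, so decoded text can be surfaced as it is
# produced instead of waiting for the full completion. The helper below is an
# illustrative assumption of how that could look; the Gradio UI does not use it.
def generate_response_streaming(prompt):
    chunks = []

    def collect(subword):
        chunks.append(subword)  # Called with each newly decoded piece of text
        return False  # Returning False tells the pipeline to keep generating

    pipe.generate(prompt, max_length=1024, streamer=collect)
    return "".join(chunks)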
def main():
    """
    Creates and launches the Gradio interface.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# OpenVINO Qwen3-0.6B Demo")  # Title matching the downloaded model
        prompt_input = gr.Textbox(lines=3, label="Enter your prompt:")
        output_text = gr.Textbox(label="Generated Response")
        performance_text = gr.Textbox(label="Performance Metrics", visible=False)  # Hidden until toggled
        metrics_visible = gr.State(False)  # Tracks whether the metrics box is shown

        def update_output(prompt):
            response, performance = generate_response(prompt)
            return response, performance  # Fill both output boxes

        # Run generation when the user submits the prompt (presses Enter),
        # rather than on every keystroke.
        prompt_input.submit(
            fn=update_output,
            inputs=prompt_input,
            outputs=[output_text, performance_text],
        )

        # Button to show/hide the performance metrics box
        def toggle_metrics(visible):
            visible = not visible
            return gr.update(visible=visible), visible

        show_metrics_button = gr.Button("Show/Hide Performance Metrics")
        show_metrics_button.click(
            fn=toggle_metrics,
            inputs=metrics_visible,
            outputs=[performance_text, metrics_visible],
        )

    demo.launch()


if __name__ == "__main__":
    main()
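
# To try this locally (assuming the script is saved as app.py, per the Spaces
# convention): install openvino-genai, huggingface_hub and gradio from PyPI,
# then run `python app.py`. Gradio's demo.launch() serves the interface on
# http://localhost:7860 by default.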