Spaces:
Sleeping
Sleeping
File size: 1,952 Bytes
ab10ba2 e004a88 ab10ba2 03016fb ab10ba2 5c7b51d 53ad304 ab10ba2 e47b957 53ad304 b265954 5c7b51d ab10ba2 53ad304 ab10ba2 ac8b2d5 185553a 84ae698 ab2eeeb ab10ba2 5c7b51d bc42ba5 ab10ba2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import gradio as gr
import time
import onnxruntime_genai as og
from huggingface_hub import snapshot_download
import os
# Load model and processor
# Download only the CPU int4 variant of the ONNX export; snapshot_download
# returns the local cache directory it materialized the files into.
local_dir = snapshot_download(
    repo_id="microsoft/Phi-3.5-vision-instruct-onnx",
    allow_patterns="cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4/*"
)
# The actual model files live under the allow_patterns subfolder.
model_folder = os.path.join(
    local_dir, "cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4"
)
# Native onnxruntime-genai model plus its multimodal (text+image) processor;
# tokenizer_stream decodes generated tokens one at a time for streaming output.
model = og.Model(model_folder)
processor = model.create_multimodal_processor()
tokenizer_stream = processor.create_stream()
# Phi-3.5 chat-template markers used to assemble the prompt in ask_phi.
user_prompt = '<|user|>\n'
assistant_prompt = '<|assistant|>\n'
prompt_suffix = "<|end|>\n"
# Inference function
def ask_phi(image, question, max_length):
    """Answer a question about an image with Phi-3.5 Vision (ONNX, CPU).

    Args:
        image: Filesystem path to the uploaded image (Gradio `type="filepath"`),
            or None when the user submitted without uploading.
        question: Free-form user question about the image.
        max_length: Generation budget passed to og search options.

    Returns:
        The model's decoded text response (also streamed to stdout).
    """
    # gr.Image(type="filepath") delivers None when nothing was uploaded;
    # og.Images.open would fail opaquely, so report it clearly instead.
    if image is None:
        return "Please upload an image first."
    start_time = time.time()
    # Phi-3.5 vision chat format: image placeholder, then the question.
    prompt = f"{user_prompt}<|image_1|>\n{question}{prompt_suffix}{assistant_prompt}"
    images = og.Images.open(image)
    # onnxruntime-genai's multimodal processor takes only prompt/images —
    # the transformers-style return_tensors="pt" kwarg is not part of its API.
    inputs = processor(prompt=prompt, images=images)
    params = og.GeneratorParams(model)
    params.set_search_options(max_length=max_length)
    params.set_inputs(inputs)
    generator = og.Generator(model, params)
    response = ""
    # Stream token-by-token: decode each new token id and echo it to stdout
    # while accumulating the full response for the Gradio textbox.
    while not generator.is_done():
        generator.generate_next_token()
        new_token = generator.get_next_tokens()[0]
        output = tokenizer_stream.decode(new_token)
        print(output, end="", flush=True)
        response += output
    print(f"\nInference took {time.time() - start_time} seconds")
    # Release the generator's native resources promptly rather than waiting
    # for garbage collection between requests.
    del generator
    return response
# Gradio Interface
# --- Gradio UI ---
# Build the components up front so the Interface wiring reads declaratively.
image_input = gr.Image(type="filepath", label="Upload Image")
question_input = gr.Textbox(label="Your Prompt")
length_slider = gr.Slider(
    minimum=16, maximum=16384, step=16, value=5000, label="Context Length"
)
answer_output = gr.Textbox(label="Phi-3.5 Response")

# One image + prompt + generation-length control in, one text answer out.
demo = gr.Interface(
    fn=ask_phi,
    inputs=[image_input, question_input, length_slider],
    outputs=answer_output,
    title="Phi-3.5 Vision Instruct (ONNX)",
    description="Ask a question about an image using Phi-3.5 ONNX on CPU",
)

demo.launch(debug=True)