import base64
import io
import os

import gradio as gr
import openai
import requests


def encode_image_to_base64(image):
    """Serialize a PIL image to a base64-encoded JPEG string."""
    buffered = io.BytesIO()
    # Convert to RGB first so images with an alpha channel (e.g. PNG uploads)
    # can still be written out as JPEG.
    image.convert("RGB").save(buffered, format="JPEG")
    img_str = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return img_str
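# A quick way to sanity-check the helper above outside the app (hypothetical
# image path; Pillow is already required by Gradio's type="pil" component):
#
#   from PIL import Image
#   print(encode_image_to_base64(Image.open("example.jpg"))[:40])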
def ask_openai_with_image(instruction, json_prompt, low_quality_mode, image):
    """Send the instruction and uploaded image to the OpenAI chat completions API."""
    openai.api_key = os.getenv("API_KEY")

    base64_image = encode_image_to_base64(image)

    instruction = instruction.strip()
    if instruction == "":
        # The UI promises a default prompt when the instructions are left blank.
        instruction = (
            "I've uploaded an image and I'd like to know what it depicts "
            "and any interesting details you can provide."
        )

    if json_prompt.strip() != "":
        instruction = f"{instruction}\n\nReturn in JSON format and include the following attributes:\n\n{json_prompt.strip()}"
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": instruction,
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            # The image is sent inline as a base64 data URL;
                            # "detail" trades answer fidelity against token cost.
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low" if low_quality_mode else "high",
                        },
                    },
                ],
            }
        ],
        "max_tokens": 4095,
    }
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers={"Authorization": f"Bearer {openai.api_key}"},
        json=payload,
    )
    if response.status_code == 200:
        response_json = response.json()
        print("Response JSON:", response_json)
        try:
            return response_json["choices"][0]["message"]["content"]
        except Exception as e:
            print("Error in JSON structure:", e)
            print("Full JSON response:", response_json)
            return "Error processing the image response."
    else:
        return f"Error: {response.text}"
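# The function above can also be exercised without the Gradio UI, e.g. from a
# REPL (hypothetical image path; assumes API_KEY is set in the environment):
#
#   from PIL import Image
#   print(ask_openai_with_image("Describe this image.", "", True, Image.open("example.jpg")))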
json_schema = gr.Textbox(
    label="JSON Attributes",
    info="Define a list of attributes the model should return as valid JSON. Leave blank to disable JSON formatting.",
    lines=3,
    placeholder="""Example:
- name: Name of the object
- color: Color of the object
""",
)
instructions = gr.Textbox(
    label="Instructions",
    info="Instructions for the vision model to follow. Leave blank to use the default.",
    lines=2,
    placeholder="""Default:
I've uploaded an image and I'd like to know what it depicts and any interesting details you can provide.""",
)
low_quality_mode = gr.Checkbox(
    label="Low Quality Mode",
    info="Send the image in low detail to reduce token usage. See: https://platform.openai.com/docs/guides/vision/low-or-high-fidelity-image-understanding",
)
vision_playground = gr.Interface(
    fn=ask_openai_with_image,
    inputs=[
        instructions,
        json_schema,
        low_quality_mode,
        gr.Image(type="pil", label="Image"),
    ],
    outputs=[gr.Markdown()],
    title="GPT-4-Vision Playground",
    description="Upload an image and get a description from GPT-4 with Vision.",
)
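# To run the app locally (assuming this script is saved as app.py and the key is
# exported via the API_KEY environment variable read above):
#
#   API_KEY=sk-... python app.py
#
# Gradio serves the interface on http://127.0.0.1:7860 by default.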
vision_playground.launch()