# Spaces:
# Running
# Running
import base64
import mimetypes

import openai
def describe_image_with_gpt4o(
    image_path: str,
    api_key: str,
    *,
    model: str = "gpt-4o",
    max_tokens: int = 256,
) -> str:
    """Send an image file to a vision-capable chat model and return its description.

    The image is read from disk, base64-encoded into a ``data:`` URL and sent
    together with a fixed prompt asking for a detailed description of the
    person (face, hair, expression, accessories, clothing, style, background).

    Args:
        image_path: Path of the image file to describe.
        api_key: OpenAI API key. It is assigned to ``openai.api_key``, i.e. it
            is set module-wide, not just for this call.
        model: Chat model name passed to the API. Defaults to ``"gpt-4o"``.
        max_tokens: Upper bound on the length of the generated description.
            Defaults to 256.

    Returns:
        The description text with surrounding whitespace removed, or an empty
        string if the response carries no text content.

    Raises:
        OSError: If ``image_path`` cannot be opened or read.
        Any exception raised by the ``openai`` client propagates unchanged.
    """
    # Read and encode the image before touching the API so that a bad path
    # fails fast without any network traffic.
    with open(image_path, "rb") as img_file:
        img_base64 = base64.b64encode(img_file.read()).decode("utf-8")

    # Label the payload with the real image type when the filename reveals it;
    # fall back to PNG (the previous hard-coded value) otherwise.
    mime_type, _ = mimetypes.guess_type(image_path)
    if not mime_type or not mime_type.startswith("image/"):
        mime_type = "image/png"
    img_data_url = f"data:{mime_type};base64,{img_base64}"

    # Adjacent literals are concatenated verbatim, so every fragment except
    # the last must end with its own separator.
    prompt = (
        "Describe this face with as much detail as possible, "
        "focusing on facial features, hairstyle, expression, accessories, and notable traits. "
        "Be specific, provide detail on Clothing, style and background. "
        "Be specific, as if explaining to an artist who will draw a sticker of this person."
    )

    openai.api_key = api_key
    response = openai.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": img_data_url}},
                ],
            }
        ],
        max_tokens=max_tokens,
    )

    # ``content`` may be None (e.g. when the model declines to answer);
    # normalise that to an empty string instead of failing on ``.strip()``.
    content = response.choices[0].message.content
    return (content or "").strip()