import sys
import os
import re
import json
import base64
from io import BytesIO
from PIL import Image
import argparse
from vis_python_exe import PythonExecutor
from openai import OpenAI
from typing import Optional, Union
import gradio as gr
import markdown
def encode_image(image):
    """Return the base64 text encoding of an image.

    Args:
        image: Either a filesystem path (``str`` or ``os.PathLike``) to an
            image file, or an object exposing a PIL-style
            ``save(fp, format=...)`` method such as ``PIL.Image.Image``.

    Returns:
        The base64-encoded data as a ``str``. For a path, the file's raw
        bytes are encoded unchanged; for an image object, the bytes written
        by ``image.save`` are encoded.
    """
    if isinstance(image, (str, os.PathLike)):
        # Path input: encode the file contents as they are on disk.
        with open(image, "rb") as image_file:
            raw_bytes = image_file.read()
    else:
        # Image-object input. ``format`` exists on every PIL image but is
        # None for images that were not loaded from a file, so checking for
        # the attribute alone is not enough: fall back to PNG whenever the
        # format is missing or None.
        image_format = getattr(image, "format", None) or "PNG"
        buffered = BytesIO()
        image.save(buffered, format=image_format)
        raw_bytes = buffered.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")
def excute_codes(codes, messages, executor: PythonExecutor):
    """Run every non-empty snippet in ``codes`` through ``executor``.

    Args:
        codes: Sequence of code strings. Entries equal to ``""`` are not
            sent to the executor.
        messages: Passed through unchanged to ``executor.batch_apply``.
        executor: Object exposing ``batch_apply(codes, messages)``.

    Returns:
        A ``(batch_results, no_code_idx)`` tuple: the value returned by
        ``executor.batch_apply`` for the non-empty snippets, and the list of
        positions in ``codes`` that held an empty string.
    """
    # Positions of the empty entries, in their original order.
    no_code_idx = [position for position, snippet in enumerate(codes) if snippet == ""]
    # Only the snippets that actually contain code are executed.
    runnable = [snippet for snippet in codes if snippet != ""]
    return executor.batch_apply(runnable, messages), no_code_idx
def process_prompt_init(question, image, prompt_template, prompt_type):
prompt_prefix = prompt_template[prompt_type]
image_base64 = encode_image(image)
question_with_options = question
messages = [
{
"role": "user",
"content": [{"type": "text", "text": "" in response_text:
stop_reason = "
"
return response_text, stop_reason
except Exception as e:
print(f"API Error: {str(e)}")
return None, None
def evaluate_single_data(data, client, executor, prompt_template, prompt_type):
    """Run one sample through the generate / execute-code / continue loop.

    The initial conversation is built with ``process_prompt_init`` and sent
    to the model through ``call_chatgpt_api``. For as long as the reported
    stop reason equals the stop marker, the last fenced python block of the
    reply is extracted, run with ``excute_codes``, its output is appended to
    the conversation, and the model is asked to continue.

    Args:
        data: Mapping providing ``"question"`` and ``"image"``; ``"answer"``
            is read only when the first API call fails.
        client: Passed through to ``call_chatgpt_api``.
        executor: Passed through to ``excute_codes``.
        prompt_template: Passed through to ``process_prompt_init``.
        prompt_type: Passed through to ``process_prompt_init``.

    Returns:
        The accumulated ``messages`` in the normal case. If a follow-up API
        call fails, the ``messages`` built up to that point. If the very
        first API call fails, a dict with ``"input"``, ``"output"`` and
        ``"prediction"`` keys describing the failure is returned instead.
    """
    messages = process_prompt_init(data["question"], data['image'], prompt_template, prompt_type)

    # Generate the initial response.
    response_text, pred_stop_reason = call_chatgpt_api(
        messages,
        client,
        max_tokens=10000,
        stop=[""]
    )

    if response_text is None:
        print("Failed to get response from API")
        return {
            "input": data["question"],
            "output": data["answer"],
            "prediction": {
                "solution": "API Error",
                "correctness": False,
                "code_execution_count": 0,
            }
        }

    image_clue_idx = 1

    # NOTE(review): the stop marker compared against here is the empty
    # string, matching the ``stop`` list passed to the API call -- confirm
    # this is the marker ``call_chatgpt_api`` reports when code must be run.
    while pred_stop_reason == "":
        # Record the model's reply, then pull out the code to execute: the
        # text between the last "```python" fence and the next "```".
        messages = update_messages_with_code(messages, response_text)
        code_to_execute = response_text.split("```python")[-1].split("```")[0].strip()

        # Execute the code; only the first batch result is relevant here.
        exe_result = excute_codes([code_to_execute], messages, executor)[0][0]
        if exe_result is None:
            # No result at all: forward the literal string "None" as text.
            text_result = "None"
            images_result = None
        else:
            # The result is a pair; only its first element is consumed.
            output, _report = exe_result
            # Missing keys or a non-subscriptable output degrade to None
            # without also swallowing KeyboardInterrupt / SystemExit.
            try:
                text_result = output['text']
            except (KeyError, TypeError, IndexError):
                text_result = None
            try:
                images_result = output['images']
            except (KeyError, TypeError, IndexError):
                images_result = None

        messages, image_clue_idx = update_messages_with_excu_content(
            messages, images_result, text_result, image_clue_idx
        )

        # Generate the next part of the response.
        response_text, pred_stop_reason = call_chatgpt_api(
            messages,
            client,
            max_tokens=10000,
            stop=[""]
        )
        if response_text is None:
            # A follow-up call failed: there is no text to append, so hand
            # back the conversation built so far instead of passing None on
            # to the message-update helper.
            print("Failed to get response from API")
            return messages

    # Final answer (no further code requested): append it and finish.
    messages = update_messages_with_text(messages, response_text)
    return messages
def process_message(messages):
# 创建HTML输出
html_output = '