Update app.py
Browse files
app.py
CHANGED
@@ -15,23 +15,40 @@ import markdown
|
|
15 |
|
16 |
def encode_image(image):
|
17 |
"""
|
18 |
-
将PIL.Image对象或图像文件路径转换为base64
|
19 |
|
20 |
参数:
|
21 |
image: 可以是PIL.Image对象或图像文件路径
|
22 |
|
23 |
返回:
|
24 |
-
|
|
|
|
|
|
|
|
|
25 |
"""
|
|
|
|
|
26 |
if isinstance(image, str):
|
27 |
# 处理文件路径的情况
|
|
|
28 |
with open(image, "rb") as image_file:
|
29 |
-
|
30 |
else:
|
31 |
# 处理PIL.Image对象的情况
|
|
|
32 |
buffered = BytesIO()
|
33 |
image.save(buffered, format='PNG')
|
34 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
|
36 |
def excute_codes(codes, messages, executor: PythonExecutor):
|
37 |
no_code_idx = []
|
@@ -47,13 +64,16 @@ def excute_codes(codes, messages, executor: PythonExecutor):
|
|
47 |
def process_prompt_init(question, image, prompt_template, prompt_type):
|
48 |
prompt_prefix = prompt_template[prompt_type]
|
49 |
|
50 |
-
|
|
|
|
|
|
|
51 |
question_with_options = question
|
52 |
|
53 |
messages = [
|
54 |
{
|
55 |
"role": "user",
|
56 |
-
"content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options)}]
|
57 |
}
|
58 |
]
|
59 |
|
@@ -250,7 +270,7 @@ def o3_chat(model_name, api_key, base_url, question, image):
|
|
250 |
# executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
|
251 |
|
252 |
prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
|
253 |
-
prompt_type = '
|
254 |
|
255 |
data = {
|
256 |
"question": question,
|
|
|
15 |
|
16 |
def encode_image(image):
    """
    Encode an image as a base64 string and report its pixel dimensions.

    Args:
        image: Either a filesystem path (``str``) to an image file, or an
            object exposing the PIL.Image interface used here
            (``save(fp, format=...)`` and a ``size`` attribute).

    Returns:
        A dict with the following keys:
            - 'base64': base64-encoded string. For a path this is the raw
              file content; for an image object it is the PNG
              serialisation of that image.
            - 'width': image width in pixels.
            - 'height': image height in pixels.
            - 'resolution': the string "<width>x<height>".
    """
    if isinstance(image, str):
        # File-path case: read the size from the image header, then encode
        # the file bytes unchanged. The context manager makes sure the
        # handle opened by Image.open is released instead of leaking.
        with Image.open(image) as img_obj:
            width, height = img_obj.size
        with open(image, "rb") as image_file:
            raw_bytes = image_file.read()
    else:
        # Image-object case: serialise to PNG in memory.
        buffered = BytesIO()
        image.save(buffered, format='PNG')
        raw_bytes = buffered.getvalue()
        width, height = image.size

    return {
        'base64': base64.b64encode(raw_bytes).decode('utf-8'),
        'width': width,
        'height': height,
        # Previously documented but never returned.
        'resolution': f"{width}x{height}",
    }
|
52 |
|
53 |
def excute_codes(codes, messages, executor: PythonExecutor):
|
54 |
no_code_idx = []
|
|
|
64 |
def process_prompt_init(question, image, prompt_template, prompt_type):
|
65 |
prompt_prefix = prompt_template[prompt_type]
|
66 |
|
67 |
+
img_result = encode_image(image)
|
68 |
+
image_base64 = img_result['base64']
|
69 |
+
width = img_result['width']
|
70 |
+
height = img_result['height']
|
71 |
question_with_options = question
|
72 |
|
73 |
messages = [
|
74 |
{
|
75 |
"role": "user",
|
76 |
+
"content": [{"type": "text", "text": "<image_clue_0>"}] + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}] + [{"type": "text", "text": "</image_clue_0>\n\n"}] + [{"type": "text", "text": prompt_prefix.format(query=question_with_options, width=str(width), height=str(height))}]
|
77 |
}
|
78 |
]
|
79 |
|
|
|
270 |
# executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")
|
271 |
|
272 |
prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
|
273 |
+
prompt_type = 'vistool_with_img_info'
|
274 |
|
275 |
data = {
|
276 |
"question": question,
|