import sys
import os
import re
import json
import base64
from io import BytesIO
from PIL import Image
import argparse
# from vis_python_exe import PythonExecutor
from shared_vis_python_exe import PythonExecutor
from openai import OpenAI
from typing import Optional, Union
import gradio as gr
import markdown


def encode_image(image):
    """
    Convert a PIL.Image object or an image file path into a base64-encoded string.

    Args:
        image: a PIL.Image object or a path to an image file

    Returns:
        The base64-encoded string.
    """
    if isinstance(image, str):
        # The input is a file path
        with open(image, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')
    else:
        # The input is a PIL.Image object
        buffered = BytesIO()
        image.save(buffered, format='PNG')
        return base64.b64encode(buffered.getvalue()).decode('utf-8')


def excute_codes(codes, messages, executor: PythonExecutor):
    # Run every non-empty code snippet and remember the indices that had no code
    no_code_idx = []
    codes_use = []
    for i, code in enumerate(codes):
        if code == "":
            no_code_idx.append(i)
        else:
            codes_use.append(code)
    batch_results = executor.batch_apply(codes_use, messages)
    return batch_results, no_code_idx


def process_prompt_init(question, image, prompt_template, prompt_type):
    prompt_prefix = prompt_template[prompt_type]

    image_base64 = encode_image(image)
    question_with_options = question

    # The <image_clue_*>, </code>, <interpreter> and <answer> tags used throughout this file
    # must match the tag names that the prompt template instructs the model to emit.
    messages = [
        {
            "role": "user",
            "content": [{"type": "text", "text": "<image_clue_0>"}]
            + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}}]
            + [{"type": "text", "text": "</image_clue_0>\n\n"}]
            + [{"type": "text", "text": prompt_prefix.format(query=question_with_options)}]
        }
    ]

    return messages


def update_messages_with_excu_content(messages, images_result, text_result, image_clue_idx):
    # Append the interpreter output (text and images) to the last assistant message
    new_messages = []
    image_content = []
    for message_item in messages[:-1]:
        new_messages.append(message_item)

    assistant_message_item = messages[-1]['content']
    interpreter_message_text_prefix = [{"type": "text", "text": f"<interpreter>\nText Result:\n{text_result}\nImage Result:\n"}]
    if images_result is not None:
        for image_base64_item in images_result[image_clue_idx-1:]:
            interpreter_message_images = (
                [{"type": "text", "text": f"<image_clue_{image_clue_idx}>"}]
                + [{"type": "image_url", "image_url": {"url": f"data:image/jpeg;base64,{image_base64_item}"}}]
                + [{"type": "text", "text": f"</image_clue_{image_clue_idx}>"}]
            )
            image_content += interpreter_message_images
            image_clue_idx += 1
    else:
        image_content = [{"type": "text", "text": "None"}]
    interpreter_message_text_profill = [{"type": "text", "text": "</interpreter>\n"}]

    assistant_message_item = assistant_message_item + interpreter_message_text_prefix + image_content + interpreter_message_text_profill
    new_messages.append({"role": "assistant", "content": assistant_message_item})
    return new_messages, image_clue_idx


def update_messages_with_code(messages, generated_content):
    # Generation stopped at </code>, so append it back when recording the message
    message_item = {
        "role": "assistant",
        "content": [{"type": "text", "text": f"{generated_content}</code>\n"}]
    }

    messages.append(message_item)
    return messages


def update_messages_with_text(messages, generated_content):
    message_item = {
        "role": "assistant",
        "content": [{"type": "text", "text": f"{generated_content}"}]
    }

    messages.append(message_item)
    return messages
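# For reference, `prompt_template_vis.json` (loaded in o3_chat below) is expected to map each
# prompt type to a template string containing a `{query}` placeholder that process_prompt_init
# fills in. The real template file is not shown here; the snippet below is only an illustrative
# placeholder of the expected shape, with made-up prompt text:
#
#     prompt_template = {"vistool": "Answer the question about the given image. You may write "
#                                   "Python code to inspect the image.\nQuestion: {query}"}
#     msgs = process_prompt_init("What is shown?", "./examples/1.png", prompt_template, "vistool")
#     # msgs[0]["content"] interleaves text and image_url items in the format the OpenAI API accepts.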
def call_chatgpt_api(messages, client, max_tokens=10000, stop=None, temperature=0.6):
    """Call the ChatGPT API with the given messages."""
    try:
        response = client.chat.completions.create(
            model="gpt-4.1",  # a vision-capable model
            messages=messages,
            max_tokens=max_tokens,
            temperature=temperature,
            top_p=1.0,
            stop=stop
        )

        response_text = response.choices[0].message.content

        # Check whether one of the stop markers was hit
        stop_reason = None
        if stop and any(s in response_text for s in stop):
            for s in stop:
                if s in response_text:
                    stop_reason = s
                    break
        else:
            stop_reason = response.choices[0].finish_reason

        if "<answer>" in response_text:
            stop_reason = "<answer>"

        return response_text, stop_reason
    except Exception as e:
        print(f"API Error: {str(e)}")
        return None, None


def evaluate_single_data(data, client, executor, prompt_template, prompt_type):
    messages = process_prompt_init(data["question"], data['image'], prompt_template, prompt_type)

    # Generate the initial response
    response_text, pred_stop_reason = call_chatgpt_api(
        messages,
        client,
        max_tokens=10000,
        stop=["</code>"]  # pause whenever the model closes a code block
    )

    # Iteratively process the response
    final_response = response_text
    code_execution_count = 0
    image_clue_idx = 1

    while True:
        # Check whether code needs to be executed
        if pred_stop_reason == "</code>":
            # Extract the code to execute
            messages = update_messages_with_code(messages, response_text)
            code_to_execute = response_text.split("```python")[-1].split("```")[0].strip()

            # Execute the code
            exe_result = excute_codes([code_to_execute], messages, executor)[0][0]
            if exe_result is None:
                text_result = "None"
                images_result = None
            else:
                output, report = exe_result
                try:
                    text_result = exe_result[0]['text']
                except Exception:
                    text_result = None
                    print("text result is none.")
                try:
                    images_result = exe_result[0]['images']
                except Exception:
                    images_result = None
                    print("image result is none.")

            messages, new_image_clue_idx = update_messages_with_excu_content(messages, images_result, text_result, image_clue_idx)
            image_clue_idx = new_image_clue_idx

            code_execution_count += 1
            print(f"Code Execution Count: {code_execution_count}")

            # Generate the next part of the response
            response_text, pred_stop_reason = call_chatgpt_api(
                messages,
                client,
                max_tokens=10000,
                stop=["</code>"]
            )
        else:
            final_response = response_text
            messages = update_messages_with_text(messages, response_text)
            print("GPT-4.1 finish.")
            break

    return messages
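# Example (illustrative only): running one question end-to-end without the Gradio UI.
# The API key, base URL, image path and question below are placeholders.
#
#     client = OpenAI(api_key="YOUR_API_KEY", base_url="https://api.openai.com/v1")
#     executor = PythonExecutor()
#     prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
#     data = {"question": "How many objects are in the image?", "image": "./examples/1.png"}
#     messages = evaluate_single_data(data, client, executor, prompt_template, "vistool")
#     print(process_message(messages))  # render the full conversation as HTML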
def process_message(messages):
    # Build an HTML rendering of the conversation
    html_output = '<div style="color: black;">'  # wrapper div that forces black text
    for message_item in messages:
        role = message_item['role']
        content = message_item['content']

        # Style the message header according to the role
        if role == "user" or role == "human":
            html_output += '<div style="margin: 10px 0; padding: 10px; border-radius: 8px; background-color: #f0f0f0;">'
            html_output += '<strong>User:</strong><br>'
        elif role == "assistant":
            html_output += '<div style="margin: 10px 0; padding: 10px; border-radius: 8px; background-color: #e8f4ff;">'
            html_output += '<strong>Assistant:</strong><br>'
        else:
            html_output += '<div style="margin: 10px 0; padding: 10px; border-radius: 8px;">'
            html_output += f'<strong>{role.capitalize()}:</strong><br>'

        # Render the content items
        for content_item in content:
            content_type = content_item['type']
            if content_type == "text":
                # Convert the Markdown text to HTML
                md_text = content_item['text']
                html_text = markdown.markdown(md_text, extensions=['fenced_code', 'codehilite'])
                # html_text = markdown.markdown(md_text)
                # html_text = md_text
                html_output += f'<div>{html_text}</div>'
            elif content_type == "image_url":
                content_value = content_item['image_url']['url']
                # Base64 data URLs can be embedded directly
                if content_value.startswith("data:"):
                    html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'
                else:
                    html_output += f'<img src="{content_value}" style="max-width: 100%; margin: 10px 0;">'

        html_output += '</div>'  # close the message div

    html_output += '</div>'  # close the outermost div
    return html_output


def o3_chat(api_key, base_url, question, image):
    print("Request received, starting inference...")
    # Initialize components
    client = OpenAI(api_key=api_key, base_url=base_url)
    executor = PythonExecutor()
    # executor = SharedRuntimeExecutor(var_whitelist="RETAIN_ALL_VARS")

    prompt_template = json.load(open("./prompt_template_vis.json", "r", encoding="utf-8"))
    prompt_type = 'vistool'

    data = {
        "question": question,
        "image": image,
    }

    # Evaluate the single data point
    messages = evaluate_single_data(data, client, executor, prompt_template, prompt_type)
    html_output = process_message(messages)

    # Serialize the conversation to a JSON string (intended for download)
    json_str = json.dumps(messages, ensure_ascii=False, indent=4)

    return html_output


# Gradio UI
def create_demo():
    with gr.Blocks(title="GPT-4.1 with Python Interpreter", css="div.prose * {color: black !important;}") as demo:
        gr.Markdown("# GPT-4.1 with Python Interpreter")
        gr.Markdown("Please do not share this demo with others.")
        gr.Markdown("Upload an image and ask a question to get a response with code execution capabilities.")

        with gr.Row():
            with gr.Column(scale=1):
                api_key = gr.Textbox(label="OpenAI API Key", type="password", value="sk-kBQuM0gvNBhOHmKz43b3iQut01bsOgg8Pv76eMKguu6jvncm")
                base_url = gr.Textbox(label="Base URL (optional)", value="https://api.claudeshop.top/v1")
                image_input = gr.Image(label="Upload Image", type="pil")
                question = gr.Textbox(label="Question", placeholder="Ask a question about the image...")
                submit_btn = gr.Button("Submit")

        with gr.Row():
            output = gr.HTML(label="Response")

        # Handle submission
        submit_btn.click(
            fn=o3_chat,
            inputs=[api_key, base_url, question, image_input],
            outputs=[output]
        )

        # Examples
        examples = [
            [
                "./examples/1.png",
                "From the information on that advertising board, what is the type of this shop?\nA. The shop is a yoga studio.\nB. The shop is a cafe.\nC. The shop is a seven-eleven.\nD. The shop is a milk tea shop."
            ],
            [
                "./examples/2.png",
                "What is the diagnosis for the abnormality seen in this image?\nA. Pulmonary embolism.\nB. Tuberculosis.\nC. COVID-19 infection.\nD. Influenza."
            ],
            [
                "./examples/3.png",
                "What is the color of the liquid contained in the glass on the table?\nA. The color of the liquid contained in the glass on the table is green.\nB. The color of the liquid contained in the glass on the table is transparent.\nC. The color of the liquid contained in the glass on the table is white.\nD. The color of the liquid contained in the glass on the table is orange."
            ],
            [
                "./examples/4.png",
                "Is the dog on the left or right side of the bicycle?\nA. The dog is on the right side of the bicycle.\nB. The dog is on the left side of the bicycle."
            ],
            [
                "./examples/5.png",
                "Is the kid with black shirt on the left or right side of the kid with blue shirt?\nA. The kid with black shirt is on the left side of the kid with blue shirt.\nB. The kid with black shirt is on the right side of the kid with blue shirt."
            ],
            [
                "./examples/6.png",
                "What can be observed in this image?\nA. Nerve entrapment.\nB. Musculoskeletal abnormality.\nC. Arteriovenous anomaly.\nD. Renal cyst."
            ],
            [
                "./examples/7.png",
                "What is the specific stage of cancer depicted in the image? A)Stage Ib, B)Stage IIIb, C)Stage IIc, D)Stage IIIa"
            ],
            [
                "./examples/8.png",
                "A gymnast jotted down the number of cartwheels she did each day. What is the mode of the numbers?"
            ],
            [
                "./examples/9.png",
                "Does Virginia have the highest value in the USA ?"
            ],
            [
                "./examples/10.png",
                "AB is the diameter of ⊙O, PA is tangent to ⊙O at point A, and PO intersects ⊙O at point C; connect BC, if ∠P = 40.0, then ∠B is equal to ()"
            ],
            [
                "./examples/11.png",
                "How many single-color paths go from C to A?"
            ],
        ]

        gr.Examples(
            examples,
            [image_input, question],
            label="Click any example to try it out!"
        )

        gr.Markdown("""
        ### Tips
        1. Click the 'log' button at the top left to check the output log.
        2. A response may take 2~5 minutes.
        """)

    return demo


# Create and launch the app
if __name__ == "__main__":
    demo = create_demo()
    demo.launch()
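# Note: each request can take several minutes (see the Tips above). If the demo is shared with
# several users, enabling Gradio's request queue is one way to keep the UI responsive; a minimal
# sketch, assuming a recent Gradio release:
#
#     demo.queue(max_size=8).launch()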