# Spaces: Jimmyzheng-10 / ScreenCoder (Running) | File size: 14,606 Bytes | a383d0e

from utils import encode_image, Doubao, Qwen_2_5_VL
from PIL import Image
import bs4
from threading import Thread
import time

# Per-component user instructions (currently commented out)
# user_instruction = {
#     "sidebar": "将所有的图标改为更加美观的样式；请你填充相应的适量有关的英文文字信息在其中；请你将布局美化",
#     "header": "将Google的logo更改美观；将头像颜色更改美观",
#     "navigation": "请你将布局美化",
#     "main content": "保持基本的布局不变，请你在对应的位置填充适量的关于elon trump的英文文本信息；将图像块的布局重新美化。"
# }

# Prompt for each component
# Opening sentences shared by every component prompt.
_PROMPT_INTRO = (
    "This is a screenshot of a container. Please fill in a complete HTML and "
    "tail-wind CSS code to accurately reproduce the given container. "
)

# Closing part shared by every component prompt: the fill-in skeleton and the
# output rule. The four-space indentation inside the text is part of the prompt.
_PROMPT_OUTRO = (
    " The following is the code for filling in:\n"
    "    <div>\n"
    "    your code here\n"
    "    </div>,\n"
    "    only return the code within the <div> and </div> tags"
)

# Component type -> the requirement sentences that are specific to that type.
_COMPONENT_REQUIREMENTS = (
    (
        "sidebar",
        "Please note that the layout, icon style, size, and text information of all blocks need to be basically consistent with the original screenshot based on the user's additional conditions.",
    ),
    (
        "header",
        "Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions.",
    ),
    (
        "navigation",
        "Please note that the relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions. Please use the same icons as in the original screenshot.",
    ),
    (
        "main content",
        "Please note that all images displayed in the screenshot must be replaced with pure gray-400 image blocks of the same size as the corresponding images in the original screenshot, and the text information in the images does not need to be recognized. The relative position, layout, text information, and color of all blocks in the boundary box need to be basically consistent with the original screenshot based on the user's additional conditions.",
    ),
)

# Full prompt text keyed by component type, in the order listed above.
PROMPT_DICT = {
    component: _PROMPT_INTRO + requirements + _PROMPT_OUTRO
    for component, requirements in _COMPONENT_REQUIREMENTS
}

# PROMPT_sidebar = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的排版、图标样式、大小、文字信息需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：

#     <div>
#     your code here
#     </div>

#     只需返回<div>和</div>标签内的代码"""

# PROMPT_header = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：

#     <div>
#     your code here
#     </div>

#     只需返回<div>和</div>标签内的代码"""

# PROMPT_navigation = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。请注意所有组块的在boundary box中的相对位置、文字排版、颜色需要在用户额外条件的基础上与原始截图基本保持一致。请你直接使用原始截图中一致的图标。以下是供填写的代码：

#     <div>
#     your code here
#     </div>

#     只需返回<div>和</div>标签内的代码"""

# PROMPT_main_content = f"""这是一个container的截图。请填写一段完整的HTML和tail-wind CSS代码以准确再现给定的容器。截图中显示的图像务必全部用与原始截图中对应图像同样大小的纯灰色图像块替换，不需要识别图像中的文字信息。请注意所有组块在boundary box中的相对位置、排版、文字信息、颜色需要在用户额外条件的基础上与原始截图基本保持一致。以下是供填写的代码：

#     <div>
#     your code here
#     </div>

#     只需返回<div>和</div>标签内的代码"""

# Generate code for each component
def generate_code(bbox_tree, img_path, bot):
    """Generate code for all the leaf nodes in the bounding box tree.

    Each leaf is cropped out of the image at ``img_path`` and sent, together
    with the prompt registered in ``PROMPT_DICT`` for the leaf's ``type``, to
    ``bot.ask``. Leaves without a ``type`` or with a type that has no prompt
    are reported on stdout and skipped.

    :param bbox_tree: Root node dict. Nodes carry ``bbox`` ([x1, y1, x2, y2] in
        pixel coordinates), ``id``, optionally ``type`` and ``children``.
    :param img_path: Path of the screenshot the bounding boxes refer to.
    :param bot: Object exposing ``ask(prompt, encoded_image)``.
    :return: Dictionary ``{node id: generated code}``. When cropping or the
        request fails for a leaf, its value is an ``<!-- Error: ... -->`` comment.
    """
    code_dict = {}

    def _collect(node, img):
        children = node.get("children")
        if children:
            for child in children:
                _collect(child, img)
            return

        # Leaf node: pick the prompt that belongs to its component type.
        if "type" not in node:
            print("Node type not found")
            return
        prompt = PROMPT_DICT.get(node["type"])
        if prompt is None:
            print(f"Unknown component type: {node['type']}")
            return

        try:
            # bbox is already in pixel coordinates [x1, y1, x2, y2]
            cropped_img = img.crop(node["bbox"])
            code_dict[node["id"]] = bot.ask(prompt, encode_image(cropped_img))
        except Exception as e:
            # Best effort: one failing component must not abort the others.
            print(f"Error generating code for {node.get('type', 'unknown')}: {str(e)}")
            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"

    # Keep the image open only for the duration of the traversal so the
    # underlying file handle is released afterwards.
    with Image.open(img_path) as img:
        _collect(bbox_tree, img)
    return code_dict

# Generate code for each component in parallel
def generate_code_parallel(bbox_tree, img_path, prompt, bot):
    """Generate code for all the leaf nodes in the bounding box tree in parallel.

    One thread is started per leaf node; the call returns after all threads
    have finished. The same ``prompt`` is used for every leaf.

    :param bbox_tree: Root node dict. Nodes carry ``bbox`` ([x1, y1, x2, y2]),
        ``id`` and optionally ``children``.
    :param img_path: Path of the screenshot the bounding boxes refer to.
    :param prompt: Prompt text sent with every cropped leaf image.
    :param bot: Object exposing ``ask(prompt, encoded_image)``.
    :return: Dictionary ``{node id: generated code}``. Leaves that could not be
        processed map to an ``<!-- Error: ... -->`` comment instead.
    """
    code_dict = {}
    threads = []

    def _generate_code_with_retry(node, max_retries=3, retry_delay=2):
        """Generate code with retry mechanism for rate limit errors"""
        try:
            # Create a new image instance for each thread
            with Image.open(img_path) as img:
                cropped_img = img.crop(node["bbox"])

                delay = retry_delay
                for attempt in range(max_retries):
                    try:
                        code_dict[node["id"]] = bot.ask(prompt, encode_image(cropped_img))
                        return
                    except Exception as e:
                        attempts_left = attempt < max_retries - 1
                        if "rate_limit" in str(e).lower() and attempts_left:
                            print(f"Rate limit hit, retrying in {delay} seconds... (Attempt {attempt + 1}/{max_retries})")
                            time.sleep(delay)
                            delay *= 2  # Exponential backoff
                        else:
                            print(f"Error generating code for node {node['id']}: {str(e)}")
                            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"
                            return
        except Exception as e:
            # Anything that fails before the request (opening/cropping the
            # image) is recorded for this node instead of killing the thread.
            print(f"Error processing image for node {node['id']}: {str(e)}")
            code_dict[node["id"]] = f"<!-- Error: {str(e)} -->"

    def _spawn_workers(node):
        children = node.get("children")
        if not children:
            worker = Thread(target=_generate_code_with_retry, args=(node,))
            worker.start()
            threads.append(worker)
            return
        for child in children:
            _spawn_workers(child)

    _spawn_workers(bbox_tree)

    # Wait for all threads to complete
    for worker in threads:
        worker.join()

    return code_dict

# Generate HTML from the bounding box tree
def generate_html(bbox_tree, output_file="output.html", img_path="data/test1.png"):
    """
    Generates an HTML file with nested containers based on the bounding box tree.

    Every descendant of the root becomes a ``<div class="box">`` whose ``id`` is
    the node's ``id`` and whose position and size are expressed as percentages
    of its parent's bounding box. The root itself is represented by the
    outermost ``<div class="container">``.

    :param bbox_tree: Dictionary representing the bounding box tree. Each node
        has a ``bbox`` of ``[x1, y1, x2, y2]`` and an optional ``children``
        list; every non-root node also needs an ``id``.
    :param output_file: The name of the output HTML file.
    :param img_path: Unused. Kept in the signature so existing callers keep working.
    """
    # HTML and CSS templates
    # the container class is used to create grid and position the boxes
    # include the tailwind css in the head tag
    # NOTE(review): the stylesheet URL previously contained the placeholder
    # "[email protected]" (an e-mail obfuscation artefact), which is not a valid
    # package reference; it is restored to the Tailwind v2 CDN build — confirm
    # the intended version.
    html_template_start = """
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Bounding Boxes Layout</title>
        <style>
            body, html {
                margin: 0;
                padding: 0;
                width: 100%;
                height: 100%;
            }
            .container { 
                position: relative;
                width: 100%;
                height: 100%;
                box-sizing: border-box;
            }
            .box {
                position: absolute;
                box-sizing: border-box;
                overflow: hidden;
            }
            .box > .container {
                display: grid;
                width: 100%;
                height: 100%;
            }
        </style>
        <link href="https://cdn.jsdelivr.net/npm/tailwindcss@2.2.19/dist/tailwind.min.css" rel="stylesheet">
    </head>
    <body>
        <div class="container">
    """

    html_template_end = """
        </div>
    </body>
    </html>
    """

    def _percent(length, total):
        # A zero-sized parent has no meaningful relative layout; emit 0%
        # instead of failing with ZeroDivisionError.
        return length / total * 100 if total else 0.0

    # Function to recursively generate HTML
    def process_bbox(node, parent_width, parent_height, parent_left, parent_top):
        x1, y1, x2, y2 = node['bbox'][:4]
        node_id = node['id']
        box_width = x2 - x1
        box_height = y2 - y1

        # Calculate relative positions and sizes
        left = _percent(x1 - parent_left, parent_width)
        top = _percent(y1 - parent_top, parent_height)
        width = _percent(box_width, parent_width)
        height = _percent(box_height, parent_height)

        # Start the box div
        parts = [f'''
            <div id="{node_id}" class="box" style="left: {left}%; top: {top}%; width: {width}%; height: {height}%;">
        ''']

        children = node.get('children', [])
        if children:
            # If there are children, add a nested container; children are
            # positioned relative to this box's own pixel size and origin.
            parts.append('''
                <div class="container">
            ''')
            parts.extend(
                process_bbox(child, box_width, box_height, x1, y1)
                for child in children
            )
            parts.append('''
                </div>
            ''')

        # Close the box div
        parts.append('''
            </div>
        ''')
        return "".join(parts)

    root_x, root_y, root_x2, root_y2 = bbox_tree['bbox'][:4]
    # Width/height are measured from the root's own origin so that a root that
    # does not start at (0, 0) is still laid out correctly.
    root_width = root_x2 - root_x
    root_height = root_y2 - root_y

    fragments = [html_template_start]
    fragments.extend(
        process_bbox(child, root_width, root_height, root_x, root_y)
        for child in bbox_tree.get('children', [])
    )
    fragments.append(html_template_end)

    soup = bs4.BeautifulSoup("".join(fragments), 'html.parser')

    # The document declares UTF-8, so write it as UTF-8 regardless of locale.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(soup.prettify())

# Substitute the code in the html file
def code_substitution(html_file, code_dict):
    """Insert the generated code into the matching elements of the html file.

    For every ``id -> code`` pair, Markdown code-fence markers (```` ```html ````
    and ```` ``` ````) are stripped from the code, which is then parsed and
    appended to the element whose ``id`` attribute equals that id. The file is
    rewritten in place.

    :param html_file: Path of the HTML file to update.
    :param code_dict: Dictionary ``{element id: generated code}``.
    """
    # The layout declares <meta charset="UTF-8">, and generated code may
    # contain non-ASCII text, so never rely on the locale's default encoding.
    with open(html_file, "r", encoding="utf-8") as f:
        soup = bs4.BeautifulSoup(f.read(), 'html.parser')

    for node_id, code in code_dict.items():
        snippet = code.replace("```html", "").replace("```", "")
        target = soup.find(id=str(node_id))
        if target is None:
            print(f"No element with id {node_id} found in {html_file}; skipping")
            continue
        # Append the generated markup to the element's existing content.
        target.append(bs4.BeautifulSoup(snippet, 'html.parser'))

    with open(html_file, "w", encoding="utf-8") as f:
        f.write(soup.prettify())

# Main
if __name__ == "__main__":
    import json

    # Input/output locations used by this example run.
    bboxes_path = "data/tmp/test1_bboxes.json"
    img_path = "data/input/test1.png"
    layout_path = 'data/output/test1_layout.html'

    # Load bboxes from block_parsing.py output (closing the file afterwards)
    with open(bboxes_path, "r", encoding="utf-8") as bbox_file:
        boxes_data = json.load(bbox_file)

    # NOTE: `img` is bound at module level here; keep the name stable, since
    # module-level names are visible to the functions defined above.
    with Image.open(img_path) as img:
        width, height = img.size

    # Create root node with actual image dimensions
    root = {
        "bbox": [0, 0, width, height],  # Use actual image dimensions
        "children": []
    }

    # Map region IDs to component types
    region_type_mapping = {
        "1": "sidebar",
        "2": "header",
        "3": "navigation",
        "4": "main content"
    }

    # Add each region as a child with its type
    for region in boxes_data.get("regions", []):
        # Convert normalized coordinates to pixel coordinates
        x = region["x"] * width
        y = region["y"] * height
        w = region["w"] * width
        h = region["h"] * height

        root["children"].append({
            "bbox": [x, y, x + w, y + h],  # Convert to [x1, y1, x2, y2] format
            "children": [],
            # The mapping is keyed by strings; normalise so that numeric
            # region ids in the JSON resolve to the same component type.
            "type": region_type_mapping.get(str(region["id"]), "unknown")
        })

    # Assign IDs to all nodes
    def assign_id(node, next_id):
        """Number ``node`` and its descendants in pre-order; return the last id used."""
        node["id"] = next_id
        for child in node.get("children", []):
            next_id = assign_id(child, next_id + 1)
        return next_id

    assign_id(root, 0)

    # print(root)
    # Generate initial HTML layout
    generate_html(root, layout_path)

    # Initialize the bot
    bot = Doubao("doubao_api.txt", model = "doubao-1.5-thinking-vision-pro-250428")
    # bot = Qwen_2_5_VL("qwen_api.txt", model="qwen2.5-vl-72b-instruct")

    # Generate code for each component
    code_dict = generate_code(root, img_path, bot)

    # Substitute the generated code into the HTML
    code_substitution(layout_path, code_dict)