ScreenCoder / app.py
Jimmyzheng-10's picture
Add title and github link
8c0246a
raw
history blame
19.8 kB
import gradio as gr
import os
import tempfile
import cv2
import numpy as np
import urllib.parse
from screencoder.main import generate_html_for_demo
from PIL import Image
import shutil
import html
import base64
from bs4 import BeautifulSoup
from pathlib import Path
# Predefined examples offered in the UI.
#
# Each row has six columns:
#   0: path of the example screenshot
#   1: sidebar prompt
#   2: header prompt
#   3: navigation prompt
#   4: main-content prompt
#   5: the screenshot path again (same value as column 0; nothing in this
#      file reads it, it is kept so existing row indexing stays valid)
examples_data = [
    [
        "screencoder/data/input/test1.png",
        "",
        "",
        "",
        "",
        "screencoder/data/input/test1.png",
    ],
    [
        "screencoder/data/input/test3.png",
        "",
        "",
        "",
        "",
        "screencoder/data/input/test3.png",
    ],
    [
        "screencoder/data/input/draft.png",
        # Typo fixed: this prompt previously read "Fued".
        "Add more text about 'Trump-Musk Feud' in the whole area.",
        "Beautify the logo 'Google'.",
        "",
        "Add text content about 'Trump and Musk' in 'Top Stories' and 'Wikipedia'. Add boundary box for each part.",
        "screencoder/data/input/draft.png",
    ],
]
# The examples widget is bound to five inputs (image + four prompts), so only
# the first five columns of every row are handed to it.
example_rows = [row[:5] for row in examples_data]
# TAILWIND_SCRIPT = "<script src='https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4'></script>"
def image_to_data_url(image_path):
    """Encode the file at *image_path* as a base64 ``data:`` URL.

    The MIME type is chosen from the (case-insensitive) file extension;
    unknown extensions are labelled ``image/png``. Any failure is reported
    on stdout and signalled to the caller by returning ``None``.
    """
    mime_by_extension = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp'
    }
    try:
        with open(image_path, 'rb') as handle:
            payload = base64.b64encode(handle.read())
        # Pick the MIME type from the extension, defaulting to PNG.
        extension = os.path.splitext(image_path)[1].lower()
        mime_type = mime_by_extension.get(extension, 'image/png')
        return 'data:' + mime_type + ';base64,' + payload.decode('utf-8')
    except Exception as e:
        # Best effort: callers treat None as "could not embed".
        print(f"Error converting image to data URL: {e}")
        return None
# References starting with one of these are left untouched: absolute
# http(s) URLs, data URLs and protocol-relative URLs ("//cdn.example/x.css").
_EXTERNAL_PREFIXES = ("http", "data:", "//")


def _resolve_local_asset(output_dir, ref):
    """Return the file inside *output_dir* that *ref* points to, or ``None``.

    Leading ``./`` segments and root slashes are removed as whole prefixes.
    (``str.lstrip("./")`` strips a *character set*, which also mangled
    ``../x.css`` and ``.hidden.css`` into different file names.)
    ``None`` is returned when the reference is empty, is not a regular
    file, or resolves to a location outside *output_dir*.
    """
    rel = ref
    while True:
        if rel.startswith("./"):
            rel = rel[2:]
        elif rel.startswith("/"):
            rel = rel[1:]
        else:
            break
    if not rel:
        return None
    candidate = output_dir / rel
    try:
        # Refuse references such as "../../secret" that escape output_dir.
        candidate.resolve().relative_to(output_dir.resolve())
    except ValueError:
        return None
    # is_file() rather than exists(): a directory is not a servable asset.
    return candidate if candidate.is_file() else None


def _rewrite_asset_tag(tag, attr, label, output_dir):
    """Point ``tag[attr]`` at a ``/file=`` URL, or drop the tag if the file is missing."""
    ref = tag.get(attr, "")
    if ref.startswith(_EXTERNAL_PREFIXES):
        return
    target = _resolve_local_asset(output_dir, ref)
    if target is not None:
        tag[attr] = f"/file={target}"
        print(f"Fixed {label} path: {ref} -> /file={target}")
    else:
        print(f"Removing non-existent {label}: {ref}")
        tag.decompose()


def patch_css_js_paths(soup: BeautifulSoup, output_dir: Path):
    """
    Fix CSS and JS paths in the HTML to work with Gradio's file serving.

    Every ``<link rel="stylesheet">`` and ``<script src=...>`` whose
    reference is not external is resolved against *output_dir*. If the file
    exists the attribute is rewritten to ``/file=<path>``; otherwise the tag
    is removed from the tree.

    Args:
        soup: Parsed document; it is modified in place.
        output_dir: Directory the relative references are resolved against.

    Returns:
        The same *soup* object. Errors are printed and never propagated,
        so the caller always gets a usable document back.
    """
    try:
        # CSS
        for link in soup.find_all("link", rel=lambda x: x and "stylesheet" in x):
            _rewrite_asset_tag(link, "href", "CSS", output_dir)
        # JS
        for script in soup.find_all("script", src=True):
            _rewrite_asset_tag(script, "src", "JS", output_dir)
    except Exception as e:
        # Deliberately best effort: a broken asset must not abort generation.
        print(f"Error in patch_css_js_paths: {e}")
    return soup
def render_preview(code: str, width: int, height: int, scale: float) -> str:
    """
    Preview renderer with both width and height control for the inner canvas.

    The HTML in *code* is embedded in an ``<iframe srcdoc=...>`` that sits in
    a ``width`` x ``height`` pixel canvas scaled by ``scale``; that canvas is
    centred inside a fixed 1920x1000 px frame.

    Before embedding, ``<script>`` tags whose ``src`` contains ``assets/``,
    ``index-`` or ``iframeResizer`` and ``<link>`` tags whose ``href``
    contains ``assets/`` or ``index-`` are removed (presumably host/bundler
    artifacts - confirm). If that cleaning step fails, the code is embedded
    unmodified.

    Args:
        code: HTML document to preview. ``None`` is treated as an empty
            document and other non-string values are converted with ``str``.
        width: Canvas width in pixels.
        height: Canvas height in pixels.
        scale: CSS scale factor applied to the canvas.

    Returns:
        An HTML fragment containing the framed, scaled iframe.
    """
    # html.escape() below only accepts str; normalise here so that neither
    # the cleaning step nor its fallback can fail on None / non-string input.
    if code is None:
        code = ""
    elif not isinstance(code, str):
        code = str(code)

    script_markers = ('assets/', 'index-', 'iframeResizer')
    link_markers = ('assets/', 'index-')
    try:
        soup = BeautifulSoup(code, 'html.parser')
        for script in soup.find_all('script'):
            src = script.get('src', '')
            if src and any(marker in src for marker in script_markers):
                script.decompose()
        for link in soup.find_all('link'):
            href = link.get('href', '')
            if href and any(marker in href for marker in link_markers):
                link.decompose()
        cleaned_code = str(soup)
    except Exception as e:
        print(f"Error cleaning HTML in render_preview: {e}")
        # Fallback to original code if cleaning fails
        cleaned_code = code

    # quote=True turns both " and ' into entities, so the value is safe
    # inside the single-quoted srcdoc attribute with no further replacement.
    safe_code = html.escape(cleaned_code, quote=True)
    return f"""
<div style="width: 100%; max-width: 1920px; margin: 0 auto; overflow-x: auto; overflow-y: hidden;">
<div style="
width: 1920px;
height: 1000px;
margin: 0 auto;
display: flex;
justify-content: center;
align-items: center;
border: 1px solid #ddd;
overflow: hidden;
background: #f9fafb;
position: relative;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
<div style="
width: {width}px;
height: {height}px;
transform: scale({scale});
transform-origin: left center;
border: none;
position: relative;">
<iframe
style="width: 100%; height: 100%; border: none; display: block;"
srcdoc='{safe_code}'>
</iframe>
</div>
</div>
</div>
"""
def _process_html(html_content, output_dir):
    """Make generated HTML displayable and build its initial preview.

    CSS/JS references are rewritten via ``patch_css_js_paths`` and every
    local ``<img src>`` that exists under *output_dir* is inlined as a data
    URL (falling back to a ``/file=`` URL when inlining fails).

    Returns:
        ``(preview_html, processed_html)``; two empty strings when
        *html_content* is empty.
    """
    if not html_content:
        return "", ""
    soup = BeautifulSoup(html_content, 'html.parser')
    # patch_css_js_paths reports its own errors and always returns the soup.
    soup = patch_css_js_paths(soup, output_dir)
    for img in soup.find_all('img'):
        src = img.get('src')
        if not src or src.startswith(('http', 'data:')):
            continue
        img_path = output_dir / src
        if not img_path.exists():
            continue  # keep the original reference if the file is missing
        img['src'] = image_to_data_url(str(img_path)) or f'/file={str(img_path)}'
    processed_html = str(soup)
    # Same width/height/zoom as the sliders' default values.
    preview = render_preview(processed_html, 1920, 1080, 0.55)
    return preview, processed_html


def process_and_generate(image_input, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
    """
    Main processing pipeline: takes an image (path or numpy), generates code, creates a downloadable
    package, and returns the initial preview and code outputs for both layout and final versions.

    Args:
        image_input: Path of the image, or an RGB numpy array. A path that
            does not exist is ignored in favour of *image_path_from_state*.
        image_path_from_state: Fallback image path remembered by the UI.
        sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt:
            Optional per-region instructions forwarded to the generator.

    Returns:
        A 6-tuple matching the outputs wired to the "Generate HTML" button:
        ``(layout preview, final preview, final code, layout code state,
        final code state, download-button update)``. On failure the first
        element carries the message, the code/state slots are empty strings
        and the download button is hidden.
    """
    def failure(message):
        # Keep this aligned with the six outputs: the states must receive
        # strings and the button update must be last.
        return message, "", "", "", "", gr.update(visible=False)

    final_image_path = ""
    is_temp_file = False
    try:
        if isinstance(image_input, str):
            # A stale path must not fall through to the numpy branch below.
            if os.path.exists(image_input):
                final_image_path = image_input
        elif image_input is not None:  # Assumes numpy array
            with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
                final_image_path = tmp.name
            is_temp_file = True
            # Gradio Image component provides RGB numpy array; OpenCV writes BGR.
            cv2.imwrite(final_image_path, cv2.cvtColor(image_input, cv2.COLOR_RGB2BGR))
        if not final_image_path and image_path_from_state:
            final_image_path = image_path_from_state
        if not final_image_path:
            return failure("No image provided.")

        instructions = {
            "sidebar": sidebar_prompt, "header": header_prompt,
            "navigation": navigation_prompt, "main content": main_content_prompt
        }
        layout_html, final_html, run_id = generate_html_for_demo(final_image_path, instructions)
        if not run_id:  # Handle potential errors from the generator
            # In this case the first return value is used as the error text.
            return failure(f"Generation failed. Error: {layout_html}")

        base_dir = Path(__file__).parent.resolve()
        data_dir = base_dir / 'screencoder' / 'data'
        output_dir = data_dir / 'output' / run_id

        # --- Process both HTML versions ---
        layout_preview, layout_code = _process_html(layout_html, output_dir)
        final_preview, final_code = _process_html(final_html, output_dir)

        # --- Package the output ---
        packages_dir = data_dir / 'packages'
        packages_dir.mkdir(parents=True, exist_ok=True)
        package_path = packages_dir / f'{run_id}.zip'
        shutil.make_archive(str(packages_dir / run_id), 'zip', str(output_dir))
        package_url = f'/file={str(package_path)}'

        # Return all the outputs, including for the state objects
        return layout_preview, final_preview, final_code, layout_code, final_code, gr.update(value=package_url, visible=True)
    finally:
        # Remove the temporary upload on every exit path, including errors.
        if is_temp_file and os.path.exists(final_image_path):
            os.unlink(final_image_path)
# Build the Gradio interface. The inline CSS loads the Poppins and JetBrains
# Mono web fonts and restyles headings plus the ".gr-*" class selectors
# (buttons, text boxes, sliders, tabs, accordions, markdown, examples, code).
with gr.Blocks(css="""
@import url('https://fonts.googleapis.com/css2?family=Poppins:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500;600&display=swap');
* {
font-family: 'Poppins', -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Helvetica Neue', Arial, sans-serif !important;
font-feature-settings: 'liga' 1, 'calt' 1 !important;
text-rendering: optimizeLegibility !important;
-webkit-font-smoothing: antialiased !important;
-moz-osx-font-smoothing: grayscale !important;
}
h1, h2, h3, h4, h5, h6 {
font-weight: 600 !important;
color: #1f2937 !important;
letter-spacing: -0.02em !important;
line-height: 1.2 !important;
}
h1 {
font-size: 2.5rem !important;
font-weight: 700 !important;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
background-clip: text;
margin-bottom: 1.5rem !important;
letter-spacing: -0.03em !important;
}
h2 {
font-size: 1.75rem !important;
font-weight: 600 !important;
color: #374151 !important;
margin-bottom: 1rem !important;
letter-spacing: -0.01em !important;
}
.gr-button {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
border-radius: 10px !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
letter-spacing: 0.01em !important;
}
.gr-button:hover {
transform: translateY(-2px) !important;
box-shadow: 0 8px 25px rgba(0, 0, 0, 0.15) !important;
}
.gr-textbox, .gr-slider {
border-radius: 10px !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-textbox input, .gr-textbox textarea {
font-family: 'Poppins', sans-serif !important;
font-size: 14px !important;
font-weight: 400 !important;
letter-spacing: 0.01em !important;
line-height: 1.5 !important;
}
.gr-slider {
font-family: 'Poppins', sans-serif !important;
font-weight: 500 !important;
}
.gr-tabs {
border-radius: 12px !important;
overflow: hidden !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-tab-nav {
background: linear-gradient(135deg, #f8fafc 0%, #e2e8f0 100%) !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-tab-nav button {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
color: #64748b !important;
transition: all 0.3s cubic-bezier(0.4, 0, 0.2, 1) !important;
letter-spacing: 0.01em !important;
}
.gr-tab-nav button.selected {
color: #3b82f6 !important;
background: white !important;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
font-weight: 600 !important;
}
.gr-accordion {
border-radius: 12px !important;
border: 1px solid #e5e7eb !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-accordion-header {
font-weight: 500 !important;
font-family: 'Poppins', sans-serif !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-markdown {
font-family: 'Poppins', sans-serif !important;
line-height: 1.7 !important;
font-weight: 400 !important;
letter-spacing: 0.01em !important;
}
.gr-markdown strong {
color: #059669 !important;
font-weight: 600 !important;
}
.gr-examples {
border-radius: 12px !important;
border: 1px solid #e5e7eb !important;
background: #f9fafb !important;
font-family: 'Poppins', sans-serif !important;
}
.gr-examples-header {
font-weight: 600 !important;
font-family: 'Poppins', sans-serif !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-code {
font-family: 'JetBrains Mono', 'Fira Code', 'Consolas', 'Monaco', monospace !important;
font-size: 13px !important;
line-height: 1.6 !important;
letter-spacing: 0.01em !important;
}
.gr-label {
font-family: 'Poppins', sans-serif !important;
font-weight: 500 !important;
color: #374151 !important;
letter-spacing: 0.01em !important;
}
.gr-dropdown {
font-family: 'Poppins', sans-serif !important;
font-weight: 400 !important;
}
.gr-checkbox {
font-family: 'Poppins', sans-serif !important;
font-weight: 400 !important;
}
""") as demo:
# Page header: title, repository link and usage tips.
gr.Markdown("# ScreenCoder: Advancing Visual-to-Code Generation for Front-End Automation via Modular Multimodal Agents")
gr.Markdown("## [Github](https://github.com/leigest519/ScreenCoder/tree/main)")
gr.Markdown("**Tips**: Use the sliders to adjust preview size and zoom level. Swipe to change viewing angle. Click download button to get the package.")
# Layout: a narrow input column (scale=1) and a wider output column (scale=2).
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("## Step 1: Provide an Image")
# type="filepath": the component's value is exchanged as a file path.
active_image = gr.Image(type="filepath", height=400)
upload_button = gr.UploadButton("Click to Upload", file_types=["image"], variant="primary")
gr.Markdown("## Step 2: Write Prompts (Optional)")
# One optional free-text instruction per page region; collapsed by default.
with gr.Accordion("Component-specific Prompts", open=False):
sidebar_prompt = gr.Textbox(label="Sidebar", placeholder="Instructions for the sidebar...")
header_prompt = gr.Textbox(label="Header", placeholder="Instructions for the header...")
navigation_prompt = gr.Textbox(label="Navigation", placeholder="Instructions for the navigation...")
main_content_prompt = gr.Textbox(label="Main Content", placeholder="Instructions for the main content...")
generate_btn = gr.Button("Generate HTML", variant="primary")
with gr.Column(scale=2):
gr.Markdown("## Preview Area")
# NOTE(review): the variable suffixes look swapped relative to the tab
# labels. The unsuffixed widgets follow the "Preview With Placeholder" tab
# and are wired to layout_code_state below; the "*_with_placeholder"
# widgets follow the "Preview" tab and are wired to final_code_state.
with gr.Tabs():
with gr.TabItem("Preview With Placeholder"):
with gr.Row():
# Slider defaults (0.55 / 1920 / 1080) match the values that
# process_and_generate uses for the initial preview.
scale_slider = gr.Slider(0.2, 1.5, value=0.55, step=0.05, label="Zoom")
width_slider = gr.Slider(400, 2000, value=1920, step=50, label="Canvas Width (px)")
height_slider = gr.Slider(300, 1200, value=1080, step=50, label="Canvas Height (px)")
html_preview = gr.HTML(label="Rendered HTML", show_label=False)
with gr.TabItem("Preview"):
with gr.Row():
scale_slider_with_placeholder = gr.Slider(0.2, 1.5, value=0.55, step=0.05, label="Zoom")
width_slider_with_placeholder = gr.Slider(400, 2000, value=1920, step=100, label="Canvas Width (px)")
height_slider_with_placeholder = gr.Slider(300, 1200, value=1080, step=50, label="Canvas Height (px)")
html_preview_with_placeholder = gr.HTML(label="Rendered HTML", show_label=False)
with gr.TabItem("Code"):
html_code_output = gr.Code(label="Generated HTML", language="html")
# Created hidden; process_and_generate makes it visible on success.
download_button = gr.Button("Download Package", visible=False, variant="secondary")
# Each example row fills the image and the four prompt boxes, in this order.
gr.Examples(
examples=example_rows,
inputs=[active_image, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
cache_examples=False,
label="Examples"
)
# State to hold the HTML content for each preview tab
layout_code_state = gr.State("")
final_code_state = gr.State("")
# Remembers the current image path as a fallback input for generation.
active_image_path_state = gr.State()
# Mirror the image component's value into the path state (None unless str).
active_image.change(
lambda p: p if isinstance(p, str) else None,
inputs=active_image,
outputs=active_image_path_state,
show_progress=False
)
# On page load, preselect the first example image in both places.
demo.load(
lambda: (examples_data[0][0], examples_data[0][0]), None, [active_image, active_image_path_state]
)
def handle_upload(uploaded_image_np):
# When a new image is uploaded, it's numpy. Clear the path state.
# NOTE(review): this handler is fed by gr.UploadButton, which may pass a
# file reference rather than a numpy array - confirm.
# Also hides the download button, since any earlier package is stale.
return uploaded_image_np, None, gr.update(visible=False)
upload_button.upload(handle_upload, upload_button, [active_image, active_image_path_state, download_button])
# Outputs are positional: process_and_generate must return six values in
# exactly this order.
generate_btn.click(
process_and_generate,
[active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
[html_preview, html_preview_with_placeholder, html_code_output, layout_code_state, final_code_state, download_button],
show_progress="full"
)
# Re-render the layout preview whenever any of its three sliders changes.
preview_controls = [scale_slider, width_slider, height_slider]
for control in preview_controls:
control.change(
render_preview,
[layout_code_state, width_slider, height_slider, scale_slider],
html_preview,
show_progress=True
)
# Same wiring for the second preview, driven by final_code_state.
preview_controls_with_placeholder = [scale_slider_with_placeholder, width_slider_with_placeholder, height_slider_with_placeholder]
for control in preview_controls_with_placeholder:
control.change(
render_preview,
[final_code_state, width_slider_with_placeholder, height_slider_with_placeholder, scale_slider_with_placeholder],
html_preview_with_placeholder,
show_progress=True
)
# No Python callback (fn=None): the JS snippet receives the button's value,
# which process_and_generate sets to the package URL, and clicks a temporary
# <a download> link pointing at it.
download_button.click(None, download_button, None, js= \
"(url) => { const link = document.createElement('a'); link.href = url; link.download = ''; document.body.appendChild(link); link.click(); document.body.removeChild(link); }")
# Directories handed to demo.launch(allowed_paths=...) for file serving.
# NOTE(review): the first entry is the application directory itself, which
# already contains the output and packages directories listed after it.
base_dir = Path(__file__).parent.resolve()
allowed_paths = [
    str(base_dir.joinpath(*parts))
    for parts in (
        (),
        ('screencoder', 'data', 'output'),
        ('screencoder', 'data', 'packages'),
    )
]

# Add the folder of every example image, skipping ones already listed.
example_dirs = (
    str((base_dir / row[0]).resolve().parent) for row in examples_data
)
for example_dir in example_dirs:
    if example_dir in allowed_paths:
        continue
    allowed_paths.append(example_dir)

print("Allowed paths for file serving:")
print("\n".join(f" - {entry}" for entry in allowed_paths))
if __name__ == "__main__":
    # Start the server only when executed as a script: listen on every
    # interface on port 7860, without a public share link.
    launch_options = {
        "allowed_paths": allowed_paths,
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
    }
    demo.launch(**launch_options)