# ScreenCoder / app.py
# Last commit by Jimmyzheng-10 (2ad4034):
#   Enhanced HTML cleaning to remove iframeResizer scripts and fix console errors
import gradio as gr
import os
import tempfile
import cv2
import numpy as np
import urllib.parse
from screencoder.main import generate_html_for_demo
from PIL import Image
import shutil
import html
import base64
from bs4 import BeautifulSoup
from pathlib import Path
# Predefined examples.
# Each row has six entries: the example image path, four empty strings
# (one per optional prompt box), and the same image path again, which is
# used to seed the "active image path" state.
examples_data = [
    [image_path, "", "", "", "", image_path]
    for image_path in (
        "screencoder/data/input/test1.png",
        "screencoder/data/input/test2.png",
        "screencoder/data/input/test3.png",
    )
]

# Script tag injected into the page <head> so Tailwind classes resolve.
TAILWIND_SCRIPT = "<script src='https://cdn.jsdelivr.net/npm/@tailwindcss/browser@4'></script>"
def image_to_data_url(image_path):
    """Return the file at *image_path* encoded as a ``data:`` URL.

    The MIME type is chosen from the (case-insensitive) file extension and
    falls back to ``image/png`` for unknown extensions. Any failure is
    reported on stdout and signalled to the caller by returning ``None``.
    """
    mime_by_extension = {
        '.png': 'image/png',
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.gif': 'image/gif',
        '.webp': 'image/webp',
    }
    try:
        with open(image_path, 'rb') as handle:
            raw_bytes = handle.read()
        # Pick the MIME type from the extension only; the bytes are not sniffed.
        suffix = os.path.splitext(image_path)[1].lower()
        mime_type = mime_by_extension.get(suffix, 'image/png')
        payload = base64.b64encode(raw_bytes).decode('utf-8')
    except Exception as e:
        # Best-effort helper: callers treat None as "could not embed".
        print(f"Error converting image to data URL: {e}")
        return None
    return f'data:{mime_type};base64,{payload}'
def render_preview(code: str, width: int, height: int, scale: float) -> str:
    """
    Preview renderer with both width and height control for the inner canvas.

    Args:
        code: HTML document to preview. ``None`` or an empty string is
            accepted and renders an empty iframe.
        width: Width of the inner canvas, in CSS pixels.
        height: Height of the inner canvas, in CSS pixels.
        scale: CSS ``transform: scale(...)`` factor applied to the canvas.

    Returns:
        An HTML fragment containing a fixed-size frame with the cleaned
        document embedded through the iframe ``srcdoc`` attribute.
    """
    # This function is also wired to the zoom/size sliders, which can fire
    # before any HTML has been generated; treat a missing value as empty
    # instead of letting the parser fail on None.
    soup = BeautifulSoup(code or "", 'html.parser')

    asset_markers = ('assets/', 'index-')
    script_src_markers = asset_markers + ('iframeResizer',)

    # Remove script tags that reference local assets or iframeResizer, and
    # inline scripts whose text mentions local assets.
    for script in soup.find_all('script'):
        src = script.get('src', '')
        inline = script.string
        bad_src = bool(src) and any(marker in src for marker in script_src_markers)
        bad_inline = bool(inline) and any(marker in inline for marker in asset_markers)
        if bad_src or bad_inline:
            script.decompose()

    # Remove any link tags that reference local CSS assets.
    for link in soup.find_all('link'):
        href = link.get('href', '')
        if href and any(marker in href for marker in asset_markers):
            link.decompose()

    cleaned_code = str(soup)
    # html.escape (quote=True by default) escapes both double and single
    # quotes, so the result is safe inside the single-quoted srcdoc below.
    safe_code = html.escape(cleaned_code)

    iframe_html = f"""
<div style="width: 100%; max-width: 1920px; margin: 0 auto; overflow-x: auto; overflow-y: hidden;">
<div style="
width: 1920px;
height: 1000px;
margin: 0 auto;
display: flex;
justify-content: center;
align-items: center;
border: 1px solid #ddd;
overflow: hidden;
background: #f9fafb;
position: relative;
box-shadow: 0 4px 12px rgba(0,0,0,0.1);">
<div style="
width: {width}px;
height: {height}px;
transform: scale({scale});
transform-origin: top left;
border: none;
position: relative;">
<iframe
style="width: 100%; height: 100%; border: none; display: block;"
srcdoc='{safe_code}'>
</iframe>
</div>
</div>
</div>
"""
    return iframe_html
def _generate_outputs(image_path, instructions):
    """Run the generator on *image_path* and build the three UI outputs."""
    html_content, run_id = generate_html_for_demo(image_path, instructions)
    if not run_id:  # Handle potential errors from the generator
        return "Generation failed.", f"Error: {html_content}", gr.update(visible=False)

    # HF Spaces: Use Path objects for robust path handling
    base_dir = Path(__file__).parent.resolve()
    output_dir = base_dir / 'screencoder' / 'data' / 'output' / run_id
    soup = BeautifulSoup(html_content, 'html.parser')
    print(f"Processing HTML for run_id: {run_id}")

    # Clean up problematic script and CSS references
    for script in soup.find_all('script'):
        src = script.get('src', '')
        if src and ('assets/' in src or 'index-' in src or 'iframeResizer' in src):
            print(f"Removing problematic script: {src}")
            script.decompose()
    for link in soup.find_all('link'):
        href = link.get('href', '')
        if href and ('assets/' in href or 'index-' in href):
            print(f"Removing problematic CSS link: {href}")
            link.decompose()
    # Also remove any inline scripts that might contain problematic content
    for script in soup.find_all('script'):
        if script.string and ('assets/' in script.string or 'index-' in script.string):
            print("Removing problematic inline script")
            script.decompose()

    # Inline local images so they render inside the sandboxed preview iframe.
    for img in soup.find_all('img'):
        original_src = img.get('src')
        if not original_src or original_src.startswith(('http', 'data:')):
            continue
        print(f"Processing image: {original_src}")
        # In HF Spaces, paths can be tricky. We'll rely on the fact
        # that the image replacer creates a predictable structure.
        img_path = output_dir / original_src
        if not img_path.exists():
            # Leave the original src untouched as a fallback.
            print(f"Image not found at expected path: {img_path}")
            continue
        print(f"Found image at: {img_path}")
        # Convert to base64 data URL for better iframe compatibility
        data_url = image_to_data_url(str(img_path))
        if data_url:
            print(f"Converted to data URL: {original_src}")
            img['src'] = data_url
        else:
            # Fallback to Gradio file path (might not work in all Spaces configs)
            img['src'] = f'/file={str(img_path)}'
    html_content = str(soup)

    # Zip the run's output directory so it can be offered as a download.
    packages_dir = base_dir / 'screencoder' / 'data' / 'packages'
    packages_dir.mkdir(parents=True, exist_ok=True)
    shutil.make_archive(str(packages_dir / run_id), 'zip', str(output_dir))
    package_path = packages_dir / f'{run_id}.zip'
    package_url = f'/file={str(package_path)}'

    initial_preview = render_preview(html_content, 1280, 600, 0.7)
    return initial_preview, html_content, gr.update(value=package_url, visible=True)


def process_and_generate(image_np, image_path_from_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt):
    """
    Main processing pipeline: takes an image, generates code, creates a downloadable
    package, and returns the initial preview and code outputs.

    Args:
        image_np: Image array from the image component; used only when no
            path is stored in the state. Converted with COLOR_RGB2BGR before
            being written, so it is treated as RGB.
        image_path_from_state: Path of the currently selected image, or a
            falsy value when the image should be taken from ``image_np``.
        sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt:
            Optional per-region instructions forwarded to the generator.

    Returns:
        A 3-tuple ``(preview_html, html_code, download_button_update)``.
        Every return path yields exactly three values so it matches the
        three outputs the click handler is wired to.
    """
    final_image_path = ""
    is_temp_file = False
    if image_path_from_state:
        final_image_path = image_path_from_state
    elif image_np is not None:
        is_temp_file = True
        # Only reserve a unique file name here; the image is written after
        # the handle is closed so the path is not held open while OpenCV
        # writes to it.
        with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as tmp:
            final_image_path = tmp.name
    else:
        return "No image provided.", "Please upload or select an image.", gr.update(visible=False)

    instructions = {
        "sidebar": sidebar_prompt, "header": header_prompt,
        "navigation": navigation_prompt, "main content": main_content_prompt
    }

    try:
        if is_temp_file:
            cv2.imwrite(final_image_path, cv2.cvtColor(image_np, cv2.COLOR_RGB2BGR))
        return _generate_outputs(final_image_path, instructions)
    finally:
        # Always remove the temporary screenshot, including when generation
        # fails or raises, so failed runs do not leave files behind.
        if is_temp_file:
            try:
                os.unlink(final_image_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
# Gradio UI definition and event wiring.
# NOTE(review): the nesting below was reconstructed from a copy of the file
# that had lost its indentation — confirm the layout (in particular where
# download_button and gr.Examples sit) against the deployed app.
with gr.Blocks(head=TAILWIND_SCRIPT, theme=gr.themes.Soft(primary_hue="blue", secondary_hue="sky")) as demo:
    gr.Markdown("# ScreenCoder: Screenshot to Code")
    with gr.Row():
        # Left column: image input, optional prompts, and the generate button.
        with gr.Column(scale=1):
            gr.Markdown("### Step 1: Provide an Image")
            active_image = gr.Image(type="numpy", height=400, value=examples_data[0][0])
            upload_button = gr.UploadButton("Click to Upload", file_types=["image"], variant="primary")
            gr.Markdown("### Step 2: Write Prompts (Optional)")
            with gr.Accordion("Component-specific Prompts", open=False):
                sidebar_prompt = gr.Textbox(label="Sidebar", placeholder="Instructions for the sidebar...")
                header_prompt = gr.Textbox(label="Header", placeholder="Instructions for the header...")
                navigation_prompt = gr.Textbox(label="Navigation", placeholder="Instructions for the navigation...")
                main_content_prompt = gr.Textbox(label="Main Content", placeholder="Instructions for the main content...")
            generate_btn = gr.Button("Generate HTML", variant="primary")
        # Right column: rendered preview, generated code, and download button.
        with gr.Column(scale=2):
            with gr.Tabs():
                with gr.TabItem("Preview"):
                    with gr.Row():
                        # Slider defaults match the values process_and_generate
                        # uses for the initial preview (1280 x 600 at 0.7).
                        scale_slider = gr.Slider(0.2, 1.5, value=0.7, step=0.05, label="Zoom")
                        width_slider = gr.Slider(400, 1920, value=1280, step=100, label="Canvas Width (px)")
                        height_slider = gr.Slider(300, 1080, value=600, step=50, label="Canvas Height (px)")
                    html_preview = gr.HTML(label="Rendered HTML", show_label=False)
                with gr.TabItem("Code"):
                    html_code_output = gr.Code(label="Generated HTML", language="html")
            # Hidden until a package exists; its value carries the package URL.
            download_button = gr.Button("⬇️ Download Package", visible=False, variant="secondary")
    # NOTE(review): the examples use a throwaway gr.State as their only input
    # although each example row has six entries — confirm that clicking an
    # example actually updates active_image / active_image_path_state.
    gr.Examples(
        examples=examples_data,
        fn=lambda *args: args[0],
        inputs=[gr.State(examples_data[0][0])],
        outputs=[active_image],
        cache_examples=False,
    )
    # Path of the currently selected image; when set, process_and_generate
    # uses it instead of the image array.
    active_image_path_state = gr.State(examples_data[0][5])

    # NOTE(review): not referenced anywhere in the visible code.
    def handle_example_click(img_path):
        return img_path, img_path

    # On page load, show the first example and record its path in the state.
    demo.load(
        lambda: (examples_data[0][0], examples_data[0][5]), None, [active_image, active_image_path_state]
    )

    # On upload: show the upload, clear the stored path so generation falls
    # back to the image component's value, and hide any stale download button.
    # NOTE(review): the argument comes from the UploadButton; despite the
    # parameter name it may be a file path rather than an array — confirm.
    def handle_upload(uploaded_image_np):
        return uploaded_image_np, None, gr.update(visible=False)

    upload_button.upload(handle_upload, upload_button, [active_image, active_image_path_state, download_button])
    # Three outputs: process_and_generate is expected to return three values.
    generate_btn.click(
        process_and_generate,
        [active_image, active_image_path_state, sidebar_prompt, header_prompt, navigation_prompt, main_content_prompt],
        [html_preview, html_code_output, download_button],
        show_progress="full"
    )
    # Re-render the preview from the current code whenever any slider changes.
    preview_controls = [scale_slider, width_slider, height_slider]
    for control in preview_controls:
        control.change(
            render_preview,
            [html_code_output, width_slider, height_slider, scale_slider],
            html_preview,
            show_progress=True
        )
    # Client-side only (fn is None): the JS receives the button's value (the
    # package URL) and triggers a download through a temporary <a> element.
    download_button.click(None, download_button, None, js= \
        "(url) => { const link = document.createElement('a'); link.href = url; link.download = ''; document.body.appendChild(link); link.click(); document.body.removeChild(link); }")
# Directories Gradio is allowed to serve files from.
# NOTE(review): listing base_dir itself makes every file under the app
# directory servable and makes the two sub-directory entries redundant —
# confirm that this breadth is intended.
base_dir = Path(__file__).parent.resolve()
allowed_paths = [
    str(base_dir),
    str(base_dir / 'screencoder' / 'data' / 'output'),
    str(base_dir / 'screencoder' / 'data' / 'packages'),
]

# Make sure the directory of every example image is servable as well.
# Example paths are relative, so they are anchored at base_dir first.
for example in examples_data:
    example_dir = str((base_dir / example[0]).resolve().parent)
    if example_dir not in allowed_paths:
        allowed_paths.append(example_dir)

print("Allowed paths for file serving:")
print("\n".join(f" - {path}" for path in allowed_paths))
if __name__ == "__main__":
    # Let launch() block the main thread (Gradio's default). Nothing runs
    # after this call, so passing prevent_thread_lock=True would let the
    # script finish immediately, and Gradio documents that the server
    # terminates as soon as the script finishes in that mode.
    demo.launch(
        allowed_paths=allowed_paths,
        show_error=True,
        quiet=False,
        favicon_path=None,
    )