Spaces:

Jimmyzheng-10
/

ScreenCoder

Running

App Files Files Community

ScreenCoder / screencoder /image_replacer.py

Jimmyzheng-10

Add app.py and the screencoder repo

a383d0e 23 days ago

raw

history blame

6.17 kB

	import argparse
	import json
	from pathlib import Path
	from bs4 import BeautifulSoup
	import cv2
	import re

	def _natural_sort_key(s):
	    """Return a sort key that orders embedded ASCII digit runs numerically.

	    With this key "ph2" sorts before "ph10". ``re.split`` with a capture
	    group always alternates non-digit / digit pieces, so odd positions are
	    exactly the captured ``[0-9]+`` runs. Keying off the position (instead
	    of ``str.isdigit``) avoids calling ``int`` on characters such as "²"
	    that ``isdigit`` accepts but ``int`` rejects.
	    """
	    pieces = re.split(r'([0-9]+)', s)
	    return [int(piece) if idx % 2 else piece.lower() for idx, piece in enumerate(pieces)]


	def _crop_and_save_images(mapping_data, uied_data, original_image, crop_dir):
	    """Crop each mapped UIED component from the screenshot and save it as PNG.

	    Args:
	        mapping_data: dict of region id -> {"mapping": {placeholder_id: uied_id}}.
	        uied_data: dict with "img_shape" and a "compos" list whose items have
	            "id", "column_min", "row_min", "width" and "height".
	        original_image: image array as returned by ``cv2.imread``.
	        crop_dir: directory in which "<placeholder_id>.png" files are written.

	    Returns:
	        The set of placeholder IDs for which an image file was written.
	    """
	    # Only height and width are needed; slicing tolerates a missing channel entry.
	    h_proc_img, w_proc_img = uied_data['img_shape'][:2]
	    h_img, w_img = original_image.shape[:2]
	    scale_x = w_img / w_proc_img
	    scale_y = h_img / h_proc_img
	    print(f"Using global scaling for cropping: scale_x={scale_x:.3f}, scale_y={scale_y:.3f}")

	    uied_boxes = {
	        comp['id']: (comp['column_min'], comp['row_min'], comp['width'], comp['height'])
	        for comp in uied_data['compos']
	    }

	    # parents=True so a not-yet-existing output directory does not abort the run.
	    crop_dir.mkdir(parents=True, exist_ok=True)
	    print(f"Saving cropped images to: {crop_dir.resolve()}")

	    saved_ids = set()
	    for region_data in mapping_data.values():
	        for placeholder_id, uied_id in region_data['mapping'].items():
	            if uied_id not in uied_boxes:
	                print(f"Warning: UIED ID {uied_id} from mapping not found. Skipping placeholder {placeholder_id}.")
	                continue

	            x_proc, y_proc, w_proc, h_proc = uied_boxes[uied_id]

	            # Scale the box to the original image, then clamp it to the image bounds.
	            x1 = max(0, int(x_proc * scale_x))
	            y1 = max(0, int(y_proc * scale_y))
	            x2 = min(w_img, int(x_proc * scale_x + w_proc * scale_x))
	            y2 = min(h_img, int(y_proc * scale_y + h_proc * scale_y))

	            cropped_img = original_image[y1:y2, x1:x2]
	            if cropped_img.size == 0:
	                print(f"Warning: Cropped image for {placeholder_id} is empty. Skipping.")
	                continue

	            output_path = crop_dir / f"{placeholder_id}.png"
	            # cv2.imwrite reports failure through its return value, not an exception.
	            if not cv2.imwrite(str(output_path), cropped_img):
	                print(f"Warning: Could not write cropped image to {output_path}. Skipping.")
	                continue
	            saved_ids.add(placeholder_id)

	    return saved_ids


	def _ordered_placeholder_ids(mapping_data):
	    """List placeholder IDs by numeric region order, naturally sorted within a region."""
	    ordered_ids = []
	    for region_id in sorted(mapping_data, key=int):
	        region_mapping = mapping_data[region_id]['mapping']
	        ordered_ids.extend(sorted(region_mapping, key=_natural_sort_key))
	    return ordered_ids


	def main(args):
	    """Replace gray placeholder boxes in an HTML layout with cropped screenshots.

	    Phase 1 crops every mapped UIED component out of the original image and
	    writes it to "cropped_images/" next to the output HTML. Phase 2 pairs the
	    ``bg-gray-400`` elements (in document order) with the sorted placeholder
	    IDs and swaps each one for an ``<img>`` tag that keeps the other classes.
	    A placeholder whose crop was not written keeps its gray box, so the
	    result never references an image file this run did not produce.

	    Args:
	        args: namespace with ``mapping``, ``uied``, ``original_image``,
	            ``gray_html`` and ``output_html`` attributes, all ``pathlib.Path``.

	    Raises:
	        ValueError: if the original image cannot be loaded.
	    """
	    # --- Phase 1: Crop and Save All Images First ---

	    # Read text explicitly as UTF-8 instead of the locale default.
	    # NOTE(review): assumes the input JSON/HTML files are UTF-8 encoded.
	    mapping_data = json.loads(args.mapping.read_text(encoding="utf-8"))
	    uied_data = json.loads(args.uied.read_text(encoding="utf-8"))
	    original_image = cv2.imread(str(args.original_image))

	    if original_image is None:
	        raise ValueError(f"Could not load the original image from {args.original_image}")

	    crop_dir = args.output_html.parent / "cropped_images"
	    saved_ids = _crop_and_save_images(mapping_data, uied_data, original_image, crop_dir)

	    # --- Phase 2: Use BeautifulSoup to Replace Placeholders by Order ---

	    print("\nStarting offline HTML processing with BeautifulSoup...")
	    html_content = args.gray_html.read_text(encoding="utf-8")
	    soup = BeautifulSoup(html_content, 'html.parser')

	    # Placeholders are matched to IDs purely by position, so both sequences
	    # must be in a well-defined order.
	    placeholder_elements = soup.find_all(class_="bg-gray-400")
	    ordered_placeholder_ids = _ordered_placeholder_ids(mapping_data)

	    if len(placeholder_elements) != len(ordered_placeholder_ids):
	        print(f"Warning: Mismatch in counts! Found {len(placeholder_elements)} gray boxes in HTML, but {len(ordered_placeholder_ids)} mappings.")
	    else:
	        print(f"Found {len(placeholder_elements)} gray boxes to replace.")

	    replaced_count = 0
	    for ph_element, ph_id in zip(placeholder_elements, ordered_placeholder_ids):
	        if ph_id not in saved_ids:
	            # No file was written for this ID; an <img> would be a broken link.
	            print(f"Warning: No cropped image was saved for {ph_id}. Leaving its gray box in place.")
	            continue

	        # Keep every styling class except the gray placeholder background.
	        kept_classes = [cls for cls in ph_element.get('class', []) if cls != 'bg-gray-400']

	        img_tag = soup.new_tag("img", src=f"{crop_dir.name}/{ph_id}.png")
	        img_tag['class'] = kept_classes
	        ph_element.replace_with(img_tag)
	        replaced_count += 1

	    if len(placeholder_elements) > len(ordered_placeholder_ids):
	        print(f"Warning: More gray boxes in HTML than mappings. Stopping at box {len(ordered_placeholder_ids)+1}.")

	    # Save the modified HTML
	    args.output_html.write_text(str(soup), encoding="utf-8")

	    print(f"\nSuccessfully replaced {replaced_count} placeholders.")
	    print(f"Final HTML generated at {args.output_html.resolve()}")


	if __name__ == "__main__":
	    # Command-line entry point. Every option is optional and falls back to
	    # the bundled "test1" sample paths listed below.
	    # Each row: (flag, default path, help text).
	    option_table = (
	        ("--mapping", 'data/tmp/mapping_full_test1.json', "Path to the mapping JSON file from mapping.py."),
	        ("--uied", 'data/tmp/ip/test1.json', "Path to the UIED JSON file."),
	        ("--original-image", 'data/input/test1.png', "Path to the original screenshot image."),
	        ("--gray-html", 'data/output/test1_layout.html', "Path to the input HTML file with gray placeholders."),
	        ("--output-html", 'data/output/test1_layout_final.html', "Path to save the final, modified HTML file."),
	    )

	    parser = argparse.ArgumentParser(
	        description="Replace placeholder divs in an HTML file with cropped images based on UIED mappings."
	    )
	    for flag, fallback_path, help_text in option_table:
	        parser.add_argument(flag, type=Path, required=False, default=Path(fallback_path), help=help_text)

	    args = parser.parse_args()
	    main(args)