Chain-of-Zoom / app.py
alexnasa's picture
Update app.py
24ee135 verified
raw
history blame
6.87 kB
import gradio as gr
import subprocess
import os
import shutil
from pathlib import Path
from PIL import Image
import spaces
# -----------------------------------------------------------------------------
# CONFIGURE THESE PATHS TO MATCH YOUR PROJECT STRUCTURE
# -----------------------------------------------------------------------------
INPUT_DIR = "samples"
OUTPUT_DIR = "inference_results/coz_vlmprompt"
# -----------------------------------------------------------------------------
# HELPER FUNCTION TO RUN INFERENCE AND RETURN THE OUTPUT IMAGE
# -----------------------------------------------------------------------------
@spaces.GPU()
def run_with_upload(uploaded_image_path, upscale_option):
"""
1) Clear INPUT_DIR
2) Save the uploaded file as input.png in INPUT_DIR
3) Read `upscale_option` (e.g. "1x", "2x", "4x") → turn it into "1", "2", or "4"
4) Call inference_coz.py with `--upscale <that_value>`
5) (Here we assume you still stitch together 1.png–4.png, or however you want.)
"""
# 1) Make sure INPUT_DIR exists; if it does, delete everything inside.
os.makedirs(INPUT_DIR, exist_ok=True)
for fn in os.listdir(INPUT_DIR):
full_path = os.path.join(INPUT_DIR, fn)
try:
if os.path.isfile(full_path) or os.path.islink(full_path):
os.remove(full_path)
elif os.path.isdir(full_path):
shutil.rmtree(full_path)
except Exception as e:
print(f"Warning: could not delete {full_path}: {e}")
# 2) Copy the uploaded image into INPUT_DIR.
# Gradio will give us a path like "/tmp/gradio_xyz.png"
if uploaded_image_path is None:
return None
try:
# Open with PIL (this handles JPEG, BMP, TIFF, etc.)
pil_img = Image.open(uploaded_image_path).convert("RGB")
except Exception as e:
print(f"Error: could not open uploaded image: {e}")
return None
# Save it as "input.png" in our INPUT_DIR
save_path = Path(INPUT_DIR) / "input.png"
try:
pil_img.save(save_path, format="PNG")
except Exception as e:
print(f"Error: could not save as PNG: {e}")
return None
# 3) Build and run your inference_coz.py command.
# This will block until it completes.
upscale_value = upscale_option.replace("x", "") # e.g. "2x" → "2"
cmd = [
"python", "inference_coz.py",
"-i", INPUT_DIR,
"-o", OUTPUT_DIR,
"--rec_type", "recursive_multiscale",
"--prompt_type", "vlm",
"--upscale", upscale_value,
"--lora_path", "ckpt/SR_LoRA/model_20001.pkl",
"--vae_path", "ckpt/SR_VAE/vae_encoder_20001.pt",
"--pretrained_model_name_or_path", "stabilityai/stable-diffusion-3-medium-diffusers",
"--ram_ft_path", "ckpt/DAPE/DAPE.pth",
"--ram_path", "ckpt/RAM/ram_swin_large_14m.pth"
]
try:
subprocess.run(cmd, check=True)
except subprocess.CalledProcessError as err:
# If inference_coz.py crashes, we can print/log the error.
print("Inference failed:", err)
return None
# -------------------------------------------------------------------------
# 4) After inference, look for the four numbered PNGs and stitch them
# -------------------------------------------------------------------------
per_sample_dir = os.path.join(OUTPUT_DIR, "per-sample", "input")
expected_files = [os.path.join(per_sample_dir, f"{i}.png") for i in range(1, 5)]
pil_images = []
for fp in expected_files:
if not os.path.isfile(fp):
print(f"Warning: expected file not found: {fp}")
return None
try:
img = Image.open(fp).convert("RGB")
pil_images.append(img)
except Exception as e:
print(f"Error opening {fp}: {e}")
return None
if len(pil_images) != 4:
print(f"Error: found {len(pil_images)} images, but need 4.")
return None
widths, heights = zip(*(im.size for im in pil_images))
w, h = widths[0], heights[0]
grid_w = w * 2
grid_h = h * 2
# composite = Image.new("RGB", (grid_w, grid_h))
# composite.paste(pil_images[0], (0, 0))
# composite.paste(pil_images[1], (w, 0))
# composite.paste(pil_images[2], (0, h))
# composite.paste(pil_images[3], (w, h))
return [pil_images[0], pil_images[1], pil_images[2], pil_images[3]]
# -------------------------------------------------------------
# BUILD THE GRADIO INTERFACE
# -----------------------------------------------------------------------------
css="""
#col-container {
margin: 0 auto;
max-width: 1024px;
}
"""
with gr.Blocks(css=css) as demo:
gr.HTML(
"""
<div style="text-align: center;">
<h1>Chain-of-Zoom</h1>
<p style="font-size:16px;">Extreme Super-Resolution via Scale Autoregression and Preference Alignment </p>
</div>
<br>
<div style="display: flex; justify-content: center; align-items: center; text-align: center;">
<a href="https://github.com/bryanswkim/Chain-of-Zoom">
<img src='https://img.shields.io/badge/GitHub-Repo-blue'>
</a>
</div>
"""
)
with gr.Column(elem_id="col-container"):
with gr.Row():
with gr.Column():
# 1) Image upload component. We set type="filepath" so the callback
# (run_with_upload) will receive a local path to the uploaded file.
upload_image = gr.Image(
label="Upload your input image",
type="filepath"
)
# 2) Radio for choosing 1× / 2× / 4× upscaling
upscale_radio = gr.Radio(
choices=["1x", "2x", "4x"],
value="2x",
show_label=False
)
# 2) A button that the user will click to launch inference.
run_button = gr.Button("Chain-of-Zoom it")
# (3) Gallery to display multiple output images
output_gallery = gr.Gallery(
label="Inference Results",
show_label=True,
elem_id="gallery",
columns=[2], rows=[2]
)
# Wire the button: when clicked, call run_with_upload(upload_image), put
# its return value into output_image.
run_button.click(
fn=run_with_upload,
inputs=[upload_image, upscale_radio],
outputs=output_gallery
)
# -----------------------------------------------------------------------------
# START THE GRADIO SERVER
# -----------------------------------------------------------------------------
demo.launch(share=True)