Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Community

comrender committed 2 days ago

Commit

d45f4bc

verified ·

1 Parent(s): fbe598e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -107

app.py CHANGED Viewed

@@ -7,7 +7,6 @@ import numpy as np
 import spaces
 import torch
 from diffusers import FluxImg2ImgPipeline
-from transformers import AutoProcessor, AutoModelForCausalLM
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
@@ -40,82 +39,10 @@ device = "cpu"
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
-# Download FLUX model
-print("📥 Downloading FLUX model...")
-model_path = snapshot_download(
-    repo_id="black-forest-labs/FLUX.1-dev",
-    repo_type="model",
-    ignore_patterns=["*.md", "*.gitattributes"],
-    local_dir="FLUX.1-dev",
-    token=huggingface_token,
-)
-# Load Florence-2 model for image captioning on CPU
-print("📥 Loading Florence-2 model...")
-florence_model = AutoModelForCausalLM.from_pretrained(
-    "microsoft/Florence-2-large",
-    torch_dtype=torch.float32,  # Force CPU dtype
-    trust_remote_code=True,
-    attn_implementation="eager"
-).to(device)
-florence_processor = AutoProcessor.from_pretrained(
-    "microsoft/Florence-2-large",
-    trust_remote_code=True
-)
-# Load FLUX Img2Img pipeline on CPU
-print("📥 Loading FLUX Img2Img...")
-pipe = FluxImg2ImgPipeline.from_pretrained(
-    model_path,
-    torch_dtype=torch.float32  # Force CPU dtype
-)
-pipe.enable_vae_tiling()
-pipe.enable_vae_slicing()
-print("✅ All models loaded successfully!")
-# Download ESRGAN model if using
-if USE_ESRGAN:
-    esrgan_path = "4x-UltraSharp.pth"
-    if not os.path.exists(esrgan_path):
-        url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
-        with open(esrgan_path, "wb") as f:
-            f.write(requests.get(url).content)
-    esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
-    state_dict = torch.load(esrgan_path)['params_ema']
-    esrgan_model.load_state_dict(state_dict)
-    esrgan_model.eval()
 MAX_SEED = 1000000
 MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
-def generate_caption(image):
-    """Generate detailed caption using Florence-2"""
-    try:
-        task_prompt = "<MORE_DETAILED_CAPTION>"
-        prompt = task_prompt
-        inputs = florence_processor(text=prompt, images=image, return_tensors="pt").to(device)
-        generated_ids = florence_model.generate(
-            input_ids=inputs["input_ids"],
-            pixel_values=inputs["pixel_values"],
-            max_new_tokens=1024,
-            num_beams=3,
-            do_sample=True,
-        )
-        generated_text = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
-        parsed_answer = florence_processor.post_process_generation(generated_text, task=task_prompt, image_size=(image.width, image.height))
-        caption = parsed_answer[task_prompt]
-        return caption
-    except Exception as e:
-        print(f"Caption generation failed: {e}")
-        return "a high quality detailed image"
 def process_input(input_image, upscale_factor):
     """Process input image and handle size constraints"""
     w, h = input_image.size
@@ -216,21 +143,54 @@ def enhance_image(
     num_inference_steps,
     upscale_factor,
     denoising_strength,
-    use_generated_caption,
     custom_prompt,
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
-    # Move models to GPU with fallback to CPU
-    try:
-        device = "cuda"
-        pipe.to(device)
-        florence_model.to(device)
-        if USE_ESRGAN:
-            esrgan_model.to(device)
-    except Exception as e:
-        print(f"GPU error: {e}, falling back to CPU")
-        device = "cpu"
     # Handle image input
     if image_input is not None:
@@ -250,13 +210,7 @@ def enhance_image(
         input_image, upscale_factor
     )
-    # Generate caption if requested
-    if use_generated_caption:
-        gr.Info("🔍 Generating image caption...")
-        generated_caption = generate_caption(input_image)
-        prompt = generated_caption
-    else:
-        prompt = custom_prompt if custom_prompt.strip() else ""
     generator = torch.Generator(device=device).manual_seed(seed)
@@ -289,21 +243,21 @@ def enhance_image(
     # Resize input image to match output size for slider alignment
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
-    # Move back to CPU to release GPU
-    pipe.to("cpu")
-    florence_model.to("cpu")
-    if USE_ESRGAN:
-        esrgan_model.to("cpu")
     return [resized_input, image]
 # Create Gradio interface
-with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as demo:
     gr.HTML("""
     <div class="main-header">
         <h1>🎨 AI Image Upscaler</h1>
-        <p>Upload an image or provide a URL to upscale it using Florence-2 captioning and FLUX upscaling</p>
         <p>Currently running on <strong>{}</strong></p>
     </div>
     """.format(power_device))
@@ -327,17 +281,11 @@ with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as d
                         value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
                     )
-            gr.HTML("<h3>🎛️ Caption Settings</h3>")
-            use_generated_caption = gr.Checkbox(
-                label="Use AI-generated caption (Florence-2)",
-                value=True,
-                info="Generate detailed caption automatically"
-            )
             custom_prompt = gr.Textbox(
                 label="Custom Prompt (optional)",
-                placeholder="Enter custom prompt or leave empty for generated caption",
                 lines=2
             )
@@ -412,7 +360,6 @@ with gr.Blocks(css=css, title="🎨 AI Image Upscaler - Florence-2 + FLUX") as d
             num_inference_steps,
             upscale_factor,
             denoising_strength,
-            use_generated_caption,
             custom_prompt,
         ],
         outputs=[result_slider]

 import spaces
 import torch
 from diffusers import FluxImg2ImgPipeline
 from gradio_imageslider import ImageSlider
 from PIL import Image
 from huggingface_hub import snapshot_download
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
 MAX_SEED = 1000000
 MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
 def process_input(input_image, upscale_factor):
     """Process input image and handle size constraints"""
     w, h = input_image.size
     num_inference_steps,
     upscale_factor,
     denoising_strength,
     custom_prompt,
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
+    # Lazy loading of models
+    global pipe, esrgan_model
+    if 'pipe' not in globals():
+        try:
+            device = "cuda" if torch.cuda.is_available() else "cpu"
+            dtype = torch.bfloat16 if device == "cuda" else torch.float32
+            print(f"📥 Loading FLUX Img2Img on {device}...")
+            pipe = FluxImg2ImgPipeline.from_pretrained(
+                "black-forest-labs/FLUX.1-dev",
+                torch_dtype=dtype,
+                low_cpu_mem_usage=True,
+                device_map="auto"
+            )
+            pipe.enable_vae_tiling()
+            pipe.enable_vae_slicing()
+            pipe.enable_model_cpu_offload() if device == "cuda" else None
+            if USE_ESRGAN:
+                esrgan_path = "4x-UltraSharp.pth"
+                if not os.path.exists(esrgan_path):
+                    url = "https://huggingface.co/uwg/upscaler/resolve/main/ESRGAN/4x-UltraSharp.pth"
+                    with open(esrgan_path, "wb") as f:
+                        f.write(requests.get(url).content)
+                esrgan_model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64, num_block=23, num_grow_ch=32, scale=4)
+                state_dict = torch.load(esrgan_path)['params_ema']
+                esrgan_model.load_state_dict(state_dict)
+                esrgan_model.eval()
+                esrgan_model.to(device)
+            print("✅ Models loaded successfully!")
+        except Exception as e:
+            print(f"Model loading error: {e}, falling back to CPU")
+            device = "cpu"
+            dtype = torch.float32
+            # Reload on CPU if needed
+            pipe = FluxImg2ImgPipeline.from_pretrained(
+                "black-forest-labs/FLUX.1-dev",
+                torch_dtype=dtype,
+                low_cpu_mem_usage=True,
+                device_map="auto"
+            )
+            pipe.enable_vae_tiling()
+            pipe.enable_vae_slicing()
     # Handle image input
     if image_input is not None:
         input_image, upscale_factor
     )
+    prompt = custom_prompt if custom_prompt.strip() else ""
     generator = torch.Generator(device=device).manual_seed(seed)
     # Resize input image to match output size for slider alignment
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
+    # Move back to CPU to release GPU if possible
+    if device == "cuda":
+        pipe.to("cpu")
+        if USE_ESRGAN:
+            esrgan_model.to("cpu")
     return [resized_input, image]
 # Create Gradio interface
+with gr.Blocks(css=css, title="🎨 AI Image Upscaler - FLUX") as demo:
     gr.HTML("""
     <div class="main-header">
         <h1>🎨 AI Image Upscaler</h1>
+        <p>Upload an image or provide a URL to upscale it using FLUX upscaling</p>
         <p>Currently running on <strong>{}</strong></p>
     </div>
     """.format(power_device))
                         value="https://upload.wikimedia.org/wikipedia/commons/thumb/a/a7/Example.jpg/800px-Example.jpg"
                     )
+            gr.HTML("<h3>🎛️ Prompt Settings</h3>")
             custom_prompt = gr.Textbox(
                 label="Custom Prompt (optional)",
+                placeholder="Enter custom prompt or leave empty",
                 lines=2
             )
             num_inference_steps,
             upscale_factor,
             denoising_strength,
             custom_prompt,
         ],
         outputs=[result_slider]