Spaces:

comrender
/

fluxhdupscaler

Running on Zero

App Files Files Community

comrender committed on 9 days ago

Commit

c84d7da

verified ·

1 Parent(s): 46b008e

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -15

app.py CHANGED Viewed

@@ -33,13 +33,9 @@ css = """
 }
 """
-# Device setup
-if torch.cuda.is_available():
-    power_device = "GPU"
-    device = "cuda"
-else:
-    power_device = "CPU"
-    device = "cpu"
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
@@ -54,26 +50,25 @@ model_path = snapshot_download(
     token=huggingface_token,
 )
-# Load Florence-2 model for image captioning
 print("📥 Loading Florence-2 model...")
 florence_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Florence-2-large",
-    torch_dtype=torch.float16,
     trust_remote_code=True,
-    attn_implementation="eager"  # Fix for SDPA compatibility issue
 ).to(device)
 florence_processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-large",
     trust_remote_code=True
 )
-# Load FLUX Img2Img pipeline
 print("📥 Loading FLUX Img2Img...")
 pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
-    torch_dtype=torch.bfloat16
 )
-pipe.to(device)
 pipe.enable_vae_tiling()
 pipe.enable_vae_slicing()
@@ -90,7 +85,6 @@ if USE_ESRGAN:
     state_dict = torch.load(esrgan_path)['params_ema']
     esrgan_model.load_state_dict(state_dict)
     esrgan_model.eval()
-    esrgan_model.to(device)
 MAX_SEED = 1000000
 MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
@@ -227,6 +221,10 @@ def enhance_image(
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
     # Handle image input
     if image_input is not None:
         input_image = image_input
@@ -253,13 +251,15 @@ def enhance_image(
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""
-    generator = torch.Generator().manual_seed(seed)
     gr.Info("🚀 Upscaling image...")
     # Initial upscale
     if USE_ESRGAN and upscale_factor == 4:
         control_image = esrgan_upscale(input_image, upscale_factor)
     else:
         w, h = input_image.size
         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
@@ -284,6 +284,10 @@ def enhance_image(
     # Resize input image to match output size for slider alignment
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
     return [resized_input, image]

 }
 """
+# Device setup - Force CPU for startup in ZeroGPU
+power_device = "ZeroGPU"
+device = "cpu"
 # Get HuggingFace token
 huggingface_token = os.getenv("HF_TOKEN")
     token=huggingface_token,
 )
+# Load Florence-2 model for image captioning on CPU
 print("📥 Loading Florence-2 model...")
 florence_model = AutoModelForCausalLM.from_pretrained(
     "microsoft/Florence-2-large",
+    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
     trust_remote_code=True,
+    attn_implementation="eager"
 ).to(device)
 florence_processor = AutoProcessor.from_pretrained(
     "microsoft/Florence-2-large",
     trust_remote_code=True
 )
+# Load FLUX Img2Img pipeline on CPU
 print("📥 Loading FLUX Img2Img...")
 pipe = FluxImg2ImgPipeline.from_pretrained(
     model_path,
+    torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32
 )
 pipe.enable_vae_tiling()
 pipe.enable_vae_slicing()
     state_dict = torch.load(esrgan_path)['params_ema']
     esrgan_model.load_state_dict(state_dict)
     esrgan_model.eval()
 MAX_SEED = 1000000
 MAX_PIXEL_BUDGET = 8192 * 8192  # Increased for tiling support
     progress=gr.Progress(track_tqdm=True),
 ):
     """Main enhancement function"""
+    # Move models to GPU inside the function
+    pipe.to("cuda")
+    florence_model.to("cuda")
     # Handle image input
     if image_input is not None:
         input_image = image_input
     else:
         prompt = custom_prompt if custom_prompt.strip() else ""
+    generator = torch.Generator(device="cuda").manual_seed(seed)
     gr.Info("🚀 Upscaling image...")
     # Initial upscale
     if USE_ESRGAN and upscale_factor == 4:
+        esrgan_model.to("cuda")
         control_image = esrgan_upscale(input_image, upscale_factor)
+        esrgan_model.to("cpu")
     else:
         w, h = input_image.size
         control_image = input_image.resize((w * upscale_factor, h * upscale_factor), resample=Image.LANCZOS)
     # Resize input image to match output size for slider alignment
     resized_input = true_input_image.resize(image.size, resample=Image.LANCZOS)
+    # Move back to CPU to release GPU
+    pipe.to("cpu")
+    florence_model.to("cpu")
     return [resized_input, image]