Fabrice-TIERCELIN committed
Commit dcdec0b · verified · 1 Parent(s): 3b7822b
Files changed (1):
  1. app.py +47 -0
app.py CHANGED
@@ -425,6 +425,53 @@ def worker(input_image, image_position, prompts, n_prompt, seed, resolution, tot
 
     image_encoder_last_hidden_state = image_encoder_last_hidden_state.to(transformer.dtype)
 
+    # Load transformer model
+    if model_changed:
+        stream.output_queue.push(("progress", (None, "", make_progress_bar_html(0, "Loading transformer ..."))))
+
+        transformer = None
+        time.sleep(1.0)  # wait for the previous model to be unloaded
+        torch.cuda.empty_cache()
+        gc.collect()
+
+        previous_lora_file = lora_file
+        previous_lora_multiplier = lora_multiplier
+        previous_fp8_optimization = fp8_optimization
+
+        transformer = load_transfomer()  # bfloat16, on CPU
+
+        if lora_file is not None or fp8_optimization:
+            state_dict = transformer.state_dict()
+
+            # LoRA should be merged before fp8 optimization
+            if lora_file is not None:
+                # TODO: It would be better to merge the LoRA into the state dict before creating the transformer instance.
+                # Use from_config() instead of from_pretrained() to create the instance without loading the weights.
+
+                print(f"Merging LoRA file {os.path.basename(lora_file)} ...")
+                state_dict = merge_lora_to_state_dict(state_dict, lora_file, lora_multiplier, device=gpu)
+                gc.collect()
+
+            if fp8_optimization:
+                TARGET_KEYS = ["transformer_blocks", "single_transformer_blocks"]
+                EXCLUDE_KEYS = ["norm"]  # Exclude norm layers (e.g., LayerNorm, RMSNorm) from FP8
+
+                # in-place optimization
+                print("Optimizing for fp8")
+                state_dict = optimize_state_dict_with_fp8(state_dict, gpu, TARGET_KEYS, EXCLUDE_KEYS, move_to_device=False)
+
+                # apply monkey patching
+                apply_fp8_monkey_patch(transformer, state_dict, use_scaled_mm=False)
+                gc.collect()
+
+            info = transformer.load_state_dict(state_dict, strict=True, assign=True)
+            print(f"LoRA and/or fp8 optimization applied: {info}")
+
+        if not high_vram:
+            DynamicSwapInstaller.install_model(transformer, device=gpu)
+        else:
+            transformer.to(gpu)
+
     # Sampling
 
     stream.output_queue.push(('progress', (None, '', make_progress_bar_html(0, 'Start sampling ...'))))
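
The diff merges the LoRA into the weights before the fp8 pass, since quantizing first would destroy the low-rank update. A minimal sketch of what such a merge amounts to, assuming diffusers-style "<prefix>.lora_A.weight" / "<prefix>.lora_B.weight" key names (the key layout and helper below are illustrative assumptions, not the actual merge_lora_to_state_dict internals):

import torch

def merge_lora_sketch(state_dict: dict, lora_sd: dict, multiplier: float = 1.0) -> dict:
    # Fold W' = W + multiplier * (B @ A) into each targeted weight.
    for key in list(lora_sd.keys()):
        if not key.endswith(".lora_A.weight"):
            continue
        prefix = key[: -len(".lora_A.weight")]
        lora_a = lora_sd[key]                          # shape (rank, in_features)
        lora_b = lora_sd[prefix + ".lora_B.weight"]    # shape (out_features, rank)
        original = state_dict[prefix + ".weight"]
        delta = (lora_b.float() @ lora_a.float()) * multiplier  # rank-r update
        state_dict[prefix + ".weight"] = (original.float() + delta).to(original.dtype)
    return state_dict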
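For the fp8 step, the general technique is per-tensor quantization to torch.float8_e4m3fn plus a stored scale, with the patched linear layers dequantizing on the fly. The sketch below shows that idea under those assumptions; the names are illustrative, not the internals of optimize_state_dict_with_fp8 or apply_fp8_monkey_patch:

import torch
import torch.nn.functional as F

FP8_MAX = torch.finfo(torch.float8_e4m3fn).max  # requires PyTorch >= 2.1

def quantize_fp8(weight: torch.Tensor):
    # Per-tensor symmetric quantization: store fp8 values plus one scale.
    scale = weight.abs().max().clamp(min=1e-8) / FP8_MAX
    q = (weight / scale).clamp(-FP8_MAX, FP8_MAX).to(torch.float8_e4m3fn)
    return q, scale

def fp8_linear(x: torch.Tensor, q_weight: torch.Tensor, scale: torch.Tensor,
               bias: torch.Tensor | None = None) -> torch.Tensor:
    # Dequantize only for the duration of the matmul; storage stays fp8,
    # which roughly halves the weight memory relative to bfloat16.
    w = q_weight.to(x.dtype) * scale.to(x.dtype)
    return F.linear(x, w, bias)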
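In the low-VRAM branch, DynamicSwapInstaller keeps the transformer resident in CPU memory and streams weights to the GPU as needed instead of calling transformer.to(gpu). One way to approximate that behaviour with forward hooks (a sketch only; the real FramePack class works differently, by patching module attribute access):

import torch
import torch.nn as nn

def install_swap_hooks(blocks: nn.ModuleList, device: torch.device) -> None:
    # Move each block to the GPU just before its forward pass and back to
    # the CPU right after, so only one block occupies VRAM at a time.
    def pre_hook(module, args):
        module.to(device)

    def post_hook(module, args, output):
        module.to("cpu")
        return output

    for block in blocks:
        block.register_forward_pre_hook(pre_hook)
        block.register_forward_hook(post_hook)

This trades speed for memory: every block incurs a host-to-device copy per forward pass, which is why the diff only takes this path when high_vram is False.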