Update app.py
app.py CHANGED
@@ -32,8 +32,6 @@ def load_lora_state(lora_model_name):
     with open(config_path, 'r') as f:
         lora_config = json.load(f)

-    scale = lora_config['lora_alpha'] / lora_config['r']
-
     # Download adapter weights
     try:
         adapter_path = hf_hub_download(
@@ -52,18 +50,18 @@ def load_lora_state(lora_model_name):
         )
         lora_state = torch.load(adapter_path, map_location='cpu')

-        return lora_state,
+        return lora_state, lora_config, temp_lora_dir

 def find_lora_weights(lora_state, key):
     """Find corresponding LoRA A and B weights for a given key"""
     lora_A = None
     lora_B = None

-    # Remove .weight suffix
-    clean_key = key.
+    # Remove .weight suffix for matching
+    clean_key = key.strip('.weight')

     for lora_key, lora_weight in lora_state.items():
-        if clean_key in lora_key
+        if clean_key in lora_key:
             if 'lora_A' in lora_key:
                 lora_A = lora_weight
             elif 'lora_B' in lora_key:
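A note on the matching logic above: `find_lora_weights` relies on the base-model tensor name, minus its `.weight` suffix, being a substring of the PEFT adapter key. Also worth knowing that `str.strip('.weight')` removes a *set* of characters from both ends rather than the literal suffix, so `str.removesuffix('.weight')` (Python 3.9+) is the stricter spelling. A minimal, self-contained sketch of the lookup with hypothetical key names (not taken from a real adapter):

```python
# Minimal sketch of the substring-based lookup; key names are hypothetical.
lora_state = {
    "base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight": "A0",
    "base_model.model.model.layers.0.self_attn.q_proj.lora_B.weight": "B0",
}

key = "model.layers.0.self_attn.q_proj.weight"
clean_key = key.removesuffix(".weight")  # stricter than key.strip('.weight'), which can over-strip

lora_A = lora_B = None
for lora_key, lora_weight in lora_state.items():
    if clean_key in lora_key:
        if "lora_A" in lora_key:
            lora_A = lora_weight
        elif "lora_B" in lora_key:
            lora_B = lora_weight

print(lora_A, lora_B)  # -> A0 B0
```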
@@ -118,17 +116,27 @@ def download_and_upload_non_model_files(base_model_name, output_repo_name):
     shutil.rmtree(temp_config_dir, ignore_errors=True)

 def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo_name,
-                         multiplicative_lora, progress=gr.Progress()):
+                         scale_factor, multiplicative_lora, inverse_lora, progress=gr.Progress()):
     temp_lora_dir = None
     try:
+        # Validate scale factor
+        if not (0 < scale_factor < 2):
+            error_msg = "Scale factor must be in the range (0, 2)"
+            warning_fn(error_msg)
+            return f"❌ Error: {error_msg}"
+
         login(hf_token)

         progress(0.1, desc="Loading LoRA adapter...")
         info_fn("Loading LoRA adapter...")

         # Load LoRA state (this downloads the adapter)
-        lora_state,
-
+        lora_state, lora_config, temp_lora_dir = load_lora_state(lora_model_name)
+
+        # Calculate scale with user factor
+        base_scale = lora_config['lora_alpha'] / lora_config['r']
+        scale = base_scale * scale_factor
+        info_fn(f"Using LoRA scale: {scale} (base: {base_scale:.3f} × factor: {scale_factor})")

         progress(0.2, desc="Creating output repository...")

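The effective merge scale introduced above is the standard PEFT scaling `lora_alpha / r` multiplied by the user-supplied slider value, with the slider value validated to lie in (0, 2). A worked sketch with assumed config values (hypothetical, not read from any real adapter):

```python
# Worked sketch of the scale computation; adapter_config.json values are assumed.
lora_config = {"lora_alpha": 32, "r": 16}   # hypothetical adapter config
scale_factor = 0.5                          # user-chosen strength from the slider

assert 0 < scale_factor < 2, "Scale factor must be in the range (0, 2)"

base_scale = lora_config["lora_alpha"] / lora_config["r"]   # 32 / 16 = 2.0
scale = base_scale * scale_factor                           # 2.0 * 0.5 = 1.0
print(f"Using LoRA scale: {scale} (base: {base_scale:.3f} × factor: {scale_factor})")
```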
@@ -157,6 +165,18 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo

         info_fn(f"Found {len(shard_files)} model shards to process")

+        # Determine merge mode
+        if multiplicative_lora and inverse_lora:
+            merge_mode = "Multiplicative Inverse"
+        elif multiplicative_lora:
+            merge_mode = "Multiplicative"
+        elif inverse_lora:
+            merge_mode = "Additive Inverse"
+        else:
+            merge_mode = "Additive"
+
+        info_fn(f"Merge mode: {merge_mode}")
+
         merged_tensors = 0
         total_shards = len(shard_files)

@@ -194,29 +214,47 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo
                 lora_A, lora_B = find_lora_weights(lora_state, key)

                 if lora_A is not None and lora_B is not None:
-
-                    info_fn(f"Merging {lora_type} LoRA weights for {key}")
+                    info_fn(f"Merging {merge_mode} LoRA weights for {key}")
                     shard_merged_count += 1
                     merged_tensors += 1

                     # Convert to float32 for computation
                     original_dtype = tensor.dtype
-
-
-                    lora_B_f32 = lora_B.to(torch.float32)
+                    tensor = tensor.to(torch.float32)
+                    lora_delta = scale * lora_B.to(torch.float32) @ lora_A.to(torch.float32)

                     if multiplicative_lora:
-                        #
-
+                        # Validate dimensions for multiplicative LoRA
+                        if lora_delta.shape[0] != lora_delta.shape[1]:
+                            raise ValueError(f"Multiplicative LoRA requires square delta matrix for {key}: got shape {lora_delta.shape}")
+                        if lora_delta.shape[-1] != tensor.shape[-2]:
+                            raise ValueError(f"Multiplicative LoRA dimension mismatch for {key}: {lora_delta.shape} vs {tensor.shape}")
+
+                        if inverse_lora:
+                            # Inverse multiplicative: tensor = (I + lora_delta)^(-1) @ tensor
+                            identity = torch.eye(lora_delta.shape[0], device=lora_delta.device, dtype=torch.float32)
+                            inverse_matrix = torch.linalg.inv(identity + lora_delta)
+                            tensor = inverse_matrix @ tensor
+                        else:
+                            # Forward multiplicative: tensor = (I + lora_delta) @ tensor
+                            tensor += lora_delta @ tensor
                     else:
-                        #
-
+                        # Validate dimensions for additive LoRA
+                        if lora_delta.shape != tensor.shape:
+                            raise ValueError(f"Additive LoRA dimension mismatch for {key}: {lora_delta.shape} vs {tensor.shape}")
+
+                        if inverse_lora:
+                            # Inverse additive: tensor = tensor - lora_delta
+                            tensor -= lora_delta
+                        else:
+                            # Forward additive: tensor = tensor + lora_delta
+                            tensor += lora_delta

                     # Convert back to original dtype
-                    tensor =
+                    tensor = tensor.to(original_dtype)

                     # Clean up intermediate tensors
-                    del
+                    del lora_delta
                     if torch.cuda.is_available():
                         torch.cuda.empty_cache()

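As a sanity check on the four branches above, here is a self-contained toy example (random square matrices, no Hugging Face access) showing that each inverse mode recovers the original weight from its forward counterpart:

```python
import torch

torch.manual_seed(0)
d, r, scale = 8, 2, 1.0

W = torch.randn(d, d)                  # toy weight matrix
lora_A = torch.randn(r, d)
lora_B = torch.randn(d, r)
lora_delta = scale * lora_B @ lora_A   # rank-r update, shape (d, d)

# Additive merge, undone by the additive inverse
W_add = W + lora_delta
assert torch.allclose(W_add - lora_delta, W, atol=1e-5)

# Multiplicative merge W_mul = (I + delta) @ W, undone by (I + delta)^(-1) @ W_mul
identity = torch.eye(d)
W_mul = W + lora_delta @ W
W_recovered = torch.linalg.inv(identity + lora_delta) @ W_mul
assert torch.allclose(W_recovered, W, atol=1e-4)

print("both inverse modes recover the original weights")
```

The multiplicative inverse is only well defined when `I + lora_delta` is invertible; the random toy matrices here almost surely are, but a real merge could in principle hit an ill-conditioned case.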
@@ -246,7 +284,7 @@ def merge_lora_efficient(hf_token, base_model_name, lora_model_name, output_repo

         progress(1.0, desc="Upload completed!")

-        success_msg = f"✅ Successfully merged and uploaded model!\nModel URL: https://huggingface.co/{output_repo_name}\nProcessed {total_shards} shards\nMerged {merged_tensors} layers with LoRA weights"
+        success_msg = f"✅ Successfully merged and uploaded model!\nModel URL: https://huggingface.co/{output_repo_name}\nMerge mode: {merge_mode}\nScale factor: {scale_factor}\nProcessed {total_shards} shards\nMerged {merged_tensors} layers with LoRA weights"
         info_fn("Merge completed successfully!")

         return success_msg
@@ -272,15 +310,23 @@ This tool merges LoRA (Low-Rank Adaptation) adapters with base models using a me
 - **Streaming Processing**: Downloads → Processes → Uploads → Deletes each shard sequentially
 - **Automatic Cleanup**: Temporary files are automatically removed after processing
 - **Progress Tracking**: Real-time status updates throughout the merge process
-- **Advanced Options**: Multiplicative LoRA
+- **Advanced Options**: Multiplicative LoRA, inverse merging, and custom scale factors
 """

 DETAILS_TEXT = """
 ### How It Works
-LoRA enables efficient fine-tuning by adding small adapter weights rather than modifying the entire model. This tool
+LoRA enables efficient fine-tuning by adding small adapter weights rather than modifying the entire model. This tool supports four merge modes:
+
+- **Additive LoRA**: `W_new = W + scale × B @ A`
+- **Additive Inverse**: `W_new = W - scale × B @ A` (removes LoRA effect)
+- **Multiplicative LoRA**: `W_new = W + scale × B @ A @ W`
+- **Multiplicative Inverse**: `W_new = (I + scale × B @ A)^(-1) @ W`

-
-
+### Scale Factor
+The scale factor (0 < scale < 2) controls the strength of the LoRA merge:
+- **1.0**: Full strength (default)
+- **0.5**: Half strength
+- **1.5**: 150% strength

 ### Memory Efficiency
 - **Traditional approach**: Loads entire model (~15GB+ for 7B parameter models)
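A rough back-of-the-envelope check of the memory figures quoted in the details text, assuming fp16/bf16 weights (2 bytes per parameter) and a ~5 GB shard, which is an assumed typical size rather than a value taken from the app:

```python
# Back-of-the-envelope memory estimate; the shard size is an assumed typical value.
params = 7e9                  # 7B-parameter model
bytes_per_param = 2           # fp16 / bf16
full_model_gb = params * bytes_per_param / 1024**3
shard_gb = 5.0                # assumed size of one safetensors shard

print(f"whole model ≈ {full_model_gb:.1f} GB of raw weights; "
      f"streaming shard-by-shard keeps the peak near {shard_gb:.0f} GB")
```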
@@ -328,10 +374,23 @@ with gr.Blocks(title="Memory-Efficient LoRA Merge", theme=gr.themes.Soft()) as d
             )

             gr.Markdown("### Advanced Options")
+            scale_factor = gr.Slider(
+                minimum=0.01,
+                maximum=1.99,
+                value=1.0,
+                step=0.01,
+                label="Scale Factor",
+                info="Strength of LoRA merge (0 < scale < 2)"
+            )
             multiplicative_lora = gr.Checkbox(
                 label="Multiplicative LoRA",
                 value=False,
-                info="Apply
+                info="Apply multiplicative LoRA instead of additive LoRA"
+            )
+            inverse_lora = gr.Checkbox(
+                label="Inverse Merge",
+                value=False,
+                info="Apply inverse operation (subtract/invert the LoRA effect)"
             )

         with gr.Column(scale=1):
@@ -348,7 +407,8 @@ with gr.Blocks(title="Memory-Efficient LoRA Merge", theme=gr.themes.Soft()) as d

     submit_btn.click(
         fn=merge_lora_efficient,
-        inputs=[hf_token, base_model_name, lora_model_name, output_repo_name,
+        inputs=[hf_token, base_model_name, lora_model_name, output_repo_name,
+                scale_factor, multiplicative_lora, inverse_lora],
         outputs=output_text
     )

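One wiring detail worth keeping in mind: Gradio passes the listed `inputs` components to the callback positionally, so the order in `inputs=[...]` has to match the parameter order of `merge_lora_efficient` (token, base model, LoRA repo, output repo, scale factor, multiplicative flag, inverse flag). A stripped-down sketch of the same pattern with a throwaway callback and illustrative component names:

```python
# Illustrative Gradio wiring sketch; component names and the callback are placeholders.
import gradio as gr

def preview(token, base, lora, out_repo, scale_factor, multiplicative_lora, inverse_lora):
    mode = "Multiplicative" if multiplicative_lora else "Additive"
    if inverse_lora:
        mode += " Inverse"
    return f"{base} + {lora} -> {out_repo} ({mode}, scale factor {scale_factor})"

with gr.Blocks() as demo:
    token = gr.Textbox(label="HF Token", type="password")
    base = gr.Textbox(label="Base model")
    lora = gr.Textbox(label="LoRA adapter")
    out_repo = gr.Textbox(label="Output repo")
    scale_factor = gr.Slider(minimum=0.01, maximum=1.99, value=1.0, step=0.01, label="Scale Factor")
    multiplicative_lora = gr.Checkbox(label="Multiplicative LoRA", value=False)
    inverse_lora = gr.Checkbox(label="Inverse Merge", value=False)
    output_text = gr.Textbox(label="Result")

    gr.Button("Preview").click(
        fn=preview,
        inputs=[token, base, lora, out_repo, scale_factor, multiplicative_lora, inverse_lora],
        outputs=output_text,
    )

# demo.launch()  # uncomment to run locally
```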