MoraxCheng committed on
Commit b55bd43 · 1 Parent(s): 95230fb

Add patch for transformers URL handling and enhance model loading with manual config download

Files changed (1): app.py +64 -22
app.py CHANGED
@@ -13,8 +13,30 @@ os.environ['HF_DATASETS_CACHE'] = '/tmp/huggingface/datasets'
 os.environ['HF_ENDPOINT'] = 'https://huggingface.co'
 # Disable offline mode to allow downloads
 os.environ['TRANSFORMERS_OFFLINE'] = '0'
+
+# Patch for transformers 4.17.0 URL issue in HF Spaces
+import urllib.parse
+
+def patch_transformers_url():
+    """Fix URL scheme issue in transformers 4.17.0"""
+    try:
+        import transformers.file_utils
+        original_get_from_cache = transformers.file_utils.get_from_cache
+
+        def patched_get_from_cache(url, *args, **kwargs):
+            # Fix URLs that start with /api/ by prepending https://huggingface.co
+            if isinstance(url, str) and url.startswith('/api/'):
+                url = 'https://huggingface.co' + url
+            return original_get_from_cache(url, *args, **kwargs)
+
+        transformers.file_utils.get_from_cache = patched_get_from_cache
+        print("Applied URL patch for transformers")
+    except Exception as e:
+        print(f"Warning: Could not patch transformers URL handling: {e}")
+
 import torch
 import transformers
+patch_transformers_url()
 from transformers import PreTrainedTokenizerFast
 import numpy as np
 import pandas as pd
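The hunk above wraps transformers' internal get_from_cache rather than editing the library: any scheme-less '/api/...' path is rewritten to an absolute Hub URL before the request goes out, and the wrapper is installed after `import transformers` but before any `from_pretrained` call can trigger a download. (The added `import urllib.parse` appears unused.) A minimal standalone sketch of the same wrapping technique, where fetch() is a hypothetical stand-in for the real downloader:

def fetch(url):
    # Hypothetical stand-in for transformers.file_utils.get_from_cache
    return url

def with_absolute_urls(downloader, host="https://huggingface.co"):
    # Wrap a downloader so scheme-less "/api/..." paths become full URLs
    def wrapper(url, *args, **kwargs):
        if isinstance(url, str) and url.startswith("/api/"):
            url = host + url
        return downloader(url, *args, **kwargs)
    return wrapper

fetch = with_absolute_urls(fetch)
print(fetch("/api/models/PascalNotin/Tranception_Medium"))
# https://huggingface.co/api/models/PascalNotin/Tranception_Medium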
 
@@ -107,21 +129,10 @@ def load_model_cached(model_type):
         cache_dir = "/tmp/huggingface/transformers"
         os.makedirs(cache_dir, exist_ok=True)
 
-        # Clear any potential proxy issues
-        import requests
-        session = requests.Session()
-        session.trust_env = False
-
-        # Try loading with explicit parameters
+        # Try loading with minimal parameters first
         model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(
-            pretrained_model_name_or_path=model_path,
-            cache_dir=cache_dir,
-            local_files_only=False,  # Allow downloading if not cached
-            resume_download=True,  # Resume incomplete downloads
-            force_download=False,  # Don't force re-download if cached
-            proxies=None,  # Explicitly set no proxies
-            use_auth_token=None,  # No auth token needed for public models
-            revision="main"  # Use main branch
+            model_path,
+            cache_dir=cache_dir
         )
         MODEL_CACHE[model_type] = model
         print(f"{model_type} model loaded and cached")
 
@@ -130,21 +141,52 @@ def load_model_cached(model_type):
         print(f"Error loading {model_type} model: {e}")
         print(f"Attempting alternative loading method...")
 
-        # Try alternative loading approach
+        # Try alternative loading approach with full URL
         try:
-            # Manually specify the full model ID
-            full_model_id = f"PascalNotin/Tranception_{model_type}"
+            # Use full URL to bypass any path resolution issues
+            full_url = f"https://huggingface.co/PascalNotin/Tranception_{model_type}"
             model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(
-                full_model_id,
-                cache_dir=cache_dir,
-                local_files_only=False,
-                trust_remote_code=True  # Allow custom model code
+                full_url,
+                cache_dir=cache_dir
             )
             MODEL_CACHE[model_type] = model
-            print(f"{model_type} model loaded successfully with alternative method")
+            print(f"{model_type} model loaded successfully with full URL")
             return model
         except Exception as e2:
             print(f"Alternative loading also failed: {e2}")
+
+            # Final attempt: manually download config first
+            try:
+                import json
+                import requests
+
+                # Download config.json manually
+                config_url = f"https://huggingface.co/PascalNotin/Tranception_{model_type}/raw/main/config.json"
+                print(f"Manually downloading config from: {config_url}")
+
+                response = requests.get(config_url)
+                if response.status_code == 200:
+                    # Save config locally
+                    local_model_dir = f"/tmp/Tranception_{model_type}"
+                    os.makedirs(local_model_dir, exist_ok=True)
+
+                    with open(f"{local_model_dir}/config.json", "w") as f:
+                        json.dump(response.json(), f)
+
+                    # Now try loading from the HF model ID again
+                    model = tranception.model_pytorch.TranceptionLMHeadModel.from_pretrained(
+                        f"PascalNotin/Tranception_{model_type}",
+                        cache_dir=cache_dir,
+                        local_files_only=False
+                    )
+                    MODEL_CACHE[model_type] = model
+                    print(f"{model_type} model loaded successfully after manual config download")
+                    return model
+                else:
+                    print(f"Failed to download config: {response.status_code}")
+            except Exception as e3:
+                print(f"Manual download also failed: {e3}")
+
         # Fallback to Medium if requested model fails
         if model_type != "Medium":
             print("Falling back to Medium model...")