Sushwetabm committed · commit ebe67a1
Parent(s): c16d4e7
updated model.py
model.py CHANGED
@@ -1,21 +1,21 @@
-[old lines 1-18: blank and commented-out lines whose text was not preserved in this page capture]
+# model.py - Optimized version
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from functools import lru_cache
+import os
+import asyncio
+from concurrent.futures import ThreadPoolExecutor
+import logging
+
+logger = logging.getLogger(__name__)
+
+# Global variables to store loaded model
+_tokenizer = None
+_model = None
+_model_loading = False
+_model_loaded = False
+
+@lru_cache(maxsize=1)
 # def get_model_config():
 #     """Cache model configuration"""
 #     return {
@@ -27,167 +27,102 @@
 #         "low_cpu_mem_usage": True,
 #         "use_cache": True,
 #     }
-
-# def load_model_sync():
-#     """Synchronous model loading with optimizations"""
-#     global _tokenizer, _model, _model_loaded
-
-#     if _model_loaded:
-#         return _tokenizer, _model
-
-#     config = get_model_config()
-#     model_id = config["model_id"]
-
-#     logger.info(f"Loading model {model_id}...")
-
-#     try:
-#         # Set cache directory to avoid re-downloading
-#         cache_dir = os.environ.get("TRANSFORMERS_CACHE", "./model_cache")
-#         os.makedirs(cache_dir, exist_ok=True)
-
-#         # Load tokenizer first (faster)
-#         logger.info("Loading tokenizer...")
-#         _tokenizer = AutoTokenizer.from_pretrained(
-#             model_id,
-#             trust_remote_code=config["trust_remote_code"],
-#             cache_dir=cache_dir,
-#             use_fast=True,  # Use fast tokenizer if available
-#         )
-
-#         # Load model with optimizations
-#         logger.info("Loading model...")
-#         _model = AutoModelForCausalLM.from_pretrained(
-#             model_id,
-#             trust_remote_code=config["trust_remote_code"],
-#             torch_dtype=config["torch_dtype"],
-#             device_map=config["device_map"],
-#             low_cpu_mem_usage=config["low_cpu_mem_usage"],
-#             cache_dir=cache_dir,
-#             offload_folder="offload",
-#             offload_state_dict=True
-#         )
-
-#         # Set to evaluation mode
-#         _model.eval()
-
-#         _model_loaded = True
-#         logger.info("Model loaded successfully!")
-#         return _tokenizer, _model
-
-#     except Exception as e:
-#         logger.error(f"Failed to load model: {e}")
-#         raise
-
-# async def load_model_async():
-#     """Asynchronous model loading"""
-#     global _model_loading
-
-#     if _model_loaded:
-#         return _tokenizer, _model
-
-#     if _model_loading:
-#         # Wait for ongoing loading to complete
-#         while _model_loading and not _model_loaded:
-#             await asyncio.sleep(0.1)
-#         return _tokenizer, _model
-
-#     _model_loading = True
-
-#     try:
-#         # Run model loading in thread pool to avoid blocking
-#         loop = asyncio.get_event_loop()
-#         with ThreadPoolExecutor(max_workers=1) as executor:
-#             tokenizer, model = await loop.run_in_executor(
-#                 executor, load_model_sync
-#             )
-#             return tokenizer, model
-#     finally:
-#         _model_loading = False
-
-# def get_model():
-#     """Get the loaded model (for synchronous access)"""
-#     if not _model_loaded:
-#         return load_model_sync()
-#     return _tokenizer, _model
-
-# def is_model_loaded():
-#     """Check if model is loaded"""
-#     return _model_loaded
-
-# def get_model_info():
-#     """Get model information without loading"""
-#     config = get_model_config()
-#     return {
-#         "model_id": config["model_id"],
-#         "loaded": _model_loaded,
-#         "loading": _model_loading,
-#     }
-
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
-from functools import lru_cache
-import logging
-import asyncio
-logger = logging.getLogger(__name__)
-_model_loaded = False
-_tokenizer = None
-_model = None
-@lru_cache(maxsize=1)
 def get_model_config():
     return {
         "model_id": "Salesforce/codet5p-220m",
         "trust_remote_code": True
     }

 def load_model_sync():
+    """Synchronous model loading with optimizations"""
     global _tokenizer, _model, _model_loaded

     if _model_loaded:
         return _tokenizer, _model

     config = get_model_config()
     model_id = config["model_id"]

+    logger.info(f"Loading model {model_id}...")
+
     try:
-        [old lines 151-152: text not preserved in this page capture]
+        # Set cache directory to avoid re-downloading
+        cache_dir = os.environ.get("TRANSFORMERS_CACHE", "./model_cache")
+        os.makedirs(cache_dir, exist_ok=True)
+
+        # Load tokenizer first (faster)
+        logger.info("Loading tokenizer...")
+        _tokenizer = AutoTokenizer.from_pretrained(
+            model_id,
+            trust_remote_code=config["trust_remote_code"],
+            cache_dir=cache_dir,
+            use_fast=True,  # Use fast tokenizer if available
+        )
+
+        # Load model with optimizations
+        logger.info("Loading model...")
+        _model = AutoModelForCausalLM.from_pretrained(
+            model_id,
+            trust_remote_code=config["trust_remote_code"],
+            torch_dtype=config["torch_dtype"],
+            device_map=config["device_map"],
+            low_cpu_mem_usage=config["low_cpu_mem_usage"],
+            cache_dir=cache_dir,
+            offload_folder="offload",
+            offload_state_dict=True
+        )
+
+        # Set to evaluation mode
         _model.eval()
+
         _model_loaded = True
+        logger.info("Model loaded successfully!")
         return _tokenizer, _model

     except Exception as e:
         logger.error(f"Failed to load model: {e}")
         raise

 async def load_model_async():
+    """Asynchronous model loading"""
+    global _model_loading
+
     if _model_loaded:
-        return
+        return _tokenizer, _model
+
+    if _model_loading:
+        # Wait for ongoing loading to complete
+        while _model_loading and not _model_loaded:
+            await asyncio.sleep(0.1)
+        return _tokenizer, _model
+
+    _model_loading = True
+
     try:
-        [old lines 171-178: text not preserved in this page capture]
+        # Run model loading in thread pool to avoid blocking
+        loop = asyncio.get_event_loop()
+        with ThreadPoolExecutor(max_workers=1) as executor:
+            tokenizer, model = await loop.run_in_executor(
+                executor, load_model_sync
+            )
+            return tokenizer, model
+    finally:
+        _model_loading = False

 def get_model():
+    """Get the loaded model (for synchronous access)"""
     if not _model_loaded:
-        [old line 182: text not preserved in this page capture]
+        return load_model_sync()
     return _tokenizer, _model

 def is_model_loaded():
+    """Check if model is loaded"""
     return _model_loaded

 def get_model_info():
+    """Get model information without loading"""
+    config = get_model_config()
     return {
-        "model_id":
+        "model_id": config["model_id"],
         "loaded": _model_loaded,
-        "loading":
+        "loading": _model_loading,
     }
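A few notes on the new version follow. First, a likely runtime bug: load_model_sync() reads config["torch_dtype"], config["device_map"], and config["low_cpu_mem_usage"], but the new get_model_config() returns only model_id and trust_remote_code, so each of those lookups would raise KeyError the first time the model is loaded. A minimal sketch of a config that satisfies every lookup; the CPU-oriented values are an assumption, not something this commit specifies:

import torch
from functools import lru_cache

@lru_cache(maxsize=1)
def get_model_config():
    # Every key below is read by load_model_sync(); torch_dtype,
    # device_map and low_cpu_mem_usage are assumed values for a small
    # CPU Space, not values taken from this commit.
    return {
        "model_id": "Salesforce/codet5p-220m",
        "trust_remote_code": True,
        "torch_dtype": torch.float32,
        "device_map": "auto",
        "low_cpu_mem_usage": True,
    }

Since @lru_cache hands every caller the same dict object, callers should treat the returned config as read-only.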
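Second, the import switch. The old file loaded Salesforce/codet5p-220m with AutoModelForSeq2SeqLM; the new file keeps the same checkpoint but imports AutoModelForCausalLM. CodeT5+ 220M is an encoder-decoder (T5-style) checkpoint, so loading it through the causal-LM auto class is likely to raise an unrecognized-configuration error. A sketch of the seq2seq load with an illustrative generation round trip; the prompt is made up:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

model_id = "Salesforce/codet5p-220m"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
model.eval()

# Encode a prompt, generate, decode (illustrative only).
inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
outputs = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))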
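Third, the cache directory. Recent transformers releases deprecate the TRANSFORMERS_CACHE variable in favor of HF_HOME / HF_HUB_CACHE, so os.environ.get("TRANSFORMERS_CACHE", "./model_cache") keeps working but may trigger a deprecation warning on newer versions. A sketch that prefers the current variables; the lookup order is my assumption, not part of this commit:

import os

def resolve_cache_dir(default="./model_cache"):
    # Check the current Hugging Face cache variables first, then the
    # deprecated TRANSFORMERS_CACHE, then fall back to a local folder.
    for var in ("HF_HUB_CACHE", "HF_HOME", "TRANSFORMERS_CACHE"):
        value = os.environ.get(var)
        if value:
            return value
    return default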
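Fourth, the async path. The polling loop (await asyncio.sleep(0.1) until _model_loaded flips) works, but an asyncio.Lock expresses "one loader, everyone else waits" without busy-waiting, and asyncio.to_thread (Python 3.9+) replaces the manual ThreadPoolExecutor. A sketch of that alternative, not what this commit does; it reuses the module's globals:

import asyncio

# Module-level Lock assumes Python 3.10+, where asyncio.Lock no longer
# binds to an event loop at construction time.
_load_lock = asyncio.Lock()

async def load_model_async():
    """Load once; concurrent callers wait on the lock instead of polling."""
    global _model_loading
    if _model_loaded:
        return _tokenizer, _model
    async with _load_lock:
        if _model_loaded:  # another coroutine finished while we waited
            return _tokenizer, _model
        _model_loading = True
        try:
            # Run the blocking load off the event loop.
            return await asyncio.to_thread(load_model_sync)
        finally:
            _model_loading = False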
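Finally, how the module is presumably consumed: load_model_async() fits a web app's startup hook, while get_model() is the blocking fallback. A hypothetical caller, assuming a FastAPI app; this Space's actual app code is not shown on this page:

from contextlib import asynccontextmanager

from fastapi import FastAPI

import model  # the module changed in this commit

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Warm the model while the server starts, not on the first request.
    await model.load_model_async()
    yield

app = FastAPI(lifespan=lifespan)

@app.get("/health")
def health():
    return model.get_model_info()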