Commit 9469eae · Alina Lozovskaya committed
Parent(s): 0e60add

Improve model size calculation
backend/app/utils/model_validation.py
CHANGED
@@ -5,10 +5,12 @@ import re
 from typing import Tuple, Optional, Dict, Any
 import aiohttp
 from huggingface_hub import HfApi, ModelCard, hf_hub_download
+from huggingface_hub import hf_api
 from transformers import AutoConfig, AutoTokenizer
 from app.config.base import HF_TOKEN, API
 from app.utils.logging import LogFormatter
 
+
 logger = logging.getLogger(__name__)
 
 class ModelValidator:
@@ -54,78 +56,78 @@ class ModelValidator:
             logger.error(LogFormatter.error(error_msg, e))
             return False, str(e), None
 
-    async def get_safetensors_metadata(self, model_id: str, …) -> Optional[Dict]:
+    async def get_safetensors_metadata(self, model_id: str, is_adapter: bool = False, revision: str = "main") -> Optional[Dict]:
         """Get metadata from a safetensors file"""
         try:
-            … (removed body truncated in this view)
+            if is_adapter:
+                metadata = await asyncio.to_thread(
+                    hf_api.parse_safetensors_file_metadata,
+                    model_id,
+                    "adapter_model.safetensors",
+                    token=self.token,
+                    revision=revision,
+                )
+            else:
+                metadata = await asyncio.to_thread(
+                    hf_api.get_safetensors_metadata,
+                    repo_id=model_id,
+                    token=self.token,
+                    revision=revision,
+                )
+            return metadata
+
         except Exception as e:
-            logger.… (truncated in this view)
+            logger.error(f"Failed to get safetensors metadata: {str(e)}")
             return None
 
     async def get_model_size(
         self,
         model_info: Any,
         precision: str,
-        base_model: str
+        base_model: str,
+        revision: str
     ) -> Tuple[Optional[float], Optional[str]]:
         """Get model size in billions of parameters"""
         try:
             logger.info(LogFormatter.info(f"Checking model size for {model_info.modelId}"))
 
             # Check if model is adapter
             is_adapter = any(s.rfilename == "adapter_config.json" for s in model_info.siblings if hasattr(s, 'rfilename'))
 
             # Try to get size from safetensors first
             model_size = None
 
             if is_adapter and base_model:
                 # For adapters, we need both adapter and base model sizes
-                adapter_meta = await self.get_safetensors_metadata(model_info.id, …)
-                base_meta = await self.get_safetensors_metadata(base_model)
+                adapter_meta = await self.get_safetensors_metadata(model_info.id, is_adapter=True, revision=revision)
+                base_meta = await self.get_safetensors_metadata(base_model, revision="main")
 
                 if adapter_meta and base_meta:
-                    adapter_size = sum(…)
-                    base_size = sum(…)
+                    adapter_size = sum(adapter_meta.parameter_count.values())
+                    base_size = sum(base_meta.parameter_count.values())
                     model_size = (adapter_size + base_size) / (2 * 1e9)  # Convert to billions, assuming float16
             else:
                 # For regular models, just get the model size
-                meta = await self.get_safetensors_metadata(model_info.id)
+                meta = await self.get_safetensors_metadata(model_info.id, revision=revision)
                 if meta:
-                    total_params = sum(…)
+                    total_params = sum(meta.parameter_count.values())
                     model_size = total_params / (2 * 1e9)  # Convert to billions, assuming float16
 
             if model_size is None:
-                # … (fallback that parsed the size from the model name; truncated in this view)
-                if size_match:
-                    size_str = size_match.group(1)
-                    model_size = float(size_str)
-                else:
-                    return None, "Could not determine model size from safetensors or model name"
+                # If model size could not be determined, return an error
+                return None, "Model size could not be determined"
 
             # Adjust size for GPTQ models
             size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.id.lower()) else 1
             model_size = round(size_factor * model_size, 3)
 
             logger.info(LogFormatter.success(f"Model size: {model_size}B parameters"))
             return model_size, None
 
         except Exception as e:
-            … (error_msg construction truncated in this view)
-            logger.error(LogFormatter.error(error_msg, e))
+            logger.error(LogFormatter.error(f"Error while determining model size: {e}"))
             return None, str(e)
 
     async def check_chat_template(
         self,