Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -21,7 +21,7 @@ except ImportError:
|
|
21 |
# --- Configuration ---
|
22 |
# Model path is set to sojka
|
23 |
MODEL_PATH = os.getenv("MODEL_PATH", "AndromedaPL/sojka")
|
24 |
-
TOKENIZER_PATH = os.getenv("
|
25 |
|
26 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
27 |
LABELS = ["self-harm", "hate", "vulgar", "sex", "crime"]
|
@@ -45,9 +45,10 @@ logger = logging.getLogger(__name__)
|
|
45 |
|
46 |
def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str) -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
|
47 |
"""Load the trained model and tokenizer"""
|
48 |
-
logger.info(f"Loading
|
49 |
|
50 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True)
|
|
|
51 |
|
52 |
if tokenizer.pad_token is None:
|
53 |
if tokenizer.eos_token:
|
@@ -57,6 +58,8 @@ def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str)
|
|
57 |
|
58 |
tokenizer.truncation_side = "right"
|
59 |
|
|
|
|
|
60 |
model_load_kwargs = {
|
61 |
"torch_dtype": torch.float16 if device == 'cuda' else torch.float32,
|
62 |
"device_map": 'auto' if device == 'cuda' else None,
|
@@ -92,7 +95,7 @@ try:
|
|
92 |
model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH, DEVICE)
|
93 |
model_loaded = True
|
94 |
except Exception as e:
|
95 |
-
logger.error(f"FATAL: Failed to load the model from {MODEL_PATH}: {e}")
|
96 |
model, tokenizer, model_loaded = None, None, False
|
97 |
|
98 |
def predict(text: str) -> Dict[str, Any]:
|
|
|
21 |
# --- Configuration ---
|
22 |
# Model path is set to sojka
|
23 |
MODEL_PATH = os.getenv("MODEL_PATH", "AndromedaPL/sojka")
|
24 |
+
TOKENIZER_PATH = os.getenv("TOKENIZER_PATH", "sdadas/mmlw-roberta-base")
|
25 |
|
26 |
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
|
27 |
LABELS = ["self-harm", "hate", "vulgar", "sex", "crime"]
|
|
|
45 |
|
46 |
def load_model_and_tokenizer(model_path: str, tokenizer_path: str, device: str) -> Tuple[AutoModelForSequenceClassification, AutoTokenizer]:
|
47 |
"""Load the trained model and tokenizer"""
|
48 |
+
logger.info(f"Loading tokenizer from {tokenizer_path}")
|
49 |
|
50 |
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True)
|
51 |
+
logger.info(f"Tokenizer loaded: {tokenizer.name_or_path}")
|
52 |
|
53 |
if tokenizer.pad_token is None:
|
54 |
if tokenizer.eos_token:
|
|
|
58 |
|
59 |
tokenizer.truncation_side = "right"
|
60 |
|
61 |
+
logger.info(f"Loading model from {model_path}")
|
62 |
+
|
63 |
model_load_kwargs = {
|
64 |
"torch_dtype": torch.float16 if device == 'cuda' else torch.float32,
|
65 |
"device_map": 'auto' if device == 'cuda' else None,
|
|
|
95 |
model, tokenizer = load_model_and_tokenizer(MODEL_PATH, TOKENIZER_PATH, DEVICE)
|
96 |
model_loaded = True
|
97 |
except Exception as e:
|
98 |
+
logger.error(f"FATAL: Failed to load the model from {MODEL_PATH} or tokenizer from {TOKENIZER_PATH}: {e}", exc_info=True)
|
99 |
model, tokenizer, model_loaded = None, None, False
|
100 |
|
101 |
def predict(text: str) -> Dict[str, Any]:
|