kovacsvi commited on
Commit
d68fe8b
·
1 Parent(s): 690a8d2

preload models

Browse files
Files changed (2) hide show
  1. interfaces/cap.py +5 -3
  2. utils.py +5 -2
interfaces/cap.py CHANGED
@@ -12,6 +12,7 @@ from huggingface_hub import HfApi
12
  from label_dicts import CAP_NUM_DICT, CAP_LABEL_NAMES
13
 
14
  from .utils import is_disk_full, release_model
 
15
 
16
  HF_TOKEN = os.environ["hf_read"]
17
 
@@ -83,11 +84,12 @@ def build_huggingface_path(language: str, domain: str):
83
  else:
84
  return "poltextlab/xlm-roberta-large-pooled-cap"
85
 
86
- #@spaces.GPU
87
  def predict(text, model_id, tokenizer_id):
88
  device = torch.device("cpu")
89
- model = AutoModelForSequenceClassification.from_pretrained(model_id, device_map="auto", token=HF_TOKEN).to(device)
90
- tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
 
 
91
 
92
  inputs = tokenizer(text,
93
  max_length=256,
 
12
  from label_dicts import CAP_NUM_DICT, CAP_LABEL_NAMES
13
 
14
  from .utils import is_disk_full, release_model
15
+ from utils import MODELS_PRELOADED, TOKENIZERS_PRELOADED
16
 
17
  HF_TOKEN = os.environ["hf_read"]
18
 
 
84
  else:
85
  return "poltextlab/xlm-roberta-large-pooled-cap"
86
 
 
87
  def predict(text, model_id, tokenizer_id):
88
  device = torch.device("cpu")
89
+
90
+ print(MODELS_PRELOADED, TOKENIZERS_PRELOADED)
91
+ model = MODELS_PRELOADED[model_id].to(device)
92
+ tokenizer = TOKENIZERS_PRELOADED[tokenizer_id]
93
 
94
  inputs = tokenizer(text,
95
  max_length=256,
utils.py CHANGED
@@ -25,6 +25,9 @@ from interfaces.ontolisst import build_huggingface_path as hf_ontolisst_path
25
 
26
  from huggingface_hub import scan_cache_dir
27
 
 
 
 
28
  HF_TOKEN = os.environ["hf_read"]
29
 
30
  # should be a temporary solution
@@ -54,9 +57,9 @@ tokenizers = ["xlm-roberta-large"]
54
 
55
  def download_hf_models():
56
  for model_id in models:
57
- AutoModelForSequenceClassification.from_pretrained(model_id, device_map="auto", token=HF_TOKEN)
58
  for tokenizer_id in tokenizers:
59
- AutoTokenizer.from_pretrained(tokenizer_id)
60
 
61
 
62
  def df_h():
 
25
 
26
  from huggingface_hub import scan_cache_dir
27
 
28
+ MODELS_PRELOADED = {}
29
+ TOKENIZERS_PRELOADED = {}
30
+
31
  HF_TOKEN = os.environ["hf_read"]
32
 
33
  # should be a temporary solution
 
57
 
58
  def download_hf_models():
59
  for model_id in models:
60
+ MODELS_PRELOADED[model_id] = AutoModelForSequenceClassification.from_pretrained(model_id, device_map="auto", token=HF_TOKEN)
61
  for tokenizer_id in tokenizers:
62
+ TOKENIZERS_PRELOADED[tokenizer_id] = AutoTokenizer.from_pretrained(tokenizer_id)
63
 
64
 
65
  def df_h():