Commit 853f29a by kovacsvi
Parent(s): 4441b75

cpu memory cleanup
Files changed:
- interfaces/cap.py +2 -2
- interfaces/cap_media_demo.py +2 -2
- interfaces/cap_minor.py +2 -2
- interfaces/cap_minor_media.py +4 -4
- interfaces/emotion.py +2 -2
- interfaces/emotion9.py +2 -2
- interfaces/illframes.py +2 -2
- interfaces/manifesto.py +2 -2
- interfaces/ontolisst.py +2 -2
- interfaces/sentiment.py +2 -2
- interfaces/utils.py +42 -14
interfaces/cap.py
CHANGED
@@ -11,7 +11,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import CAP_NUM_DICT, CAP_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -98,7 +98,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     output_pred = {f"[{CAP_NUM_DICT[i]}] {CAP_LABEL_NAMES[CAP_NUM_DICT[i]]}": probs[i] for i in np.argsort(probs)[::-1]}
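The same two-line change repeats in every interface module below: release_model joins the is_disk_full import, and the helper is called as soon as the forward pass has produced logits. A minimal sketch of the resulting call-site shape, with a hypothetical LABEL_NAMES map and stock transformers loaders standing in for the Space's own loading code:

# Sketch of the call-site pattern this commit applies in each interface.
# LABEL_NAMES and the Auto* loader calls are illustrative stand-ins; only
# the placement of release_model() mirrors the committed change.
import numpy as np
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from .utils import release_model

LABEL_NAMES = {0: "negative", 1: "positive"}  # hypothetical label map

def predict(text, model_id, tokenizer_id):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_id)
    model = AutoModelForSequenceClassification.from_pretrained(model_id)
    inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=512)

    with torch.no_grad():
        logits = model(**inputs).logits
    release_model(model, model_id)  # hand the model to the cleanup helper right after inference

    # logits is a plain tensor, so it stays valid after the model is handed off
    probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
    return {LABEL_NAMES[i]: float(probs[i]) for i in np.argsort(probs)[::-1]}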
interfaces/cap_media_demo.py
CHANGED
@@ -10,7 +10,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import CAP_MEDIA_NUM_DICT, CAP_MEDIA_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -47,7 +47,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     output_pred = {f"[{CAP_MEDIA_NUM_DICT[i]}] {CAP_MEDIA_LABEL_NAMES[CAP_MEDIA_NUM_DICT[i]]}": probs[i] for i in np.argsort(probs)[::-1]}
interfaces/cap_minor.py
CHANGED
@@ -10,7 +10,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import CAP_MIN_NUM_DICT, CAP_MIN_LABEL_NAMES, CAP_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 from itertools import islice
 
 def take(n, iterable):
@@ -79,7 +79,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     output_pred = {f"[{'999' if str(CAP_MIN_NUM_DICT[i]) == '999' else str(CAP_MIN_NUM_DICT[i])[:-2]}]{convert_minor_to_major(CAP_MIN_NUM_DICT[i])} [{CAP_MIN_NUM_DICT[i]}]{CAP_MIN_LABEL_NAMES[CAP_MIN_NUM_DICT[i]]}": probs[i] for i in np.argsort(probs)[::-1]}
interfaces/cap_minor_media.py
CHANGED
@@ -15,7 +15,7 @@ from label_dicts import (CAP_MEDIA_NUM_DICT, CAP_MEDIA_LABEL_NAMES,
                          CAP_MIN_NUM_DICT, CAP_MIN_LABEL_NAMES,
                          CAP_MIN_MEDIA_NUM_DICT)
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -116,8 +116,8 @@ def predict(text, major_model_id, minor_model_id, tokenizer_id, HF_TOKEN=None):
         minor_logits = minor_model(**inputs).logits
         minor_probs = F.softmax(minor_logits, dim=-1)
 
-
-
+    release_model(major_model, major_model_id)
+    release_model(minor_model, minor_model_id)
 
     print(minor_probs) # debug
     # Restrict to valid minor codes
@@ -162,7 +162,7 @@ def predict_flat(text, model_id, tokenizer_id, HF_TOKEN=None):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     top_indices = np.argsort(probs)[::-1][:10]
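cap_minor_media.py is the only interface that holds two models at once, so the commit releases each one under its own label, keeping the before/after log lines distinguishable. One caveat worth noting: release_model deletes only its own local reference to the object passed in, so while the caller still holds major_model or minor_model in scope the weights cannot be garbage-collected; until that reference disappears, the immediate effect is the gc.collect() pass and, for CUDA models, the torch.cuda.empty_cache() call.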
interfaces/emotion.py
CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import MANIFESTO_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -40,7 +40,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     output_pred = {model.config.id2label[i]: probs[i] for i in np.argsort(probs)[::-1]}
interfaces/emotion9.py
CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import EMOTION9_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -38,7 +38,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
 
interfaces/illframes.py
CHANGED
@@ -10,7 +10,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import ILLFRAMES_MIGRATION_LABEL_NAMES, ILLFRAMES_COVID_LABEL_NAMES, ILLFRAMES_WAR_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -80,7 +80,7 @@ def predict(text, model_id, tokenizer_id, label_names):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
 
interfaces/manifesto.py
CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import MANIFESTO_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -38,7 +38,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     output_pred = {f"[{model.config.id2label[i]}] {MANIFESTO_LABEL_NAMES[int(model.config.id2label[i])]}": probs[i] for i in np.argsort(probs)[::-1]}
interfaces/ontolisst.py
CHANGED
@@ -15,7 +15,7 @@ languages = [
 
 from label_dicts import ONTOLISST_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 # --- DEBUG ---
 import shutil
@@ -67,7 +67,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     predicted_class_id = probs.argmax()
interfaces/sentiment.py
CHANGED
@@ -9,7 +9,7 @@ from huggingface_hub import HfApi
 
 from label_dicts import MANIFESTO_LABEL_NAMES
 
-from .utils import is_disk_full
+from .utils import is_disk_full, release_model
 
 HF_TOKEN = os.environ["hf_read"]
 
@@ -43,7 +43,7 @@ def predict(text, model_id, tokenizer_id):
 
     with torch.no_grad():
         logits = model(**inputs).logits
-
+    release_model(model, model_id)
 
     probs = torch.nn.functional.softmax(logits, dim=1).cpu().numpy().flatten()
     predicted_class_id = probs.argmax()
interfaces/utils.py
CHANGED
@@ -1,5 +1,6 @@
 import shutil
 import torch
+import psutil
 import gc
 
 
@@ -15,25 +16,52 @@ def is_disk_full(min_free_space_in_GB=10):
     return True
 
 
-def …
+def release_model(model=None, label='Model'):
     """
-    …
-    … and clearing PyTorch's CUDA cache.
+    Releases CPU and GPU memory used by a model or pipeline.
 
     Args:
-        …
-        label: …
+        model: The object to delete (e.g., model, pipeline).
+        label: String label for log output.
     """
-    …
-    …
-    …
-    …
-    …
+    using_cuda = torch.cuda.is_available()
+    was_cuda = False
+
+    # CPU memory before
+    process = psutil.Process(os.getpid())
+    mem_cpu_before = process.memory_info().rss / 1e6  # MB
+
+    if using_cuda:
+        mem_gpu_before = torch.cuda.memory_allocated() / 1e6  # memory_allocated() returns bytes
+        print(f"\n[{label}] GPU memory before release: {mem_gpu_before:.2f} MB")
+
+    print(f"[{label}] CPU memory before release: {mem_cpu_before:.2f} MB")
+
+    # Try to detect if model was on CUDA
+    if model is not None:
+        try:
+            if hasattr(model, 'parameters'):
+                was_cuda = any(p.is_cuda for p in model.parameters())
+        except Exception as e:
+            print(f"[{label}] Could not check device: {e}")
+        del model
 
+    # Garbage collection and cache clearing
     gc.collect()
-    …
-
-    if torch.cuda.is_available():
-        print(f"[{label}] After deletion: {torch.cuda.memory_allocated() / 1e6:.2f} MB\n")
-
+    if using_cuda:
+        if was_cuda:
+            torch.cuda.empty_cache()
+        else:
+            print(f"[{label}] ⚠️ Model was not using CUDA, but CUDA is available.")
+
+    # CPU memory after
+    mem_cpu_after = process.memory_info().rss / 1e6  # MB
+    print(f"[{label}] CPU memory after release: {mem_cpu_after:.2f} MB")
+
+    if using_cuda:
+        mem_gpu_after = torch.cuda.memory_allocated() / 1e6  # memory_allocated() returns bytes
+        print(f"[{label}] GPU memory after release: {mem_gpu_after:.2f} MB\n")
+    else:
+        print(f"[{label}] CUDA not available — GPU memory not tracked.\n")
+
 
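A quick way to exercise the new helper outside the Space, assuming the repository root is on sys.path so that interfaces.utils is importable; the plain nn.Linear stands in for a transformers model, and the printed numbers vary by platform:

# Hedged usage sketch for release_model; not part of the commit.
import os

import psutil
import torch

from interfaces.utils import release_model  # assumes the repo root is on sys.path

proc = psutil.Process(os.getpid())
print(f"RSS before load: {proc.memory_info().rss / 1e6:.2f} MB")

model = torch.nn.Linear(8192, 8192)  # ~268 MB of float32 weights
print(f"RSS after load:  {proc.memory_info().rss / 1e6:.2f} MB")

release_model(model, "linear-demo")  # prints CPU (and, if available, GPU) memory before/after
del model  # drop the caller's reference too, so the weights can actually be freed

Because the del inside release_model removes only that function's local binding, the "after release" numbers it prints will not drop while the caller still references the model; in the predict() functions above, the weights can only be reclaimed once the function returns and the caller-side reference disappears.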