remove cuda
Samuel Thomas committed · Commit 17a32cd · Parent(s): f2a058a
tools.py CHANGED
@@ -807,9 +807,9 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
     new_state = state.copy()  # Start with a copy of the input state
 
     try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
 
         print(f"Invoking LLM with {len(messages_for_llm)} messages.")
         # This is where you call your actual LLM
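The block commented out above is the usual guarded cache-clearing pattern. A minimal standalone sketch of that pattern, with the helper name clear_cuda_cache chosen purely for illustration (it does not appear in tools.py):

import gc
import torch

def clear_cuda_cache(label: str = "Pre-LLM") -> None:
    """Release cached CUDA blocks if a GPU is present; a no-op on CPU-only hosts."""
    gc.collect()  # drop unreachable Python objects first
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # hand cached, unused blocks back to the driver
        used_mb = torch.cuda.memory_allocated() / 1024**2
        print(f"🧹 {label} CUDA cache cleared. Memory: {used_mb:.1f}MB")

If the Space runs without a GPU, torch.cuda.is_available() is always False and the guarded calls were dead code, which is consistent with the "remove cuda" commit message.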
@@ -846,8 +846,8 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
         error_message_content = f"LLM failed due to Out of Memory: {str(e)}."
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
             gc.collect()
         except Exception as cleanup_e:
             print(f"Emergency OOM cleanup failed: {cleanup_e}")
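The emergency handler above now relies on gc.collect() alone. For context, a sketch of the common way such a handler is reached, catching the RuntimeError that a CUDA out-of-memory condition raises; the wrapper name invoke_with_oom_guard is illustrative and the detection logic is an assumption about the surrounding code, not taken from tools.py:

import gc
import torch

def invoke_with_oom_guard(call):
    """Run call() and attempt an emergency cleanup if the GPU runs out of memory."""
    try:
        return call()
    except RuntimeError as e:
        if "out of memory" not in str(e).lower():
            raise  # not an OOM condition: propagate untouched
        print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        raise  # re-raise so the caller can record the failure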
@@ -859,9 +859,10 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         new_state["done"] = True  # Mark as done to prevent loops on LLM failure
     finally:
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+            pass
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
+            #    print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
         except Exception:
             pass  # Avoid error in cleanup hiding the main error
 
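Note on the pass added in this hunk: with every statement in the inner try: commented out, the block would be empty, and Python rejects a block whose body is only comments, so a placeholder statement is required to keep the file importable:

try:
    pass  # placeholder: a try body made only of comments is a syntax error
    #torch.cuda.empty_cache()
except Exception:
    pass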
@@ -1289,9 +1290,9 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache before processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
     except:
         pass
 
@@ -1374,8 +1375,8 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache after processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
     except:
         pass
 
@@ -1671,8 +1672,8 @@ def run_agent(myagent, state: AgentState):
         #del hf_pipe
         #del model_vqa
         #del processor_vqa
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+        #torch.cuda.empty_cache()
+        #torch.cuda.ipc_collect()
         gc.collect()
         print("Released GPU memory after FINAL ANSWER.")
         # Re-initialize for the next run
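After this hunk the CUDA calls are gone and only gc.collect() plus the log line remain. A hedged sketch of a release step that stays safe on both CPU-only and GPU hosts; the function name release_after_final_answer is illustrative, not from tools.py:

import gc
import torch

def release_after_final_answer() -> None:
    """Free what can be freed after FINAL ANSWER, on CPU or GPU hosts."""
    gc.collect()                     # always safe: collect unreachable Python objects
    if torch.cuda.is_available():    # only touch CUDA when a device is present
        torch.cuda.empty_cache()     # give cached blocks back to the driver
        torch.cuda.ipc_collect()     # reclaim memory held by dead CUDA IPC handles
    print("Released GPU memory after FINAL ANSWER.")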