Samuel Thomas committed on
Commit 17a32cd · 1 Parent(s): f2a058a

remove cuda

Files changed (1)
  1. tools.py +16 -15
tools.py CHANGED
@@ -807,9 +807,9 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
     new_state = state.copy()  # Start with a copy of the input state
 
     try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
 
         print(f"Invoking LLM with {len(messages_for_llm)} messages.")
         # This is where you call your actual LLM
@@ -846,8 +846,8 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
         error_message_content = f"LLM failed due to Out of Memory: {str(e)}."
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
             gc.collect()
         except Exception as cleanup_e:
             print(f"Emergency OOM cleanup failed: {cleanup_e}")
@@ -859,9 +859,10 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         new_state["done"] = True  # Mark as done to prevent loops on LLM failure
     finally:
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+            pass
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
+            #    print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
         except Exception:
             pass  # Avoid error in cleanup hiding the main error
 
@@ -1289,9 +1290,9 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache before processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
     except:
         pass
 
@@ -1374,8 +1375,8 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache after processing
    try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
     except:
         pass
 
@@ -1671,8 +1672,8 @@ def run_agent(myagent, state: AgentState):
         #del hf_pipe
         #del model_vqa
         #del processor_vqa
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+        #torch.cuda.empty_cache()
+        #torch.cuda.ipc_collect()
         gc.collect()
         print("Released GPU memory after FINAL ANSWER.")
        # Re-initialize for the next run
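Every hunk comments out the same guarded cache-clear pattern (`if torch.cuda.is_available(): torch.cuda.empty_cache()`, plus `ipc_collect()` in the final hunk). A minimal sketch of how that behaviour could instead be kept behind a single switch, so it can be re-enabled without editing six call sites, is shown below; `clear_gpu_cache` and the `CLEAR_CUDA_CACHE` flag are hypothetical names for illustration and are not part of tools.py.

import gc

import torch

# Hypothetical module-level switch; set to True to restore the old cache-clearing behaviour.
CLEAR_CUDA_CACHE = False


def clear_gpu_cache(label: str = "", ipc: bool = False) -> None:
    """Best-effort GPU cleanup; a no-op when the flag is off or CUDA is unavailable."""
    if not CLEAR_CUDA_CACHE:
        return
    try:
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
            if ipc:
                torch.cuda.ipc_collect()
            print(f"🧹 {label} CUDA cache cleared. "
                  f"Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
        gc.collect()
    except Exception as cleanup_e:
        # Never let cleanup errors mask the original failure.
        print(f"Cache cleanup failed: {cleanup_e}")

Call sites would then read, for example, clear_gpu_cache("Pre-LLM") before the LLM call, clear_gpu_cache("Post-LLM") in the finally block, and clear_gpu_cache("Final answer", ipc=True) in run_agent, and toggling the flag replaces commenting code in and out.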