remove cuda
Samuel Thomas committed · Commit 17a32cd · Parent(s): f2a058a
tools.py CHANGED
@@ -807,9 +807,9 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
     new_state = state.copy()  # Start with a copy of the input state
 
     try:
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Pre-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
 
         print(f"Invoking LLM with {len(messages_for_llm)} messages.")
         # This is where you call your actual LLM
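The block commented out above is the usual guarded cache-clearing pattern. A minimal standalone sketch of that pattern, with the helper name clear_cuda_cache chosen purely for illustration (it does not appear in tools.py):

import gc
import torch

def clear_cuda_cache(label: str = "Pre-LLM") -> None:
    """Release cached CUDA blocks if a GPU is present; a no-op on CPU-only hosts."""
    gc.collect()  # drop unreachable Python objects first
    if torch.cuda.is_available():
        torch.cuda.empty_cache()  # hand cached, unused blocks back to the driver
        used_mb = torch.cuda.memory_allocated() / 1024**2
        print(f"🧹 {label} CUDA cache cleared. Memory: {used_mb:.1f}MB")

If the Space runs without a GPU, torch.cuda.is_available() is always False and the guarded calls were dead code, which is consistent with the "remove cuda" commit message.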
@@ -846,8 +846,8 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
         error_message_content = f"LLM failed due to Out of Memory: {str(e)}."
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
             gc.collect()
         except Exception as cleanup_e:
             print(f"Emergency OOM cleanup failed: {cleanup_e}")
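The emergency handler above now relies on gc.collect() alone. For context, a sketch of the common way such a handler is reached, catching the RuntimeError that a CUDA out-of-memory condition raises; the wrapper name invoke_with_oom_guard is illustrative and the detection logic is an assumption about the surrounding code, not taken from tools.py:

import gc
import torch

def invoke_with_oom_guard(call):
    """Run call() and attempt an emergency cleanup if the GPU runs out of memory."""
    try:
        return call()
    except RuntimeError as e:
        if "out of memory" not in str(e).lower():
            raise  # not an OOM condition: propagate untouched
        print("🚨 CUDA OOM detected during LLM call! Implementing emergency cleanup...")
        gc.collect()
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        raise  # re-raise so the caller can record the failure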
@@ -859,9 +859,10 @@ def call_llm_with_memory_management(state: AgentState, llm_model) -> AgentState:
         new_state["done"] = True  # Mark as done to prevent loops on LLM failure
     finally:
         try:
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-                print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+            pass
+            #if torch.cuda.is_available():
+            #    torch.cuda.empty_cache()
+            #    print(f"🧹 Post-LLM CUDA cache cleared. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
         except Exception:
             pass  # Avoid error in cleanup hiding the main error
 
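Note on the pass added in this hunk: with every statement in the inner try: commented out, the block would be empty, and Python rejects a block whose body is only comments, so a placeholder statement is required to keep the file importable:

try:
    pass  # placeholder: a try body made only of comments is a syntax error
    #torch.cuda.empty_cache()
except Exception:
    pass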
@@ -1289,9 +1290,9 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache before processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
-            print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
+        #    print(f"🧹 Cleared CUDA cache. Memory: {torch.cuda.memory_allocated()/1024**2:.1f}MB")
     except:
         pass
 
@@ -1374,8 +1375,8 @@ def call_tool_with_memory_management(state: AgentState) -> AgentState:
     # Clear CUDA cache after processing
     try:
         import torch
-        if torch.cuda.is_available():
-            torch.cuda.empty_cache()
+        #if torch.cuda.is_available():
+        #    torch.cuda.empty_cache()
     except:
         pass
 
@@ -1671,8 +1672,8 @@ def run_agent(myagent, state: AgentState):
         #del hf_pipe
         #del model_vqa
         #del processor_vqa
-        torch.cuda.empty_cache()
-        torch.cuda.ipc_collect()
+        #torch.cuda.empty_cache()
+        #torch.cuda.ipc_collect()
         gc.collect()
         print("Released GPU memory after FINAL ANSWER.")
         # Re-initialize for the next run
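After this hunk the CUDA calls are gone and only gc.collect() plus the log line remain. A hedged sketch of a release step that stays safe on both CPU-only and GPU hosts; the function name release_after_final_answer is illustrative, not from tools.py:

import gc
import torch

def release_after_final_answer() -> None:
    """Free what can be freed after FINAL ANSWER, on CPU or GPU hosts."""
    gc.collect()                     # always safe: collect unreachable Python objects
    if torch.cuda.is_available():    # only touch CUDA when a device is present
        torch.cuda.empty_cache()     # give cached blocks back to the driver
        torch.cuda.ipc_collect()     # reclaim memory held by dead CUDA IPC handles
    print("Released GPU memory after FINAL ANSWER.")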