pdarleyjr committed
Commit 9bad572 · 1 Parent(s): 2e8b75e

Fix device placement and memory handling

Files changed (1)
app.py  +10 -5
app.py CHANGED
@@ -83,17 +83,19 @@ class ModelManager:
 
         # Load the fine-tuned model
         logger.info("Loading fine-tuned model (this may take a few minutes)...")
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        logger.info(f"Using device: {device}")
+
         self.model = T5ForConditionalGeneration.from_pretrained(
             "pdarleyjr/iplc-t5-model",
             config=config,
-            device_map="auto",
-            torch_dtype=torch.float16,
+            torch_dtype=torch.float16 if device == "cuda" else torch.float32,
             low_cpu_mem_usage=True
-        )
+        ).to(device)
         logger.success("Model loaded successfully")
 
         # Prepare model with accelerator
-        self.model = self.accelerator.prepare(self.model)
+        self.model = self.accelerator.prepare_model(self.model)
         logger.success("Model prepared with accelerator")
 
         # Log final memory usage
@@ -173,7 +175,10 @@ async def predict(request: PredictRequest) -> JSONResponse:
 
     # Generate summary with error handling
     try:
-        with model_manager.accelerator.autocast():
+        device = next(model_manager.model.parameters()).device
+        input_ids = input_ids.to(device)
+
+        with torch.no_grad(), model_manager.accelerator.autocast():
             outputs = model_manager.model.generate(
                 input_ids,
                 max_length=256,
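
For reference, below is a minimal standalone sketch of the device-handling pattern this commit applies: pick the device up front, use fp16 only on CUDA, move the model with .to(device), and send inputs to wherever the model's parameters actually live before generating. It is not the app's code; the AutoTokenizer choice and the example prompt are assumptions for illustration, and the real app also wraps the model with an Accelerator.

import torch
from transformers import AutoTokenizer, T5ForConditionalGeneration

device = "cuda" if torch.cuda.is_available() else "cpu"
# fp16 halves GPU memory use; CPU inference generally needs fp32
dtype = torch.float16 if device == "cuda" else torch.float32

# AutoTokenizer is an assumption here; the commit only touches model loading.
tokenizer = AutoTokenizer.from_pretrained("pdarleyjr/iplc-t5-model")
model = T5ForConditionalGeneration.from_pretrained(
    "pdarleyjr/iplc-t5-model",
    torch_dtype=dtype,
    low_cpu_mem_usage=True,
).to(device)
model.eval()

# Illustrative input; the real app builds its prompt from the request payload.
input_ids = tokenizer("summarize: example clinical note", return_tensors="pt").input_ids
# Move inputs to wherever the model's weights actually ended up.
input_ids = input_ids.to(next(model.parameters()).device)

with torch.no_grad():
    outputs = model.generate(input_ids, max_length=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

Reading the device from next(model.parameters()).device rather than re-deriving it guards against a mismatch if the Accelerator re-places the model after loading, which is why the predict hunk does the same.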