ejschwartz committed on
Commit
51b3a2c
·
1 Parent(s): 7197e11

More debugging

Browse files
Files changed (1) hide show
  1. app.py +16 -2
app.py CHANGED
@@ -41,7 +41,7 @@ tokenizer = AutoTokenizer.from_pretrained("bigcode/starcoderbase-3b")
41
  vardecoder_model = AutoModelForCausalLM.from_pretrained(
42
  "ejschwartz/resym-vardecoder",
43
  torch_dtype=torch.bfloat16,
44
- device_map="cuda",
45
  )
46
  print("Loaded vardecoder model successfully.")
47
 
@@ -50,12 +50,26 @@ print(f"Model dtype: {next(vardecoder_model.parameters()).dtype}")
50
  print(f"Model is meta: {next(vardecoder_model.parameters()).is_meta}")
51
  print(f"Model parameters: {sum(p.numel() for p in vardecoder_model.parameters() if p.requires_grad):,}")
52
 
 
 
 
 
 
 
53
 
54
  # Check memory after first model
55
  print(f"GPU memory after vardecoder:")
56
  print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
57
  print(f"Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
58
 
 
 
 
 
 
 
 
 
59
  # Add more detailed debugging before loading the second model
60
  try:
61
  logger.info("Loading fielddecoder model...")
@@ -67,7 +81,7 @@ try:
67
  fielddecoder_model = AutoModelForCausalLM.from_pretrained(
68
  "ejschwartz/resym-fielddecoder",
69
  torch_dtype=torch.bfloat16,
70
- device_map="cuda",
71
  )
72
  logger.info("Successfully loaded fielddecoder model")
73
  except Exception as e:
 
41
  vardecoder_model = AutoModelForCausalLM.from_pretrained(
42
  "ejschwartz/resym-vardecoder",
43
  torch_dtype=torch.bfloat16,
44
+ device_map="cpu",
45
  )
46
  print("Loaded vardecoder model successfully.")
47
 
 
50
  print(f"Model is meta: {next(vardecoder_model.parameters()).is_meta}")
51
  print(f"Model parameters: {sum(p.numel() for p in vardecoder_model.parameters() if p.requires_grad):,}")
52
 
53
+ # Check if parameters actually have data
54
+ sample_param = next(vardecoder_model.parameters())
55
+ print(f"Sample parameter shape: {sample_param.shape}")
56
+ print(f"Sample parameter requires_grad: {sample_param.requires_grad}")
57
+ print(f"Sample parameter data type: {type(sample_param.data)}")
58
+ print(f"Sample parameter storage: {sample_param.storage()}")
59
 
60
  # Check memory after first model
61
  print(f"GPU memory after vardecoder:")
62
  print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
63
  print(f"Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
64
 
65
+ # Try manual memory allocation test
66
+ test_tensor = torch.randn(1000, 1000, dtype=torch.bfloat16, device='cuda')
67
+ print(f"GPU memory after test tensor:")
68
+ print(f"Allocated: {torch.cuda.memory_allocated() / 1024**3:.2f} GB")
69
+ print(f"Reserved: {torch.cuda.memory_reserved() / 1024**3:.2f} GB")
70
+ del test_tensor
71
+ torch.cuda.empty_cache()
72
+
73
  # Add more detailed debugging before loading the second model
74
  try:
75
  logger.info("Loading fielddecoder model...")
 
81
  fielddecoder_model = AutoModelForCausalLM.from_pretrained(
82
  "ejschwartz/resym-fielddecoder",
83
  torch_dtype=torch.bfloat16,
84
+ device_map="cpu",
85
  )
86
  logger.info("Successfully loaded fielddecoder model")
87
  except Exception as e: