Spaces:

AkashDataScience
/

GPT-2

Sleeping

AkashDataScience committed on Jul 3, 2024

Commit

6c3debe

1 Parent(s): 59bc37f

Loading model properly

Files changed (1) hide show

app.py CHANGED Viewed

@@ -3,7 +3,6 @@ import tiktoken
 import gradio as gr
 import torch.nn.functional as F
 from model import GPT, GPTConfig
-torch._dynamo.reset()
 device = 'cpu'
 if torch.cuda.is_available():
@@ -12,10 +11,14 @@ elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
     device = "mps"
 model = GPT(GPTConfig())
-model.load_state_dict(torch.load("gpt2.pt", map_location=torch.device(device)))
 model.to(device)
-model = torch.compile(model, fullgraph=True, backend="cudagraphs")
 enc = tiktoken.get_encoding('gpt2')

 import gradio as gr
 import torch.nn.functional as F
 from model import GPT, GPTConfig
 device = 'cpu'
 if torch.cuda.is_available():
     device = "mps"
 model = GPT(GPTConfig())
+ckpt = torch.load("gpt2.pt", map_location=torch.device(device))
+unwanted_prefix = '_orig_mod.'
+for k,v in list(ckpt.items()):
+    if k.startswith(unwanted_prefix):
+        ckpt[k[len(unwanted_prefix):]] = ckpt.pop(k)
+model.load_state_dict(ckpt)
 model.to(device)
 enc = tiktoken.get_encoding('gpt2')