Spaces:

ECCV2022
/

storydalle

Build error

App Files Community

adymaharana committed on Sep 14, 2022

Commit

1cac669

1 Parent(s): 77e955b

restart

Browse files

Files changed (4) hide show

app.py +6 -2
dalle/__pycache__/__init__.cpython-38.pyc +0 -0
dalle/models/__init__.py +15 -12
dalle/models/__pycache__/__init__.cpython-38.pyc +0 -0

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os, torch
 import gradio as gr
 import torchvision.utils as vutils
 import torchvision.transforms as transforms
@@ -68,6 +68,7 @@ def save_story_results(images, video_len=4, n_candidates=1, mask=None):
 def main(args):
     #device = 'cuda:0'
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model_url = 'https://drive.google.com/u/1/uc?id=1KAXVtE8lEE2Yc83VY7w6ycOOMkdWbmJo&export=sharing'
@@ -77,7 +78,7 @@ def main(args):
     #if not os.path.exists("./ckpt/25.pth"):
     #    gdown.download(model_url, quiet=False, use_cookies=False, output="./ckpt/25.pth")
     #    print("Downloaded checkpoint")
-    assert os.path.exists("./ckpt/25.pth")
     gdown.download(png_url, quiet=True, use_cookies=False, output="demo_pororo_good.png")
     if args.debug:
@@ -102,6 +103,9 @@ def main(args):
              transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
         )
     def predict(caption_1, caption_2, caption_3, caption_4, source='Pororo', top_k=32, top_p=0.2, n_candidates=4,
                 supercondition=False):

+import os, sys, torch
 import gradio as gr
 import torchvision.utils as vutils
 import torchvision.transforms as transforms
 def main(args):
     #device = 'cuda:0'
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+    #device = torch.device('cpu')
     model_url = 'https://drive.google.com/u/1/uc?id=1KAXVtE8lEE2Yc83VY7w6ycOOMkdWbmJo&export=sharing'
     #if not os.path.exists("./ckpt/25.pth"):
     #    gdown.download(model_url, quiet=False, use_cookies=False, output="./ckpt/25.pth")
     #    print("Downloaded checkpoint")
+    #assert os.path.exists("./ckpt/25.pth")
     gdown.download(png_url, quiet=True, use_cookies=False, output="demo_pororo_good.png")
     if args.debug:
              transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5])]
         )
+    #torch.save(model, './ckpt/checkpoint.pt')
+    #sys.exit()
     def predict(caption_1, caption_2, caption_3, caption_4, source='Pororo', top_k=32, top_p=0.2, n_candidates=4,
                 supercondition=False):

dalle/__pycache__/__init__.cpython-38.pyc CHANGED Viewed

Binary files a/dalle/__pycache__/__init__.cpython-38.pyc and b/dalle/__pycache__/__init__.cpython-38.pyc differ

dalle/models/__init__.py CHANGED Viewed

@@ -23,6 +23,7 @@ from ..utils.utils import save_image
 from .tokenizer import build_tokenizer
 import numpy as np
 from .stage2.layers import CrossAttentionLayer
 _MODELS = {
     'minDALL-E/1.3B': 'https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz'
@@ -1191,7 +1192,9 @@ class StoryDalle(Dalle):
             print("Loaded tokenizer from finetuned checkpoint")
             print(model.cross_attention_idxs)
             print("Loading model from pretrained checkpoint %s" % args.model_name_or_path)
             # model.from_ckpt(args.model_name_or_path)
             try:
                 model.load_state_dict(torch.load(args.model_name_or_path, map_location=torch.device('cpu'))['state_dict'])
             except KeyError:
@@ -1248,9 +1251,9 @@ class StoryDalle(Dalle):
         #{"input_ids": batch, "labels": labels, 'src_attn': src_attn, 'tgt_attn':tgt_attn, 'src':src}
         with torch.no_grad():
-            with autocast(enabled=False):
-                codes = self.stage1.get_codes(images).detach()
-                src_codes = self.stage1.get_codes(src_images).detach()
         B, C, H, W = images.shape
@@ -1310,8 +1313,8 @@ class StoryDalle(Dalle):
         # Check if the encoding works as intended
         # print(self.tokenizer.decode_batch(tokens.tolist(), skip_special_tokens=True)[0])
-        tokens = tokens.to(device)
-        source = source.to(device)
         # print(tokens.shape, sent_embeds.shape, prompt.shape)
         B, L, _ = sent_embeds.shape
@@ -1322,8 +1325,8 @@ class StoryDalle(Dalle):
             prompt = sent_embeds
         pos_enc_prompt = get_positional_encoding(torch.arange(prompt.shape[1]).long().unsqueeze(0).expand(B*L, -1).to(self.device), mode='1d')
-        with autocast(enabled=False):
-            src_codes = self.stage1.get_codes(source).detach()
         src_codes = torch.repeat_interleave(src_codes, self.config.story.story_len, dim=0)
         print(tokens.shape, src_codes.shape, prompt.shape)
         if self.config.story.condition:
@@ -1378,8 +1381,8 @@ class StoryDalle(Dalle):
         # Check if the encoding works as intended
         # print(self.tokenizer.decode_batch(tokens.tolist(), skip_special_tokens=True)[0])
-        tokens = tokens.to(device)
-        source = source.to(device)
         # print(tokens.shape, sent_embeds.shape, prompt.shape)
         B, L, _ = sent_embeds.shape
@@ -1389,10 +1392,10 @@ class StoryDalle(Dalle):
         else:
             prompt = sent_embeds
         pos_enc_prompt = get_positional_encoding(
-            torch.arange(prompt.shape[1]).long().unsqueeze(0).expand(B * L, -1).to(self.device), mode='1d')
-        with autocast(enabled=False):
-            src_codes = self.stage1.get_codes(source).detach()
         # repeat inputs to adjust to n_candidates and story length
         src_codes = torch.repeat_interleave(src_codes, self.config.story.story_len * n_candidates, dim=0)

 from .tokenizer import build_tokenizer
 import numpy as np
 from .stage2.layers import CrossAttentionLayer
+from huggingface_hub import hf_hub_download
 _MODELS = {
     'minDALL-E/1.3B': 'https://arena.kakaocdn.net/brainrepo/models/minDALL-E/57b008f02ceaa02b779c8b7463143315/1.3B.tar.gz'
             print("Loaded tokenizer from finetuned checkpoint")
             print(model.cross_attention_idxs)
             print("Loading model from pretrained checkpoint %s" % args.model_name_or_path)
             # model.from_ckpt(args.model_name_or_path)
             try:
                 model.load_state_dict(torch.load(args.model_name_or_path, map_location=torch.device('cpu'))['state_dict'])
             except KeyError:
         #{"input_ids": batch, "labels": labels, 'src_attn': src_attn, 'tgt_attn':tgt_attn, 'src':src}
         with torch.no_grad():
+            #with autocast(enabled=False):
+            codes = self.stage1.get_codes(images).detach()
+            src_codes = self.stage1.get_codes(src_images).detach()
         B, C, H, W = images.shape
         # Check if the encoding works as intended
         # print(self.tokenizer.decode_batch(tokens.tolist(), skip_special_tokens=True)[0])
+        #tokens = tokens.to(device)
+        #source = source.to(device)
         # print(tokens.shape, sent_embeds.shape, prompt.shape)
         B, L, _ = sent_embeds.shape
             prompt = sent_embeds
         pos_enc_prompt = get_positional_encoding(torch.arange(prompt.shape[1]).long().unsqueeze(0).expand(B*L, -1).to(self.device), mode='1d')
+        #with autocast(enabled=False):
+        src_codes = self.stage1.get_codes(source).detach()
         src_codes = torch.repeat_interleave(src_codes, self.config.story.story_len, dim=0)
         print(tokens.shape, src_codes.shape, prompt.shape)
         if self.config.story.condition:
         # Check if the encoding works as intended
         # print(self.tokenizer.decode_batch(tokens.tolist(), skip_special_tokens=True)[0])
+        #tokens = tokens.to(device)
+        #source = source.to(device)
         # print(tokens.shape, sent_embeds.shape, prompt.shape)
         B, L, _ = sent_embeds.shape
         else:
             prompt = sent_embeds
         pos_enc_prompt = get_positional_encoding(
+            torch.arange(prompt.shape[1]).long().unsqueeze(0).expand(B * L, -1).to(tokens.device), mode='1d')
+        #with autocast(enabled=False):
+        src_codes = self.stage1.get_codes(source).detach()
         # repeat inputs to adjust to n_candidates and story length
         src_codes = torch.repeat_interleave(src_codes, self.config.story.story_len * n_candidates, dim=0)

dalle/models/__pycache__/__init__.cpython-38.pyc CHANGED Viewed

Binary files a/dalle/models/__pycache__/__init__.cpython-38.pyc and b/dalle/models/__pycache__/__init__.cpython-38.pyc differ