512 model
app.py CHANGED
@@ -95,13 +95,16 @@ def get_caption(llm: str, text_model, prompt_dict: dict, batch_size: int):
 # Load configuration and initialize models.
 # config_dict = t2i_512px_clip_dimr.get_config()
 config_dict = t2i_256px_clip_dimr.get_config()
-config = ml_collections.ConfigDict(config_dict)
+config_1 = ml_collections.ConfigDict(config_dict)
+config_dict = t2i_512px_clip_dimr.get_config()
+config_2 = ml_collections.ConfigDict(config_dict)
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 logging.info(f"Using device: {device}")
 
 # Freeze configuration.
-config = ml_collections.FrozenConfigDict(config)
+config_1 = ml_collections.FrozenConfigDict(config_1)
+config_2 = ml_collections.FrozenConfigDict(config_2)
 
 torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 MAX_SEED = np.iinfo(np.int32).max
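The hunk above builds two configurations, one for the 256px model and one for the 512px model, then freezes both so they cannot be mutated later. A minimal sketch of this `ml_collections` pattern, with a hypothetical `img_size` field for illustration (not taken from the CrossFlow configs):

    import ml_collections

    def get_config():
        cfg = ml_collections.ConfigDict()
        cfg.nnet = ml_collections.ConfigDict()
        cfg.nnet.img_size = 256  # hypothetical field for illustration
        return cfg

    config = ml_collections.FrozenConfigDict(get_config())
    print(config.nnet.img_size)   # fields read via attribute access
    # config.nnet.img_size = 512  # would raise: FrozenConfigDict is immutable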
@@ -112,11 +115,19 @@ repo_id = "QHL067/CrossFlow"
 # filename = "pretrained_models/t2i_512px_clip_dimr.pth"
 filename = "pretrained_models/t2i_256px_clip_dimr.pth"
 checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)
-nnet = utils.get_nnet(**config.nnet)
-nnet = nnet.to(device)
+nnet_1 = utils.get_nnet(**config_1.nnet)
+nnet_1 = nnet_1.to(device)
 state_dict = torch.load(checkpoint_path, map_location=device)
-nnet.load_state_dict(state_dict)
-nnet.eval()
+nnet_1.load_state_dict(state_dict)
+nnet_1.eval()
+
+filename = "pretrained_models/t2i_512px_clip_dimr.pth"
+checkpoint_path = hf_hub_download(repo_id=repo_id, filename=filename)
+nnet_2 = utils.get_nnet(**config_2.nnet)
+nnet_2 = nnet_2.to(device)
+state_dict = torch.load(checkpoint_path, map_location=device)
+nnet_2.load_state_dict(state_dict)
+nnet_2.eval()
 
 # Initialize text model.
 llm = "clip"
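Both checkpoints come from the same `QHL067/CrossFlow` repo and are loaded into separate networks that stay resident in memory. A sketch of the download-and-load pattern, using a stand-in `torch.nn.Linear` since `utils.get_nnet` is repo-specific:

    import torch
    from huggingface_hub import hf_hub_download

    path = hf_hub_download(repo_id="QHL067/CrossFlow",
                           filename="pretrained_models/t2i_512px_clip_dimr.pth")
    model = torch.nn.Linear(4, 4)  # stand-in; the app builds this via utils.get_nnet
    state_dict = torch.load(path, map_location="cpu")
    # model.load_state_dict(state_dict)  # keys must match the real architecture
    model.eval()  # disable dropout/batch-norm updates for inference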
@@ -170,6 +181,11 @@ def infer(
     else:
         assert num_of_interpolation == 3, "For arithmetic, please sample three images."
 
+    if num_of_interpolation == 3:
+        nnet = nnet_2
+    else:
+        nnet = nnet_1
+
     # Get text embeddings and tokens.
     _context, _token_mask, _token, _caption = get_caption(
         llm, clip, prompt_dict=prompt_dict, batch_size=num_of_interpolation
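With both networks loaded, `infer()` now routes the three-image arithmetic case to the 512px model (`nnet_2`) and everything else to the 256px model (`nnet_1`). A hypothetical helper expressing the same dispatch:

    def pick_model(num_of_interpolation: int, nnet_1, nnet_2):
        # Arithmetic mode samples exactly three images -> 512px model.
        return nnet_2 if num_of_interpolation == 3 else nnet_1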