lorocksUMD committed on
Commit
9c1f1cf
·
verified ·
1 Parent(s): 908c4e4

Upload 32 files

Browse files
DenseAV/denseav/aggregators.py CHANGED
@@ -6,7 +6,7 @@ import torch.nn as nn
6
  import torch.nn.functional as F
7
  from tqdm import tqdm
8
 
9
- from denseav.constants import *
10
 
11
 
12
  @torch.jit.script
 
6
  import torch.nn.functional as F
7
  from tqdm import tqdm
8
 
9
+ from constants import *
10
 
11
 
12
  @torch.jit.script
DenseAV/denseav/aligners.py CHANGED
@@ -4,7 +4,7 @@ import torch
4
  import torch.nn.functional as F
5
  from torch.nn import ModuleList
6
 
7
- from denseav.featurizers.DINO import Block
8
 
9
 
10
  class ChannelNorm(torch.nn.Module):
 
4
  import torch.nn.functional as F
5
  from torch.nn import ModuleList
6
 
7
+ from featurizers.DINO import Block
8
 
9
 
10
  class ChannelNorm(torch.nn.Module):
DenseAV/denseav/data/AVDatasets.py CHANGED
@@ -18,9 +18,13 @@ from PIL import Image
18
  from torch.utils.data import Dataset, DataLoader, default_collate, Subset, ConcatDataset
19
  from tqdm import tqdm
20
 
21
- from denseav.constants import AUDIO_MASK, AUDIO_POS_MASK, IMAGE_MASK, IMAGE_INPUT
22
- from denseav.data.make_tarballs import untar_all
23
- from denseav.shared import norm, prep_waveform
 
 
 
 
24
 
25
 
26
  def sample_choice(choices, probs):
 
18
  from torch.utils.data import Dataset, DataLoader, default_collate, Subset, ConcatDataset
19
  from tqdm import tqdm
20
 
21
+ import sys
22
+ sys.path.append('../constants')
23
+ sys.path.append('../shared')
24
+
25
+ from constants import AUDIO_MASK, AUDIO_POS_MASK, IMAGE_MASK, IMAGE_INPUT
26
+ from make_tarballs import untar_all
27
+ from shared import norm, prep_waveform
28
 
29
 
30
  def sample_choice(choices, probs):
DenseAV/denseav/data/make_tarballs.py CHANGED
@@ -9,11 +9,12 @@ from torch.utils.data import Dataset, DataLoader
9
  from tqdm import tqdm
10
  from pathlib import Path
11
 
12
- from denseav.shared import batch
13
-
14
  import tempfile
15
  import shutil
16
 
 
 
 
17
 
18
  class Tarballer(Dataset):
19
 
 
9
  from tqdm import tqdm
10
  from pathlib import Path
11
 
 
 
12
  import tempfile
13
  import shutil
14
 
15
+ import sys
16
+ sys.path.append('../shared')
17
+ from shared import batch
18
 
19
  class Tarballer(Dataset):
20
 
DenseAV/denseav/eval_utils.py CHANGED
@@ -9,7 +9,7 @@ from torchmetrics.functional.classification import binary_average_precision
9
  from tqdm import tqdm
10
 
11
  from constants import *
12
- from denseav.shared import unnorm, remove_axes
13
 
14
 
15
  def prep_heatmap(sims, masks, h, w):
 
9
  from tqdm import tqdm
10
 
11
  from constants import *
12
+ from shared import unnorm, remove_axes
13
 
14
 
15
  def prep_heatmap(sims, masks, h, w):
DenseAV/denseav/evaluate.py CHANGED
@@ -4,8 +4,8 @@ from omegaconf import DictConfig, OmegaConf
4
  from pytorch_lightning import Trainer
5
  from pytorch_lightning import seed_everything
6
  from pytorch_lightning.loggers import TensorBoardLogger
7
- from denseav.data.AVDatasets import AVDataModule
8
- from denseav.shared import load_trained_model
9
 
10
 
11
  @hydra.main(config_path="configs", config_name="av_align.yaml")
 
4
  from pytorch_lightning import Trainer
5
  from pytorch_lightning import seed_everything
6
  from pytorch_lightning.loggers import TensorBoardLogger
7
+ from data.AVDatasets import AVDataModule
8
+ from shared import load_trained_model
9
 
10
 
11
  @hydra.main(config_path="configs", config_name="av_align.yaml")
DenseAV/denseav/plotting.py CHANGED
@@ -10,7 +10,7 @@ import torch.nn.functional as F
10
  import torchvision
11
  from moviepy.editor import VideoFileClip, AudioFileClip
12
  from base64 import b64encode
13
- from denseav.shared import pca
14
 
15
 
16
  def write_video_with_audio(video_frames, audio_array, video_fps, audio_fps, output_path):
 
10
  import torchvision
11
  from moviepy.editor import VideoFileClip, AudioFileClip
12
  from base64 import b64encode
13
+ from shared import pca
14
 
15
 
16
  def write_video_with_audio(video_frames, audio_array, video_fps, audio_fps, output_path):
DenseAV/denseav/shared.py CHANGED
@@ -90,37 +90,37 @@ def get_image_featurizer(name, token_type="key", **kwargs):
90
  name = name.lower()
91
 
92
  if name == "vit":
93
- from denseav.featurizers.DINO import DINOFeaturizer
94
  patch_size = 16
95
  model = DINOFeaturizer("vit_small_patch16_224", patch_size, token_type)
96
  dim = 384
97
  elif name == "dino16":
98
- from denseav.featurizers.DINO import DINOFeaturizer
99
  patch_size = 16
100
  model = DINOFeaturizer("dino_vits16", patch_size, token_type)
101
  dim = 384
102
  elif name == "dino8":
103
- from denseav.featurizers.DINO import DINOFeaturizer
104
  patch_size = 8
105
  model = DINOFeaturizer("dino_vits8", patch_size, token_type)
106
  dim = 384
107
  elif name == "clip":
108
- from denseav.featurizers.CLIP import CLIPFeaturizer
109
  patch_size = 16
110
  model = CLIPFeaturizer()
111
  dim = 512
112
  elif name == "cavmae":
113
- from denseav.featurizers.CAVMAE import CAVMAEImageFeaturizer
114
  model = CAVMAEImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
115
  dim = 768
116
  patch_size = 16
117
  elif name == "fnac":
118
- from denseav.featurizers.FNACAVL import FNACImageFeaturizer
119
  model = FNACImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
120
  dim = 512
121
  patch_size = 16
122
  elif name == "imagebind":
123
- from denseav.featurizers.ImageBind import ImageBindImageFeaturizer
124
  model = ImageBindImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
125
  dim = 1024
126
  patch_size = 16
@@ -131,12 +131,12 @@ def get_image_featurizer(name, token_type="key", **kwargs):
131
  patch_size = 1
132
  dim = 2048
133
  elif name == "davenet":
134
- from fdenseav.eaturizers.DAVENet import DavenetImageFeaturizer
135
  model = DavenetImageFeaturizer()
136
  patch_size = 1
137
  dim = 1024
138
  elif name == "dinov2":
139
- from denseav.featurizers.DINOv2 import DINOv2Featurizer
140
  model = DINOv2Featurizer()
141
  patch_size = 14
142
  dim = 768
@@ -147,29 +147,29 @@ def get_image_featurizer(name, token_type="key", **kwargs):
147
 
148
  def get_audio_featurizer(name, **kwargs):
149
  if name == "davenet":
150
- from denseav.featurizers.DAVENet import DavenetAudioFeaturizer
151
  model = DavenetAudioFeaturizer()
152
  dim = 1024
153
  elif name == "dino8":
154
  model, _, dim = get_image_featurizer("dino8")
155
  elif name == "hubert":
156
- from denseav.featurizers.Hubert import Hubert
157
  model = Hubert()
158
  dim = 1024
159
  elif name == "cavmae":
160
- from denseav.featurizers.CAVMAE import CAVMAEAudioFeaturizer
161
  model = CAVMAEAudioFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
162
  dim = 768
163
  elif name == "imagebind":
164
- from denseav.featurizers.ImageBind import ImageBindAudioFeaturizer
165
  model = ImageBindAudioFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
166
  dim = 1024
167
  elif name == "audiomae":
168
- from denseav.featurizers.AudioMAE import AudioMAE
169
  model = AudioMAE(kwargs["output_root"], False)
170
  dim = 768
171
  elif name == "audiomae-finetuned":
172
- from denseav.featurizers.AudioMAE import AudioMAE
173
  model = AudioMAE(kwargs["output_root"], True)
174
  dim = 768
175
  else:
 
90
  name = name.lower()
91
 
92
  if name == "vit":
93
+ from featurizers.DINO import DINOFeaturizer
94
  patch_size = 16
95
  model = DINOFeaturizer("vit_small_patch16_224", patch_size, token_type)
96
  dim = 384
97
  elif name == "dino16":
98
+ from featurizers.DINO import DINOFeaturizer
99
  patch_size = 16
100
  model = DINOFeaturizer("dino_vits16", patch_size, token_type)
101
  dim = 384
102
  elif name == "dino8":
103
+ from featurizers.DINO import DINOFeaturizer
104
  patch_size = 8
105
  model = DINOFeaturizer("dino_vits8", patch_size, token_type)
106
  dim = 384
107
  elif name == "clip":
108
+ from featurizers.CLIP import CLIPFeaturizer
109
  patch_size = 16
110
  model = CLIPFeaturizer()
111
  dim = 512
112
  elif name == "cavmae":
113
+ from featurizers.CAVMAE import CAVMAEImageFeaturizer
114
  model = CAVMAEImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
115
  dim = 768
116
  patch_size = 16
117
  elif name == "fnac":
118
+ from featurizers.FNACAVL import FNACImageFeaturizer
119
  model = FNACImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
120
  dim = 512
121
  patch_size = 16
122
  elif name == "imagebind":
123
+ from featurizers.ImageBind import ImageBindImageFeaturizer
124
  model = ImageBindImageFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
125
  dim = 1024
126
  patch_size = 16
 
131
  patch_size = 1
132
  dim = 2048
133
  elif name == "davenet":
134
+ from featurizers.DAVENet import DavenetImageFeaturizer
135
  model = DavenetImageFeaturizer()
136
  patch_size = 1
137
  dim = 1024
138
  elif name == "dinov2":
139
+ from featurizers.DINOv2 import DINOv2Featurizer
140
  model = DINOv2Featurizer()
141
  patch_size = 14
142
  dim = 768
 
147
 
148
  def get_audio_featurizer(name, **kwargs):
149
  if name == "davenet":
150
+ from featurizers.DAVENet import DavenetAudioFeaturizer
151
  model = DavenetAudioFeaturizer()
152
  dim = 1024
153
  elif name == "dino8":
154
  model, _, dim = get_image_featurizer("dino8")
155
  elif name == "hubert":
156
+ from featurizers.Hubert import Hubert
157
  model = Hubert()
158
  dim = 1024
159
  elif name == "cavmae":
160
+ from featurizers.CAVMAE import CAVMAEAudioFeaturizer
161
  model = CAVMAEAudioFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
162
  dim = 768
163
  elif name == "imagebind":
164
+ from featurizers.ImageBind import ImageBindAudioFeaturizer
165
  model = ImageBindAudioFeaturizer(kwargs["output_root"], model=kwargs.get("model"))
166
  dim = 1024
167
  elif name == "audiomae":
168
+ from featurizers.AudioMAE import AudioMAE
169
  model = AudioMAE(kwargs["output_root"], False)
170
  dim = 768
171
  elif name == "audiomae-finetuned":
172
+ from featurizers.AudioMAE import AudioMAE
173
  model = AudioMAE(kwargs["output_root"], True)
174
  dim = 768
175
  else:
DenseAV/denseav/train.py CHANGED
@@ -21,11 +21,11 @@ from torchmetrics.functional.classification import binary_average_precision
21
 
22
  from huggingface_hub import PyTorchModelHubMixin
23
 
24
- from denseav.aggregators import get_aggregator
25
- from denseav.aligners import get_aligner, ProgressiveGrowing
26
- from denseav.constants import *
27
- from denseav.data.AVDatasets import AVDataModule
28
- from denseav.shared import flatten_preds, GatherLayer, \
29
  get_image_featurizer, get_audio_featurizer, RollingAvg, create_model_from_cfg
30
 
31
  torch.multiprocessing.set_sharing_strategy('file_system')
 
21
 
22
  from huggingface_hub import PyTorchModelHubMixin
23
 
24
+ from aggregators import get_aggregator
25
+ from aligners import get_aligner, ProgressiveGrowing
26
+ from constants import *
27
+ from data.AVDatasets import AVDataModule
28
+ from shared import flatten_preds, GatherLayer, \
29
  get_image_featurizer, get_audio_featurizer, RollingAvg, create_model_from_cfg
30
 
31
  torch.multiprocessing.set_sharing_strategy('file_system')