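"""CLIP featurizer: wraps OpenAI's ViT-B/16 CLIP image encoder and exposes
its visual features (and CLS token) as float32 tensors for downstream use."""
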
import clip
import torch
from torch import nn

class CLIPFeaturizer(nn.Module):

    def __init__(self):
        super().__init__()
        # Load CLIP on CPU and let the caller move the module to the target
        # device; hard-coding .cuda() here would break CPU-only machines.
        self.model, self.preprocess = clip.load("ViT-B/16", device="cpu")
        self.model.eval()
        self.config = {}
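
    # Global image embedding: CLIP's encode_image output, cast to float32.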
    def get_cls_token(self, img):
        return self.model.encode_image(img).to(torch.float32)

    def forward(self, img, include_cls=True):
        # get_visual_features is not part of stock OpenAI CLIP; it presumably
        # comes from a patched clip package shipped with this project and is
        # assumed to return the dense patch features plus the CLS token when
        # include_cls is True. The default of True is an assumption so the
        # demo call below can omit the argument.
        features = self.model.get_visual_features(img, include_cls)
        # Cast tensors to float32 (pass through anything that isn't a tensor).
        return [t.to(torch.float32) if isinstance(t, torch.Tensor) else t
                for t in features]
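

# Smoke test: featurize a sample image when this file is run directly.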
if __name__ == "__main__":
    import torchvision.transforms as T
    from PIL import Image
    from shared import norm, crop_to_divisor

    device = "cuda" if torch.cuda.is_available() else "cpu"

    image = Image.open("../samples/lex1.jpg")
    load_size = 224  # * 3
    transform = T.Compose([
        T.Resize(load_size, T.InterpolationMode.BILINEAR),
        # T.CenterCrop(load_size),
        T.ToTensor(),
        # Crop so height and width are divisible by the ViT-B/16 patch size.
        lambda x: crop_to_divisor(x, 16),
        norm])

    # Use the detected device instead of hard-coding .cuda(), and run
    # inference without tracking gradients.
    model = CLIPFeaturizer().to(device)

    with torch.no_grad():
        results = model(transform(image).unsqueeze(0).to(device))

    print(clip.available_models())
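
    # Quick sanity check of get_cls_token as well: for ViT-B/16 the CLS
    # embedding should come back as a [1, 512] float32 tensor (512 is the
    # model's embed dim; treat the exact shape as an assumption).
    with torch.no_grad():
        cls_token = model.get_cls_token(transform(image).unsqueeze(0).to(device))
    print(cls_token.shape, cls_token.dtype)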