summaryrefslogtreecommitdiff
path: root/cv/img2vec/demo.py
blob: bf7209684e70459f7dfe51d76b6b24f1e9ef882d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from torch.autograd import Variable
from PIL import Image
import os
from torchvision import transforms as T


# Load the pretrained ResNet-152 model.
# NOTE(review): newer torchvision releases prefer the `weights=` argument over
# `pretrained=True`; kept unchanged so older installs keep working -- confirm
# against the torchvision version this demo targets.
model = models.resnet152(pretrained=True)

# The layer whose output get_vector() captures as the image embedding.
layer = model._modules['avgpool']
# Set model to evaluation mode (inference behaviour for layers such as
# batch norm) before any forward pass is run.
model.eval()

# Preprocessing applied to every image before it is fed to the model:
#   1. resize to exactly 256x256 (aspect ratio is NOT preserved),
#   2. convert the PIL image to a tensor,
#   3. normalize each of the three channels with the given mean / std.
# `T.Resize` replaces `T.Scale`, which was deprecated and later removed from
# torchvision; with a (h, w) tuple it produces the same output size.
trans = T.Compose([
    T.Resize((256, 256)),
    T.ToTensor(),
    T.Normalize(mean=[0.485, 0.456, 0.406],
                std=[0.229, 0.224, 0.225])
])

def get_vector(image_name):
    """Return the feature vector that ``layer`` produces for one image.

    The image is opened with Pillow, converted to RGB, passed through the
    module-level ``trans`` pipeline and fed to the module-level ``model``.
    A temporary forward hook on the module-level ``layer`` copies that
    layer's output into the returned tensor.

    Args:
        image_name: Path (or file object) accepted by ``PIL.Image.open``.

    Returns:
        A 1-D ``torch.Tensor`` of length 2048 holding the captured output.
    """
    # 1. Load the image and build a batch of one. The context manager closes
    #    the underlying file; convert('RGB') guarantees the three channels the
    #    3-value Normalize mean/std needs, even for grayscale / RGBA / palette
    #    files.
    with Image.open(image_name) as img:
        t_img = trans(img.convert('RGB')).unsqueeze(0)

    # 2. Buffer that will receive the feature vector. It is sized 2048 to
    #    match the channel count copied out of the hooked layer below.
    my_embedding = torch.zeros(2048)

    # 3. Forward hook: flatten the layer output to its channel dimension
    #    (size(1)) and copy it into the buffer.
    def copy_data(m, i, o):
        my_embedding.copy_(o.detach().reshape(o.size(1)))

    # 4. Attach the hook, run the model, and always detach the hook again so
    #    a failing forward pass cannot leave it registered on the shared layer.
    h = layer.register_forward_hook(copy_data)
    try:
        # Inference only: skip building the autograd graph.
        with torch.no_grad():
            model(t_img)
    finally:
        h.remove()

    # 5. Return the feature vector
    return my_embedding


if __name__ == '__main__':
    # Embed every entry found in ./imgs, remembering the file names in the
    # same order as their vectors.
    image_dir = './imgs'
    names = []
    vectors = []
    for entry in os.listdir(image_dir):
        names.append(entry)
        embedding = get_vector(os.path.join(image_dir, entry))
        vectors.append(embedding)
        print(embedding.shape, type(embedding))

    cos = nn.CosineSimilarity(dim=1)

    # Print the cosine similarity for every unordered pair of images,
    # including each image paired with itself.
    for start, (name_a, vec_a) in enumerate(zip(names, vectors)):
        for name_b, vec_b in zip(names[start:], vectors[start:]):
            print(name_a, name_b, cos(vec_a.unsqueeze(0), vec_b.unsqueeze(0)))