From b2432d57f626a37ee790a83483bcc960048b0dac Mon Sep 17 00:00:00 2001
From: zhang
Date: Wed, 22 Jun 2022 23:32:41 +0800
Subject: img2vec

---
 cv/img2vec/demo.py                            |  70 ++++++
 .../image_feature_extractor_similarity.ipynb  | 275 +++++++++++++++++++++
 cv/img2vec/imgs/cat.jpg                       | Bin 0 -> 9705 bytes
 cv/img2vec/imgs/cat2.jpg                      | Bin 0 -> 210054 bytes
 cv/img2vec/imgs/catdog.jpg                    | Bin 0 -> 27434 bytes
 cv/img2vec/imgs/face.jpg                      | Bin 0 -> 90716 bytes
 cv/img2vec/imgs/face2.jpg                     | Bin 0 -> 236453 bytes
 7 files changed, 345 insertions(+)
 create mode 100644 cv/img2vec/demo.py
 create mode 100644 cv/img2vec/image_feature_extractor_similarity.ipynb
 create mode 100755 cv/img2vec/imgs/cat.jpg
 create mode 100755 cv/img2vec/imgs/cat2.jpg
 create mode 100755 cv/img2vec/imgs/catdog.jpg
 create mode 100755 cv/img2vec/imgs/face.jpg
 create mode 100755 cv/img2vec/imgs/face2.jpg

(limited to 'cv/img2vec')

diff --git a/cv/img2vec/demo.py b/cv/img2vec/demo.py
new file mode 100644
index 0000000..bf72096
--- /dev/null
+++ b/cv/img2vec/demo.py
@@ -0,0 +1,70 @@
+import os
+
+import torch
+import torch.nn as nn
+import torchvision.models as models
+from PIL import Image
+from torchvision import transforms as T
+
+
+# Load the pretrained model and keep a handle to its global average-pooling
+# layer; its output is the 2048-dim feature vector we extract.
+model = models.resnet152(pretrained=True)
+layer = model._modules['avgpool']
+# Set the model to evaluation mode
+model.eval()
+
+# Standard ImageNet preprocessing (T.Scale is deprecated; use T.Resize)
+trans = T.Compose([
+    T.Resize((256, 256)),
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225])
+])
+
+
+def get_vector(image_name):
+    # 1. Load the image with Pillow (convert, in case of grayscale/RGBA input)
+    img = Image.open(image_name).convert('RGB')
+    # 2. Transform the image and add a batch dimension
+    t_img = trans(img).unsqueeze(0)
+    # 3. Create a vector of zeros that will hold the feature vector
+    #    (the 'avgpool' layer of resnet152 outputs 2048 channels, not 512)
+    my_embedding = torch.zeros(2048)
+    # 4. Define a hook that copies the output of the layer
+    def copy_data(m, i, o):
+        my_embedding.copy_(o.data.reshape(o.data.size(1)))
+    # 5. Attach the hook to the selected layer
+    h = layer.register_forward_hook(copy_data)
+    # 6. Run the model on the transformed image; the hook fires during forward
+    with torch.no_grad():
+        model(t_img)
+    # 7. Detach the hook from the layer
+    h.remove()
+    # 8. Return the feature vector
+    return my_embedding
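+
+
+# Side note (a sketch only, not used by the demo below): the forward hook can
+# be avoided by truncating the network after avgpool and calling it directly:
+#
+#     extractor = nn.Sequential(*list(model.children())[:-1])  # drop fc head
+#     with torch.no_grad():
+#         vec = extractor(t_img).flatten()  # shape: (2048,)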
+
+
+if __name__ == '__main__':
+    imgs = []
+    img_vecs = []
+    for img_file in os.listdir('./imgs'):
+        imgs.append(img_file)
+        img_path = os.path.join('./imgs', img_file)
+        vec = get_vector(img_path)
+        img_vecs.append(vec)
+        print(vec.shape, type(vec))
+
+    cos = nn.CosineSimilarity(dim=1)
+
+    # Pairwise cosine similarity; j starts at i, so each image is also
+    # compared with itself (expected similarity: 1.0).
+    for i in range(len(imgs)):
+        for j in range(i, len(imgs)):
+            print(imgs[i], imgs[j],
+                  cos(img_vecs[i].unsqueeze(0), img_vecs[j].unsqueeze(0)))
diff --git a/cv/img2vec/image_feature_extractor_similarity.ipynb b/cv/img2vec/image_feature_extractor_similarity.ipynb
new file mode 100644
index 0000000..a9fc867
--- /dev/null
+++ b/cv/img2vec/image_feature_extractor_similarity.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. Image Data Preprocessing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:20:04.751148Z",
+     "start_time": "2022-06-22T15:20:03.148807Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from PIL import Image\n",
+    "from torchvision import transforms as T\n",
+    "import timm"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:17:08.148080Z",
+     "start_time": "2022-06-22T15:17:08.142540Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "img1 = Image.open('./imgs/cat.jpg')\n",
+    "img2 = Image.open('./imgs/cat2.jpg')\n",
+    "img3 = Image.open('./imgs/face.jpg')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:20:48.808680Z",
+     "start_time": "2022-06-22T15:20:48.804760Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "trans = T.Compose([\n",
+    "    T.Resize((256, 256)),\n",
+    "    T.ToTensor(),\n",
+    "    T.Normalize(mean=timm.data.IMAGENET_DEFAULT_MEAN, std=timm.data.IMAGENET_DEFAULT_STD)\n",
+    "])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:21:36.222222Z",
+     "start_time": "2022-06-22T15:21:36.112742Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "t_img1 = trans(img1).unsqueeze(0)\n",
+    "t_img2 = trans(img2).unsqueeze(0)\n",
+    "t_img3 = trans(img3).unsqueeze(0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2. Load the Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:25:37.025421Z",
+     "start_time": "2022-06-22T15:25:37.021239Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "from torchvision import models\n",
+    "import torch\n",
+    "from torch import nn"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:18:02.323997Z",
+     "start_time": "2022-06-22T15:18:00.711753Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "model = models.resnet152(pretrained=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:19:05.325328Z",
+     "start_time": "2022-06-22T15:19:05.321574Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "layer = model._modules['avgpool']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:25:15.871839Z",
+     "start_time": "2022-06-22T15:25:15.110680Z"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([1, 2048, 1, 1])\n",
+      "torch.Size([1, 2048, 1, 1])\n",
+      "torch.Size([1, 2048, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.eval()\n",
+    "\n",
+    "def get_vector(t_img):\n",
+    "    # the hook copies the (1, 2048, 1, 1) avgpool output into a flat vector\n",
+    "    vec = torch.zeros(2048)\n",
+    "    def copy(m, i, o):\n",
+    "        print(o.shape)\n",
+    "        vec.copy_(o.reshape(o.shape[1]))\n",
+    "    h = layer.register_forward_hook(copy)\n",
+    "    with torch.no_grad():\n",
+    "        model(t_img)  # forward\n",
+    "    h.remove()\n",
+    "    return vec\n",
+    "\n",
+    "feature_vec = get_vector(t_img1)\n",
+    "feature_vec2 = get_vector(t_img2)\n",
+    "feature_vec3 = get_vector(t_img3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:25:27.967597Z",
+     "start_time": "2022-06-22T15:25:27.962756Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.1978, 0.4018, 0.0321, ..., 0.0981, 0.2331, 0.2016])"
+      ]
+     },
+     "execution_count": 33,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "feature_vec3"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. Output Processing"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:25:48.350136Z",
+     "start_time": "2022-06-22T15:25:48.347187Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "cos_sim = nn.CosineSimilarity(dim=1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:26:16.628814Z",
+     "start_time": "2022-06-22T15:26:16.623559Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.6978])"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cos_sim(feature_vec.unsqueeze(0), feature_vec2.unsqueeze(0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-06-22T15:26:43.703193Z",
+     "start_time": "2022-06-22T15:26:43.698088Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.5045])"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "cos_sim(feature_vec.unsqueeze(0), feature_vec3.unsqueeze(0))"
+   ]
+  },
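+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "As an optional variation (a sketch only, not executed above): the same embeddings can be obtained without a forward hook by truncating the network after `avgpool` and running the three images as one batch. The name `extractor` is introduced here for illustration and assumes the cells above have been run."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Sketch: hook-free, batched feature extraction\n",
+    "extractor = nn.Sequential(*list(model.children())[:-1])  # drop the fc head\n",
+    "extractor.eval()\n",
+    "with torch.no_grad():\n",
+    "    batch = torch.cat([t_img1, t_img2, t_img3], dim=0)  # (3, 3, 256, 256)\n",
+    "    feats = extractor(batch).flatten(1)                 # (3, 2048)\n",
+    "# pairwise cosine-similarity matrix, shape (3, 3)\n",
+    "feats_n = torch.nn.functional.normalize(feats, dim=1)\n",
+    "feats_n @ feats_n.T"
+   ]
+  }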
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/cv/img2vec/imgs/cat.jpg b/cv/img2vec/imgs/cat.jpg
new file mode 100755
index 0000000..0cfbab1
Binary files /dev/null and b/cv/img2vec/imgs/cat.jpg differ
diff --git a/cv/img2vec/imgs/cat2.jpg b/cv/img2vec/imgs/cat2.jpg
new file mode 100755
index 0000000..56f7e4d
Binary files /dev/null and b/cv/img2vec/imgs/cat2.jpg differ
diff --git a/cv/img2vec/imgs/catdog.jpg b/cv/img2vec/imgs/catdog.jpg
new file mode 100755
index 0000000..146e23d
Binary files /dev/null and b/cv/img2vec/imgs/catdog.jpg differ
diff --git a/cv/img2vec/imgs/face.jpg b/cv/img2vec/imgs/face.jpg
new file mode 100755
index 0000000..d9f4c01
Binary files /dev/null and b/cv/img2vec/imgs/face.jpg differ
diff --git a/cv/img2vec/imgs/face2.jpg b/cv/img2vec/imgs/face2.jpg
new file mode 100755
index 0000000..d3f76b6
Binary files /dev/null and b/cv/img2vec/imgs/face2.jpg differ
--
cgit v1.2.3