author    zhang <zch921005@126.com>  2022-06-22 23:32:41 +0800
committer zhang <zch921005@126.com>  2022-06-22 23:32:41 +0800
commit    b2432d57f626a37ee790a83483bcc960048b0dac (patch)
tree      4d50a6601b083246d178ba4fd2627ebe61a54947 /cv
parent    ca69b367cc6e11d84f743285bf5e03f99f4927b4 (diff)
img2vec
Diffstat (limited to 'cv')
 -rw-r--r--  cv/img2vec/demo.py                                   |  70
 -rw-r--r--  cv/img2vec/image_feature_extractor_similarity.ipynb  | 275
 -rwxr-xr-x  cv/img2vec/imgs/cat.jpg                              | bin 0 -> 9705 bytes
 -rwxr-xr-x  cv/img2vec/imgs/cat2.jpg                             | bin 0 -> 210054 bytes
 -rwxr-xr-x  cv/img2vec/imgs/catdog.jpg                           | bin 0 -> 27434 bytes
 -rwxr-xr-x  cv/img2vec/imgs/face.jpg                             | bin 0 -> 90716 bytes
 -rwxr-xr-x  cv/img2vec/imgs/face2.jpg                            | bin 0 -> 236453 bytes
7 files changed, 345 insertions(+), 0 deletions(-)
diff --git a/cv/img2vec/demo.py b/cv/img2vec/demo.py
new file mode 100644
index 0000000..bf72096
--- /dev/null
+++ b/cv/img2vec/demo.py
@@ -0,0 +1,70 @@
+import os
+
+import torch
+import torch.nn as nn
+from PIL import Image
+from torchvision import models
+from torchvision import transforms as T
+
+
+# Load the pretrained ResNet-152 and grab its global average-pooling layer,
+# whose output is the 2048-d image embedding
+model = models.resnet152(pretrained=True)
+layer = model.avgpool
+# Set model to evaluation mode so batch norm and dropout behave deterministically
+model.eval()
+
+# Preprocess: resize, convert to a tensor, and normalize with the standard
+# ImageNet statistics (transforms.Scale is deprecated; Resize is the current API)
+trans = T.Compose([
+    T.Resize((256, 256)),
+    T.ToTensor(),
+    T.Normalize(mean=[0.485, 0.456, 0.406],
+                std=[0.229, 0.224, 0.225])
+])
+
+def get_vector(image_name):
+    # 1. Load the image with Pillow and force 3 channels (some JPEGs are gray/CMYK)
+    img = Image.open(image_name).convert('RGB')
+    # 2. Transform the image and add a batch dimension
+    t_img = trans(img).unsqueeze(0)
+    # 3. Create a vector of zeros that will hold our feature vector;
+    #    the 'avgpool' layer of ResNet-152 outputs 2048 values
+    my_embedding = torch.zeros(2048)
+    # 4. Define a hook that copies the output of the layer (shape [1, 2048, 1, 1])
+    def copy_data(m, i, o):
+        my_embedding.copy_(o.data.reshape(o.data.size(1)))
+    # 5. Attach that hook to our selected layer
+    h = layer.register_forward_hook(copy_data)
+    # 6. Run the model on our transformed image; no gradients are needed
+    with torch.no_grad():
+        model(t_img)
+    # 7. Detach our copy function from the layer
+    h.remove()
+    # 8. Return the feature vector
+    return my_embedding
+
+
+if __name__ == '__main__':
+    imgs = []
+    img_vecs = []
+    for img_file in os.listdir('./imgs'):
+        imgs.append(img_file)
+        vec = get_vector(os.path.join('./imgs', img_file))
+        img_vecs.append(vec)
+        print(vec.shape, type(vec))
+
+    cos = nn.CosineSimilarity(dim=1)
+
+    # Pairwise cosine similarity (including each image with itself, which is 1.0)
+    for i in range(len(imgs)):
+        for j in range(i, len(imgs)):
+            sim = cos(img_vecs[i].unsqueeze(0), img_vecs[j].unsqueeze(0)).item()
+            print(imgs[i], imgs[j], sim)
+
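+# Alternative sketch (not part of the original script): the same 2048-d
+# embedding can be obtained without a forward hook by truncating the network
+# just before the final fc layer:
+#
+#   extractor = nn.Sequential(*list(model.children())[:-1])
+#   with torch.no_grad():
+#       vec = extractor(t_img).flatten(1)  # shape [1, 2048]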
+
diff --git a/cv/img2vec/image_feature_extractor_similarity.ipynb b/cv/img2vec/image_feature_extractor_similarity.ipynb
new file mode 100644
index 0000000..a9fc867
--- /dev/null
+++ b/cv/img2vec/image_feature_extractor_similarity.ipynb
@@ -0,0 +1,275 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### 1. Image data processing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:20:04.751148Z",
+ "start_time": "2022-06-22T15:20:03.148807Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from PIL import Image\n",
+ "from torchvision import transforms as T\n",
+ "import timm"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:17:08.148080Z",
+ "start_time": "2022-06-22T15:17:08.142540Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "img1 = Image.open('./imgs/cat.jpg')\n",
+ "img2 = Image.open('./imgs/cat2.jpg')\n",
+ "img3 = Image.open('./imgs/face.jpg')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:20:48.808680Z",
+ "start_time": "2022-06-22T15:20:48.804760Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "trans = T.Compose([\n",
+ " T.Resize((256, 256)),\n",
+ " T.ToTensor(),\n",
+ " T.Normalize(mean=timm.data.IMAGENET_DEFAULT_MEAN, std=timm.data.IMAGENET_DEFAULT_STD)\n",
+ "])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:21:36.222222Z",
+ "start_time": "2022-06-22T15:21:36.112742Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "t_img1 = trans(img1).unsqueeze(0)\n",
+ "t_img2 = trans(img2).unsqueeze(0)\n",
+ "t_img3 = trans(img3).unsqueeze(0)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### 2. Load the model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:25:37.025421Z",
+ "start_time": "2022-06-22T15:25:37.021239Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "from torchvision import models\n",
+ "import torch\n",
+ "from torch import nn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:18:02.323997Z",
+ "start_time": "2022-06-22T15:18:00.711753Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "model = models.resnet152(pretrained=True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:19:05.325328Z",
+ "start_time": "2022-06-22T15:19:05.321574Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "layer = model._modules['avgpool']"
+ ]
+ },
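+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Side note (a sketch, not executed in the original notebook): since timm is\n",
+    "# already imported, a pooled feature extractor can also be built directly\n",
+    "# with num_classes=0, which drops the classifier head and returns the\n",
+    "# 2048-d pooled features without needing a forward hook:\n",
+    "# extractor = timm.create_model('resnet152', pretrained=True, num_classes=0)\n",
+    "# vec = extractor(t_img1)  # shape [1, 2048]"
+   ]
+  },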
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:25:15.871839Z",
+ "start_time": "2022-06-22T15:25:15.110680Z"
+ }
+ },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "torch.Size([1, 2048, 1, 1])\n",
+      "torch.Size([1, 2048, 1, 1])\n",
+      "torch.Size([1, 2048, 1, 1])\n"
+     ]
+    }
+   ],
+   "source": [
+    "model.eval()\n",
+    "\n",
+    "# Pull the 2048-d avgpool activation out of the network with a temporary\n",
+    "# forward hook, once per image, so that all three vectors exist for the\n",
+    "# similarity cells below\n",
+    "def extract(t_img):\n",
+    "    vec = torch.zeros(2048)\n",
+    "    def copy(m, i, o):\n",
+    "        print(o.shape)  # [1, 2048, 1, 1]\n",
+    "        vec.copy_(o.reshape(o.shape[1]))\n",
+    "    h = layer.register_forward_hook(copy)\n",
+    "    with torch.no_grad():\n",
+    "        model(t_img)  # forward pass triggers the hook\n",
+    "    h.remove()\n",
+    "    return vec\n",
+    "\n",
+    "feature_vec = extract(t_img1)\n",
+    "feature_vec2 = extract(t_img2)\n",
+    "feature_vec3 = extract(t_img3)"
+   ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:25:27.967597Z",
+ "start_time": "2022-06-22T15:25:27.962756Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0.1978, 0.4018, 0.0321, ..., 0.0981, 0.2331, 0.2016])"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "feature_vec3"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+    "### 3. Output processing"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:25:48.350136Z",
+ "start_time": "2022-06-22T15:25:48.347187Z"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "cos_sim = nn.CosineSimilarity(dim=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:26:16.628814Z",
+ "start_time": "2022-06-22T15:26:16.623559Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0.6978])"
+ ]
+ },
+ "execution_count": 37,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cos_sim(feature_vec.unsqueeze(0), feature_vec2.unsqueeze(0))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {
+ "ExecuteTime": {
+ "end_time": "2022-06-22T15:26:43.703193Z",
+ "start_time": "2022-06-22T15:26:43.698088Z"
+ }
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "tensor([0.5045])"
+ ]
+ },
+ "execution_count": 38,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "cos_sim(feature_vec.unsqueeze(0), feature_vec3.unsqueeze(0))"
+ ]
+  },
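+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical extension (a sketch, not executed above): stack the three\n",
+    "# vectors and let broadcasting produce the full 3x3 pairwise cosine\n",
+    "# similarity matrix in one call\n",
+    "import torch.nn.functional as F\n",
+    "vecs = torch.stack([feature_vec, feature_vec2, feature_vec3])  # [3, 2048]\n",
+    "F.cosine_similarity(vecs.unsqueeze(1), vecs.unsqueeze(0), dim=-1)  # [3, 3]"
+   ]
+  }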
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.8"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/cv/img2vec/imgs/cat.jpg b/cv/img2vec/imgs/cat.jpg
new file mode 100755
index 0000000..0cfbab1
--- /dev/null
+++ b/cv/img2vec/imgs/cat.jpg
Binary files differ
diff --git a/cv/img2vec/imgs/cat2.jpg b/cv/img2vec/imgs/cat2.jpg
new file mode 100755
index 0000000..56f7e4d
--- /dev/null
+++ b/cv/img2vec/imgs/cat2.jpg
Binary files differ
diff --git a/cv/img2vec/imgs/catdog.jpg b/cv/img2vec/imgs/catdog.jpg
new file mode 100755
index 0000000..146e23d
--- /dev/null
+++ b/cv/img2vec/imgs/catdog.jpg
Binary files differ
diff --git a/cv/img2vec/imgs/face.jpg b/cv/img2vec/imgs/face.jpg
new file mode 100755
index 0000000..d9f4c01
--- /dev/null
+++ b/cv/img2vec/imgs/face.jpg
Binary files differ
diff --git a/cv/img2vec/imgs/face2.jpg b/cv/img2vec/imgs/face2.jpg
new file mode 100755
index 0000000..d3f76b6
--- /dev/null
+++ b/cv/img2vec/imgs/face2.jpg
Binary files differ