4 files changed, 682 insertions, 2 deletions
diff --git a/dl/normalize/mnist_demo.py b/dl/normalize/mnist_demo.py
index dc9e00c..a835542 100644
--- a/dl/normalize/mnist_demo.py
+++ b/dl/normalize/mnist_demo.py
@@ -11,7 +11,8 @@ import torch
 # timm.data.IMAGENET_DEFAULT_STD: (0.229, 0.224, 0.225)
 transform = transforms.Compose([
                 transforms.ToTensor(),
-                transforms.Normalize(mean=[0.1307],  std=[0.3081])
+                transforms.Normalize(mean=[0.1307],  std=[0.3081]),
+                transforms.Resize((28, 28))  # `size` is a required argument; 28x28 assumed here -- TODO confirm the intended target size
             ])
 
 # MNIST dataset
diff --git a/fine_tune/bert/tutorials/05_output.py b/fine_tune/bert/tutorials/05_output.py
index 1911641..a4f9db5 100644
--- a/fine_tune/bert/tutorials/05_output.py
+++ b/fine_tune/bert/tutorials/05_output.py
@@ -3,6 +3,7 @@ from transformers.models.bert import BertModel
 import torch
 from torch import nn
 
+# NOTE: a bare `nn.BatchNorm2d()` call stood here; num_features is required, so it raised TypeError on import and was dropped.
 
 if __name__ == '__main__':
 
@@ -14,7 +15,7 @@ if __name__ == '__main__':
     text = "After stealing money from the bank vault, the bank robber was seen " \
        "fishing on the Mississippi river bank."
 
-
+    model.eval()
     token_inputs = tokenizer(text, return_tensors='pt')
     with torch.no_grad():
         outputs = model(**token_inputs)
diff --git a/learn_torch/basics/bn.py b/learn_torch/basics/bn.py
new file mode 100644
index 0000000..722497e
--- /dev/null
+++ b/learn_torch/basics/bn.py
@@ -0,0 +1,19 @@
+
+import torch
+from torch import nn
+
+
+if __name__ == '__main__':
+    # momentum=None: running stats are a cumulative (simple) average of the batch stats
+    bn = nn.BatchNorm1d(3, momentum=None)
+    batches = [torch.randint(0, 5, (2, 3), dtype=torch.float32) for _ in range(2)]
+
+    # train mode: every forward pass updates running_mean / running_var
+    for batch in batches:
+        bn(batch)
+        print(bn.running_mean, bn.running_var)
+
+    bn.eval()  # eval mode: forward normalizes with the stored running stats
+    sample = torch.randint(0, 5, (1, 3), dtype=torch.float32)
+    bn(sample)
+
diff --git a/learn_torch/tutorials/bn_train_eval.ipynb b/learn_torch/tutorials/bn_train_eval.ipynb
new file mode 100644
index 0000000..c450e74
--- /dev/null
+++ b/learn_torch/tutorials/bn_train_eval.ipynb
@@ -0,0 +1,659 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:12:41.751525Z",
+     "start_time": "2022-09-13T15:12:39.748638Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "from torch import nn\n",
+    "import numpy as np\n",
+    "from copy import deepcopy"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 1. module"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:12:43.503523Z",
+     "start_time": "2022-09-13T15:12:43.498924Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "m = nn.BatchNorm1d(3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:12:46.134859Z",
+     "start_time": "2022-09-13T15:12:46.117802Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.1 m(x1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:14:00.620025Z",
+     "start_time": "2022-09-13T15:14:00.617068Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x1 = torch.randint(0, 5, (2, 3), dtype=torch.float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:14:04.022933Z",
+     "start_time": "2022-09-13T15:14:04.016599Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[2., 3., 1.],\n",
+       "        [1., 3., 4.]])"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:15:03.155867Z",
+     "start_time": "2022-09-13T15:15:03.149579Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([1.5000, 3.0000, 2.5000]), tensor([0.2500, 0.0000, 2.2500]))"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x1.mean(dim=0), x1.var(dim=0, unbiased=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:16:50.746057Z",
+     "start_time": "2022-09-13T15:16:50.740149Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 1.0000,  0.0000, -1.0000],\n",
+       "        [-1.0000,  0.0000,  1.0000]])"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# biased (unbiased = False)\n",
+    "(x1 - x1.mean(dim=0))/torch.sqrt(x1.var(dim=0, unbiased=False) + 1e-5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:15:14.087721Z",
+     "start_time": "2022-09-13T15:15:14.080391Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 1.0000,  0.0000, -1.0000],\n",
+       "        [-1.0000,  0.0000,  1.0000]], grad_fn=<NativeBatchNormBackward0>)"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m(x1)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:15:40.407708Z",
+     "start_time": "2022-09-13T15:15:40.404523Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "last_mean, last_var = deepcopy(m.running_mean), deepcopy(m.running_var)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:15:43.888563Z",
+     "start_time": "2022-09-13T15:15:43.883410Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([0.1500, 0.3000, 0.2500]), tensor([0.9500, 0.9000, 1.3500]))"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "last_mean, last_var"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:16:11.611791Z",
+     "start_time": "2022-09-13T15:16:11.606570Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.1500, 0.3000, 0.2500])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(1-0.1)*0 + 0.1*x1.mean(dim=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:16:58.637472Z",
+     "start_time": "2022-09-13T15:16:58.632528Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.9500, 0.9000, 1.3500])"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# unbiased = True\n",
+    "(1-0.1)*torch.ones(3) + 0.1*x1.var(dim=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 2.2 m(x2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:11.997946Z",
+     "start_time": "2022-09-13T15:17:11.995243Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x2 = torch.randint(0, 5, (2, 3), dtype=torch.float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:12.954075Z",
+     "start_time": "2022-09-13T15:17:12.949510Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[0., 3., 0.],\n",
+       "        [3., 2., 2.]])"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x2"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:21.705867Z",
+     "start_time": "2022-09-13T15:17:21.701285Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([1.5000, 2.5000, 1.0000]), tensor([4.5000, 0.5000, 2.0000]))"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x2.mean(dim=0), x2.var(dim=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:26.797073Z",
+     "start_time": "2022-09-13T15:17:26.791778Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[-1.0000,  1.0000, -1.0000],\n",
+       "        [ 1.0000, -1.0000,  1.0000]])"
+      ]
+     },
+     "execution_count": 19,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(x2 - x2.mean(dim=0)) / torch.sqrt(x2.var(dim=0, unbiased=False)+1e-05)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:29.424105Z",
+     "start_time": "2022-09-13T15:17:29.418592Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[-1.0000,  1.0000, -1.0000],\n",
+       "        [ 1.0000, -1.0000,  1.0000]], grad_fn=<NativeBatchNormBackward0>)"
+      ]
+     },
+     "execution_count": 20,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m(x2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:17:49.273927Z",
+     "start_time": "2022-09-13T15:17:49.268708Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(tensor([0.2850, 0.5200, 0.3250]), tensor([1.3050, 0.8600, 1.4150]))"
+      ]
+     },
+     "execution_count": 21,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.running_mean, m.running_var"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:03.331985Z",
+     "start_time": "2022-09-13T15:18:03.326575Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([0.2850, 0.5200, 0.3250])"
+      ]
+     },
+     "execution_count": 22,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(1-0.1)*last_mean + 0.1*x2.mean(dim=0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:07.494252Z",
+     "start_time": "2022-09-13T15:18:07.487036Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([1.3050, 0.8600, 1.4150])"
+      ]
+     },
+     "execution_count": 23,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(1-0.1)*last_var + 0.1*x2.var(dim=0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### 3. eval mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:18.641717Z",
+     "start_time": "2022-09-13T15:18:18.639009Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "x3 = torch.randint(0, 5, (2, 3), dtype=torch.float)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:20.564723Z",
+     "start_time": "2022-09-13T15:18:20.560541Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[1., 3., 3.],\n",
+       "        [2., 0., 3.]])"
+      ]
+     },
+     "execution_count": 25,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "x3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:23.072850Z",
+     "start_time": "2022-09-13T15:18:23.069084Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "BatchNorm1d(3, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)"
+      ]
+     },
+     "execution_count": 26,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m.eval()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:32.993600Z",
+     "start_time": "2022-09-13T15:18:32.987854Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 0.6259,  2.6742,  2.2488],\n",
+       "        [ 1.5013, -0.5607,  2.2488]], grad_fn=<NativeBatchNormBackward0>)"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "m(x3)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:18:41.861910Z",
+     "start_time": "2022-09-13T15:18:41.856128Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[-1.0000,  1.0000,  0.0000],\n",
+       "        [ 1.0000, -1.0000,  0.0000]])"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(x3 - x3.mean(dim=0))/torch.sqrt(x3.var(dim=0, unbiased=False) + 1e-5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2022-09-13T15:19:06.793626Z",
+     "start_time": "2022-09-13T15:19:06.788040Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([[ 0.6259,  2.6742,  2.2488],\n",
+       "        [ 1.5013, -0.5607,  2.2488]])"
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "(x3 - m.running_mean)/torch.sqrt(m.running_var+1e-5)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}