In [11]:
import numpy as np
import torch
from torch import nn

## 1. sigmoid

$$
\begin{split}
\sigma(x)&=\frac{1}{1+\exp(-x)}\\
\sigma'(x)&=\frac{\exp(-x)}{(1+\exp(-x))^2}\\
&=\frac{1}{1+\exp(-x)}\cdot\left(\frac{\exp(-x)}{1+\exp(-x)}\right)\\
&=\sigma(x)\cdot(1-\sigma(x))
\end{split}
$$

In [15]:
# Scalar input for the demo; requires_grad=True makes autograd track operations on it.
x = torch.tensor(1.0, requires_grad=True)

In [16]:
# Echo the tensor to confirm its value and that gradient tracking is switched on.
x

tensor(1., requires_grad=True)

In [18]:
# No backward pass has run for this x yet, so .grad is still None and the cell
# displays nothing.
x.grad

In [5]:
# Forward pass: apply sigmoid to x using the functional form.
y = torch.sigmoid(x)

In [6]:
# Forward value sigmoid(1), displayed as 0.7311; the grad_fn in the repr shows
# the result is attached to the autograd graph.
y

tensor(0.7311, grad_fn=<SigmoidBackward0>)

In [7]:
# Independent NumPy evaluation of 1 / (1 + e^(-1)) to cross-check the PyTorch forward value.
1.0 / (1.0 + np.exp(-1.0))

0.7310585786300049

In [8]:
# Backpropagate from the scalar y; this is what fills in x.grad.
y.backward()

In [9]:
# dy/dx at x = 1 as computed by autograd (displayed as 0.1966).
x.grad

tensor(0.1966)

In [10]:
# Closed-form derivative sigma(x) * (1 - sigma(x)), with sigmoid evaluated once.
sigma_x = x.sigmoid()
analytic_grad = sigma_x * (1 - sigma_x)
# Fail loudly if autograd and the formula disagree, instead of relying on
# comparing two printed tensors by eye.
assert torch.allclose(x.grad, analytic_grad)
analytic_grad

tensor(0.1966, grad_fn=<MulBackward0>)

In [12]:
# Module (layer) form of sigmoid, used below to show it yields the same values
# as the Tensor.sigmoid() method.
sigmoid = nn.Sigmoid()

In [14]:
# Same closed-form derivative, this time evaluated through the nn.Sigmoid instance.
activation = sigmoid(x)
activation * (1 - activation)

tensor(0.1966, grad_fn=<MulBackward0>)

### 1.1 Multivariable form 多元形式

$$
\begin{split}
&y: \mathbb R^n \rightarrow \mathbb R\\
&y=\sum_i\sigma(x_i)=\sigma(x_1)+\sigma(x_2)+\sigma(x_3)+\cdots\\
&\frac{\partial y}{\partial x_1}=\frac{\partial y}{\partial \sigma(x_1)}\cdot\frac{\partial \sigma(x_1)}{\partial x_1}=1\cdot \sigma'(x_1)=\sigma(x_1)\cdot(1-\sigma(x_1))\\
&\frac{\partial y}{\partial x_2}=\frac{\partial y}{\partial \sigma(x_2)}\cdot\frac{\partial \sigma(x_2)}{\partial x_2}=1\cdot \sigma'(x_2)=\sigma(x_2)\cdot(1-\sigma(x_2))
\end{split}
$$

In [19]:
# Three-element input; a fresh tensor, so its .grad starts out empty.
x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)

In [20]:
# Echo the vector input to confirm its values and that gradients are tracked.
x

tensor([1., 2., 3.], requires_grad=True)

In [21]:
# Element-wise sigmoid followed by a sum, so y is a single scalar built from every x[i].
y = torch.sigmoid(x).sum()

In [22]:
# sigmoid(1) + sigmoid(2) + sigmoid(3), displayed as 2.5644.
y

tensor(2.5644, grad_fn=<SumBackward0>)

In [23]:
# x was re-created for this section and backward() has not run yet, so .grad is
# None and nothing is displayed.
x.grad

In [24]:
# Backpropagate from the scalar sum to populate x.grad for all three entries.
y.backward()

In [25]:
# One partial derivative per entry: because y is a plain sum, entry i is just
# sigmoid'(x[i]).
x.grad

tensor([0.1966, 0.1050, 0.0452])

In [26]:
# Element-wise closed form sigma(x_i) * (1 - sigma(x_i)), with sigmoid evaluated once.
sigma_x = x.sigmoid()
analytic_grad = sigma_x * (1 - sigma_x)
# Check every component against autograd rather than comparing printed values by eye.
assert torch.allclose(x.grad, analytic_grad)
analytic_grad

tensor([0.1966, 0.1050, 0.0452], grad_fn=<MulBackward0>)

In [27]:
# NOTE(review): an identical `sigmoid = nn.Sigmoid()` already exists in section 1;
# this re-creation looks redundant unless the section is meant to run on its own.
sigmoid = nn.Sigmoid()

In [28]:
# Element-wise closed-form derivative computed through the nn.Sigmoid instance.
activation = sigmoid(x)
activation * (1 - activation)

tensor([0.1966, 0.1050, 0.0452], grad_fn=<MulBackward0>)

### 1.2 自定义函数

$$
\begin{split}
&y=\sigma^2(x)\\
&y'=2\sigma(x)\cdot(\sigma'(x))=2\sigma(x)\cdot(\sigma(x)(1-\sigma(x)))
\end{split}
$$

In [29]:
# Fresh scalar input for the squared-sigmoid example.
x = torch.tensor(2.0, requires_grad=True)

In [30]:
# y = sigmoid(x)^2. Evaluate sigmoid once and multiply the result by itself
# instead of running the same forward computation twice; the product is still a
# multiplication node, so the value and gradient are unchanged.
sigma_x = x.sigmoid()
y = sigma_x * sigma_x

In [31]:
# sigmoid(2) squared, displayed as 0.7758.
y

tensor(0.7758, grad_fn=<MulBackward0>)

In [32]:
# x was re-created for this section and backward() has not run yet, so .grad is
# None and nothing is displayed.
x.grad

In [33]:
# Backpropagate through the product to obtain d(sigmoid(x)^2)/dx in x.grad.
y.backward()

In [34]:
# Autograd's derivative of sigmoid(x)^2 at x = 2 (displayed as 0.1850).
x.grad

tensor(0.1850)

In [35]:
# Chain rule for y = sigma(x)^2: y' = 2 * sigma(x) * sigma'(x), where
# sigma'(x) = sigma(x) * (1 - sigma(x)). Sigmoid is evaluated once and reused.
sigma_x = x.sigmoid()
analytic_grad = 2 * sigma_x * (sigma_x * (1 - sigma_x))
# Fail loudly if autograd and the hand-derived formula disagree.
assert torch.allclose(x.grad, analytic_grad)
analytic_grad

tensor(0.1850, grad_fn=<MulBackward0>)