Diffstat (limited to 'learn_torch/basics')
-rw-r--r--  learn_torch/basics/autograd_v5.py        53
-rw-r--r--  learn_torch/basics/dynamic_net.py        68
-rw-r--r--  learn_torch/basics/nd_grad.py            42
-rw-r--r--  learn_torch/basics/nn_custom.py          69
-rw-r--r--  learn_torch/basics/nn_demo.py            47
-rw-r--r--  learn_torch/basics/nn_demo_optim.py      52
-rw-r--r--  learn_torch/basics/regression_v3.py      40
-rw-r--r--  learn_torch/basics/tensor_autograd.py    50
-rw-r--r--  learn_torch/basics/tensor_autograd_2.py  45
-rw-r--r--  learn_torch/basics/v5_diff.py            85
10 files changed, 551 insertions, 0 deletions
diff --git a/learn_torch/basics/autograd_v5.py b/learn_torch/basics/autograd_v5.py
new file mode 100644
index 0000000..810a702
--- /dev/null
+++ b/learn_torch/basics/autograd_v5.py
@@ -0,0 +1,53 @@
+
+import torch
+import math
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+
+lr = 5e-6
+
+class LegendrePolynomial3(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, input):
+ ctx.save_for_backward(input)
+ return 0.5*(5*input**3 - 3*input)
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ return grad_output*(7.5*input**2 - 1.5)
+
+
+def train(X, y):
+    a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+    b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
+    c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+    d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
+
+ for i in range(2000):
+ P3 = LegendrePolynomial3.apply
+        # Forward pass through the custom autograd Function
+ y_pred = a + b * P3(c + d*X)
+ loss = (y_pred - y).pow(2).sum()
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+        # Backward pass: autograd fills a.grad, b.grad, c.grad, d.grad
+ loss.backward()
+ with torch.no_grad():
+ a -= lr * a.grad
+ b -= lr * b.grad
+ c -= lr * c.grad
+ d -= lr * d.grad
+
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+
+
+if __name__ == '__main__':
+ X = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype, device=device)
+ y = torch.sin(X)
+ train(X, y)
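+
+    # Added check (a sketch, not part of the original script): torch.autograd.gradcheck
+    # compares the hand-written backward() above against numerical gradients.
+    # It expects double-precision inputs with requires_grad=True; check_in is an
+    # illustrative probe tensor.
+    check_in = torch.randn(8, dtype=torch.double, requires_grad=True)
+    print('gradcheck:', torch.autograd.gradcheck(LegendrePolynomial3.apply, (check_in,)))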
diff --git a/learn_torch/basics/dynamic_net.py b/learn_torch/basics/dynamic_net.py
new file mode 100644
index 0000000..efd2f4e
--- /dev/null
+++ b/learn_torch/basics/dynamic_net.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+import random
+import torch
+import math
+
+
+class DynamicNet(torch.nn.Module):
+ def __init__(self):
+ """
+ In the constructor we instantiate five parameters and assign them as members.
+ """
+ super().__init__()
+ self.a = torch.nn.Parameter(torch.randn(()))
+ self.b = torch.nn.Parameter(torch.randn(()))
+ self.c = torch.nn.Parameter(torch.randn(()))
+ self.d = torch.nn.Parameter(torch.randn(()))
+ self.e = torch.nn.Parameter(torch.randn(()))
+
+ def forward(self, x):
+ """
+        For the forward pass of the model, we randomly choose the highest
+        polynomial order to be 3, 4, or 5, and reuse the e parameter to
+        compute the contribution of the fourth- and fifth-order terms.
+
+ Since each forward pass builds a dynamic computation graph, we can use normal
+ Python control-flow operators like loops or conditional statements when
+ defining the forward pass of the model.
+
+ Here we also see that it is perfectly safe to reuse the same parameter many
+ times when defining a computational graph.
+ """
+ y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
+ for exp in range(4, random.randint(4, 6)):
+ y = y + self.e * x ** exp
+ return y
+
+ def string(self):
+ """
+        Just like any class in Python, you can also define custom methods on PyTorch modules.
+ """
+ return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
+
+
+# Create Tensors to hold input and outputs.
+x = torch.linspace(-math.pi, math.pi, 2000)
+y = torch.sin(x)
+
+# Construct our model by instantiating the class defined above
+model = DynamicNet()
+
+# Construct our loss function and an Optimizer. Training this strange model with
+# vanilla stochastic gradient descent is tough, so we use momentum
+criterion = torch.nn.MSELoss(reduction='sum')
+optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
+for t in range(30000):
+ # Forward pass: Compute predicted y by passing x to the model
+ y_pred = model(x)
+
+ # Compute and print loss
+ loss = criterion(y_pred, y)
+ if t % 2000 == 1999:
+ print(t, loss.item())
+
+ # Zero gradients, perform a backward pass, and update the weights.
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
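+# Added check (a sketch, not in the original file): because forward() samples the
+# highest order on each call, two passes over the same input may build different
+# graphs and return different values; x0 is just an illustrative probe input.
+x0 = torch.tensor([2.0])
+print('two forward passes on x0:', model(x0).item(), model(x0).item())
+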
+print(f'Result: {model.string()}')
\ No newline at end of file
diff --git a/learn_torch/basics/nd_grad.py b/learn_torch/basics/nd_grad.py
new file mode 100644
index 0000000..d8af59e
--- /dev/null
+++ b/learn_torch/basics/nd_grad.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import math
+
+# Create random input and output data
+x = np.linspace(-math.pi, math.pi, 2000)
+y = np.sin(x)
+
+# Randomly initialize weights
+a = np.random.randn()
+b = np.random.randn()
+c = np.random.randn()
+d = np.random.randn()
+
+learning_rate = 1e-3
+for t in range(500):
+ # Forward pass: compute predicted y
+ # y = a + b x + c x^2 + d x^3
+ y_pred = a + b * x + c * x ** 2 + d * x ** 3
+
+ # Compute and print loss
+ loss = np.square(y_pred - y).mean()
+    if t % 10 == 0:
+        print(t, loss)
+
+ # Backprop to compute gradients of a, b, c, d with respect to loss
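+    # (Added note) With loss = mean((y_pred - y)^2), the chain rule gives
+    # d(loss)/d(a) = mean(2 * (y_pred - y)); each higher coefficient picks up an
+    # extra factor of x**k, e.g. d(loss)/d(b) = mean(2 * (y_pred - y) * x).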
+ grad_y_pred = 2.0 * (y_pred - y)
+
+ grad_a = grad_y_pred.mean()
+ grad_b = (grad_y_pred * x).mean()
+ grad_c = (grad_y_pred * x ** 2).mean()
+ grad_d = (grad_y_pred * x ** 3).mean()
+
+ # Update weights
+ a -= learning_rate * grad_a
+ b -= learning_rate * grad_b
+ c -= learning_rate * grad_c
+ d -= learning_rate * grad_d
+
+print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
\ No newline at end of file
diff --git a/learn_torch/basics/nn_custom.py b/learn_torch/basics/nn_custom.py
new file mode 100644
index 0000000..0d9ae8f
--- /dev/null
+++ b/learn_torch/basics/nn_custom.py
@@ -0,0 +1,69 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-3
+
+
+class Poly3(torch.nn.Module):
+ def __init__(self):
+ super(Poly3, self).__init__()
+ self.a = torch.nn.Parameter(torch.randn(()))
+ self.b = torch.nn.Parameter(torch.randn(()))
+ self.c = torch.nn.Parameter(torch.randn(()))
+ self.d = torch.nn.Parameter(torch.randn(()))
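+        # (Added comment) Wrapping each scalar in nn.Parameter registers it with the
+        # Module, so model.parameters() hands all four coefficients to the optimizer.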
+
+ def forward(self, x):
+ return self.a + self.b * x + self.c * x**2 + self.d * x**3
+
+ def __repr__(self):
+ return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ # model.zero_grad()
+ opt.zero_grad()
+
+ loss.backward()
+
+ # with torch.no_grad():
+ # for param in model.parameters():
+ # param -= lr * param.grad
+ opt.step()
+
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
+ # p = torch.Tensor([1, 2, 3])
+ # X = X.unsqueeze(-1).pow(p)
+
+ # model = torch.nn.Sequential(
+ # torch.nn.Linear(3, 1),
+ # torch.nn.Flatten(0, 1)
+ # )
+    model = Poly3().to(device)
+
+ loss_fn = torch.nn.MSELoss(reduction='sum')
+ opt = torch.optim.RMSprop(model.parameters(), lr=lr)
+
+ train(X, y)
+ # weight_layer = model[0]
+ #
+ # print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ # weight_layer.weight[0, 0].item(),
+ # weight_layer.weight[0, 1].item(),
+ # weight_layer.weight[0, 2].item()))
+ print(model)
diff --git a/learn_torch/basics/nn_demo.py b/learn_torch/basics/nn_demo.py
new file mode 100644
index 0000000..2e0bb98
--- /dev/null
+++ b/learn_torch/basics/nn_demo.py
@@ -0,0 +1,47 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-6
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ model.zero_grad()
+ loss.backward()
+
+ with torch.no_grad():
+ for param in model.parameters():
+ param -= lr * param.grad
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
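+    # (Added comment) Build polynomial features: X goes from shape (2000,) to
+    # (2000, 3) with columns x, x^2, x^3, so a single Linear(3, 1) layer can fit
+    # y = a + b x + c x^2 + d x^3.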
+    p = torch.tensor([1, 2, 3], device=device)
+    X = X.unsqueeze(-1).pow(p)
+
+    model = torch.nn.Sequential(
+        torch.nn.Linear(3, 1),
+        torch.nn.Flatten(0, 1)
+    ).to(device)
+
+    loss_fn = torch.nn.MSELoss(reduction='sum')  # sum-reduction matches the tiny lr of 1e-6
+
+ train(X, y)
+ weight_layer = model[0]
+
+ print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ weight_layer.weight[0, 0].item(),
+ weight_layer.weight[0, 1].item(),
+ weight_layer.weight[0, 2].item()))
diff --git a/learn_torch/basics/nn_demo_optim.py b/learn_torch/basics/nn_demo_optim.py
new file mode 100644
index 0000000..38d95dc
--- /dev/null
+++ b/learn_torch/basics/nn_demo_optim.py
@@ -0,0 +1,52 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-3
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ # model.zero_grad()
+ opt.zero_grad()
+
+ loss.backward()
+
+ # with torch.no_grad():
+ # for param in model.parameters():
+ # param -= lr * param.grad
+ opt.step()
+
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
+    p = torch.tensor([1, 2, 3], device=device)
+    X = X.unsqueeze(-1).pow(p)
+
+    model = torch.nn.Sequential(
+        torch.nn.Linear(3, 1),
+        torch.nn.Flatten(0, 1)
+    ).to(device)
+
+ loss_fn = torch.nn.MSELoss(reduction='sum')
+ opt = torch.optim.RMSprop(model.parameters(), lr=lr)
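+    # (Added comment) Compared with nn_demo.py, the optimizer replaces the manual
+    # "param -= lr * param.grad" update: opt.zero_grad() clears stored gradients and
+    # opt.step() applies the RMSprop rule to every registered parameter.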
+
+ train(X, y)
+ weight_layer = model[0]
+
+ print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ weight_layer.weight[0, 0].item(),
+ weight_layer.weight[0, 1].item(),
+ weight_layer.weight[0, 2].item()))
diff --git a/learn_torch/basics/regression_v3.py b/learn_torch/basics/regression_v3.py
new file mode 100644
index 0000000..f6bd467
--- /dev/null
+++ b/learn_torch/basics/regression_v3.py
@@ -0,0 +1,40 @@
+import math
+import torch
+
+
+dtype = torch.float
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
+
+lr = 1e-6
+
+
+def train(X, y):
+ a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+
+ for i in range(2000):
+ y_pred = a + b*X + c*X**2 + d*X**3
+ loss = (y_pred - y).pow(2).sum()
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+ loss.backward()
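+        # (Added comment) backward() fills a.grad .. d.grad with d(loss)/d(param);
+        # the update below runs under no_grad() so the in-place subtraction is not
+        # itself tracked by autograd.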
+ with torch.no_grad():
+ a -= lr * a.grad
+ b -= lr * b.grad
+ c -= lr * c.grad
+ d -= lr * d.grad
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+if __name__ == '__main__':
+
+    X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+ train(X, y)
+
diff --git a/learn_torch/basics/tensor_autograd.py b/learn_torch/basics/tensor_autograd.py
new file mode 100644
index 0000000..661620d
--- /dev/null
+++ b/learn_torch/basics/tensor_autograd.py
@@ -0,0 +1,50 @@
+
+import torch
+import math
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float
+
+# a = torch.randn((), device=device, dtype=dtype)
+# b = torch.randn((), device=device, dtype=dtype)
+# c = torch.randn((), device=device, dtype=dtype)
+# d = torch.randn((), device=device, dtype=dtype)
+# params = [a, b, c, d]
+
+lr = 1e-6
+
+
+def train(X, y):
+ a = torch.randn((), device=device, dtype=dtype)
+ b = torch.randn((), device=device, dtype=dtype)
+ c = torch.randn((), device=device, dtype=dtype)
+ d = torch.randn((), device=device, dtype=dtype)
+
+ for i in range(2000):
+ y_pred = a + b*X + c*X**2 + d*X**3
+ loss = (y_pred - y).pow(2).sum().item()
+ if i % 50 == 0:
+ print(i, loss)
+ loss_grad = 2*(y_pred - y)
+ a_grad = loss_grad.sum()
+ b_grad = (loss_grad * X).sum()
+ c_grad = (loss_grad * X**2).sum()
+ d_grad = (loss_grad * X**3).sum()
+ # if i % 50 == 0:
+ # print(a_grad, b_grad, c_grad, d_grad)
+
+ a -= lr * a_grad
+ b -= lr * b_grad
+ c -= lr * c_grad
+ d -= lr * d_grad
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+if __name__ == '__main__':
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=torch.float)
+ y = torch.sin(X)
+ train(X, y)
+
+
+
diff --git a/learn_torch/basics/tensor_autograd_2.py b/learn_torch/basics/tensor_autograd_2.py
new file mode 100644
index 0000000..584d4c7
--- /dev/null
+++ b/learn_torch/basics/tensor_autograd_2.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+import torch
+import math
+
+
+dtype = torch.float
+device = torch.device("cpu")
+# device = torch.device("cuda:0") # Uncomment this to run on GPU
+
+# Create random input and output data
+x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+y = torch.sin(x)
+
+# Randomly initialize weights
+a = torch.randn(1, device=device, dtype=dtype)
+b = torch.randn(1, device=device, dtype=dtype)
+c = torch.randn(1, device=device, dtype=dtype)
+d = torch.randn(1, device=device, dtype=dtype)
+
+learning_rate = 1e-6
+for t in range(2000):
+ # Forward pass: compute predicted y
+ y_pred = a + b * x + c * x ** 2 + d * x ** 3
+
+ # Compute and print loss
+ loss = (y_pred - y).pow(2).sum().item()
+ if t % 100 == 99:
+ print(t, loss)
+
+ # Backprop to compute gradients of a, b, c, d with respect to loss
+ grad_y_pred = 2.0 * (y_pred - y)
+ grad_a = grad_y_pred.sum()
+ grad_b = (grad_y_pred * x).sum()
+ grad_c = (grad_y_pred * x ** 2).sum()
+ grad_d = (grad_y_pred * x ** 3).sum()
+
+ # Update weights using gradient descent
+ a -= learning_rate * grad_a
+ b -= learning_rate * grad_b
+ c -= learning_rate * grad_c
+ d -= learning_rate * grad_d
+
+
+print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
\ No newline at end of file
diff --git a/learn_torch/basics/v5_diff.py b/learn_torch/basics/v5_diff.py
new file mode 100644
index 0000000..5b247c0
--- /dev/null
+++ b/learn_torch/basics/v5_diff.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+import torch
+import math
+
+
+class LegendrePolynomial3(torch.autograd.Function):
+ """
+ We can implement our own custom autograd Functions by subclassing
+ torch.autograd.Function and implementing the forward and backward passes
+ which operate on Tensors.
+ """
+
+ @staticmethod
+ def forward(ctx, input):
+ """
+ In the forward pass we receive a Tensor containing the input and return
+ a Tensor containing the output. ctx is a context object that can be used
+ to stash information for backward computation. You can cache arbitrary
+ objects for use in the backward pass using the ctx.save_for_backward method.
+ """
+ ctx.save_for_backward(input)
+ return 0.5 * (5 * input ** 3 - 3 * input)
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ In the backward pass we receive a Tensor containing the gradient of the loss
+ with respect to the output, and we need to compute the gradient of the loss
+ with respect to the input.
+ """
+ input, = ctx.saved_tensors
+ return grad_output * 1.5 * (5 * input ** 2 - 1)
+
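+# (Added note) The backward above applies d/dx [0.5 * (5x^3 - 3x)] = 1.5 * (5x^2 - 1),
+# multiplied by grad_output as the chain rule requires; backward() must return one
+# gradient per input of forward().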
+
+dtype = torch.float
+device = torch.device("cpu")
+# device = torch.device("cuda:0") # Uncomment this to run on GPU
+
+# Create Tensors to hold input and outputs.
+# By default, requires_grad=False, which indicates that we do not need to
+# compute gradients with respect to these Tensors during the backward pass.
+x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+y = torch.sin(x)
+
+# Create random Tensors for weights. For this example, we need
+# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
+# not too far from the correct result to ensure convergence.
+# Setting requires_grad=True indicates that we want to compute gradients with
+# respect to these Tensors during the backward pass.
+a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
+c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
+
+learning_rate = 5e-6
+for t in range(2000):
+    # To apply our Function, we use the Function.apply method and alias it as 'P3'.
+ P3 = LegendrePolynomial3.apply
+
+ # Forward pass: compute predicted y using operations; we compute
+ # P3 using our custom autograd operation.
+ y_pred = a + b * P3(c + d * x)
+
+ # Compute and print loss
+ loss = (y_pred - y).pow(2).sum()
+ if t % 100 == 99:
+ print(t, loss.item())
+
+ # Use autograd to compute the backward pass.
+ loss.backward()
+
+ # Update weights using gradient descent
+ with torch.no_grad():
+ a -= learning_rate * a.grad
+ b -= learning_rate * b.grad
+ c -= learning_rate * c.grad
+ d -= learning_rate * d.grad
+
+ # Manually zero the gradients after updating weights
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+
+print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')
\ No newline at end of file