Diffstat (limited to 'learn_torch/basics')
-rw-r--r--  learn_torch/basics/autograd_v5.py        53
-rw-r--r--  learn_torch/basics/dynamic_net.py        68
-rw-r--r--  learn_torch/basics/nd_grad.py            42
-rw-r--r--  learn_torch/basics/nn_custom.py          69
-rw-r--r--  learn_torch/basics/nn_demo.py            47
-rw-r--r--  learn_torch/basics/nn_demo_optim.py      52
-rw-r--r--  learn_torch/basics/regression_v3.py      40
-rw-r--r--  learn_torch/basics/tensor_autograd.py    50
-rw-r--r--  learn_torch/basics/tensor_autograd_2.py  45
-rw-r--r--  learn_torch/basics/v5_diff.py            85
10 files changed, 551 insertions, 0 deletions
diff --git a/learn_torch/basics/autograd_v5.py b/learn_torch/basics/autograd_v5.py
new file mode 100644
index 0000000..810a702
--- /dev/null
+++ b/learn_torch/basics/autograd_v5.py
@@ -0,0 +1,53 @@
+
+import torch
+import math
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+
+lr = 5e-6
+
+class LegendrePolynomial3(torch.autograd.Function):
+ @staticmethod
+ def forward(ctx, input):
+ ctx.save_for_backward(input)
+ return 0.5*(5*input**3 - 3*input)
+ @staticmethod
+ def backward(ctx, grad_output):
+ input, = ctx.saved_tensors
+ return grad_output*(7.5*input**2 - 1.5)
+
+
+def train(X, y):
+    a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+    b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
+    c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+    d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
+
+ for i in range(2000):
+ P3 = LegendrePolynomial3.apply
+        # Forward pass through the custom autograd Function
+ y_pred = a + b * P3(c + d*X)
+ loss = (y_pred - y).pow(2).sum()
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+        # Backward pass: autograd fills a.grad, b.grad, c.grad, d.grad
+ loss.backward()
+ with torch.no_grad():
+ a -= lr * a.grad
+ b -= lr * b.grad
+ c -= lr * c.grad
+ d -= lr * d.grad
+
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+
+
+if __name__ == '__main__':
+ X = torch.linspace(-math.pi, math.pi, 2000, dtype=dtype, device=device)
+ y = torch.sin(X)
+ train(X, y)
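+
+    # Added check (a sketch, not part of the original script): torch.autograd.gradcheck
+    # compares the hand-written backward() above against numerical gradients.
+    # It expects double-precision inputs with requires_grad=True; check_in is an
+    # illustrative probe tensor.
+    check_in = torch.randn(8, dtype=torch.double, requires_grad=True)
+    print('gradcheck:', torch.autograd.gradcheck(LegendrePolynomial3.apply, (check_in,)))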
diff --git a/learn_torch/basics/dynamic_net.py b/learn_torch/basics/dynamic_net.py
new file mode 100644
index 0000000..efd2f4e
--- /dev/null
+++ b/learn_torch/basics/dynamic_net.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+import random
+import torch
+import math
+
+
+class DynamicNet(torch.nn.Module):
+ def __init__(self):
+ """
+ In the constructor we instantiate five parameters and assign them as members.
+ """
+ super().__init__()
+ self.a = torch.nn.Parameter(torch.randn(()))
+ self.b = torch.nn.Parameter(torch.randn(()))
+ self.c = torch.nn.Parameter(torch.randn(()))
+ self.d = torch.nn.Parameter(torch.randn(()))
+ self.e = torch.nn.Parameter(torch.randn(()))
+
+ def forward(self, x):
+ """
+        For the forward pass of the model, we randomly choose the highest
+        polynomial order to be 3, 4, or 5, and reuse the e parameter to
+        compute the contribution of the fourth- and fifth-order terms.
+
+ Since each forward pass builds a dynamic computation graph, we can use normal
+ Python control-flow operators like loops or conditional statements when
+ defining the forward pass of the model.
+
+ Here we also see that it is perfectly safe to reuse the same parameter many
+ times when defining a computational graph.
+ """
+ y = self.a + self.b * x + self.c * x ** 2 + self.d * x ** 3
+ for exp in range(4, random.randint(4, 6)):
+ y = y + self.e * x ** exp
+ return y
+
+ def string(self):
+ """
+        Just like any class in Python, you can also define custom methods on PyTorch modules.
+ """
+ return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3 + {self.e.item()} x^4 ? + {self.e.item()} x^5 ?'
+
+
+# Create Tensors to hold input and outputs.
+x = torch.linspace(-math.pi, math.pi, 2000)
+y = torch.sin(x)
+
+# Construct our model by instantiating the class defined above
+model = DynamicNet()
+
+# Construct our loss function and an Optimizer. Training this strange model with
+# vanilla stochastic gradient descent is tough, so we use momentum
+criterion = torch.nn.MSELoss(reduction='sum')
+optimizer = torch.optim.SGD(model.parameters(), lr=1e-8, momentum=0.9)
+for t in range(30000):
+ # Forward pass: Compute predicted y by passing x to the model
+ y_pred = model(x)
+
+ # Compute and print loss
+ loss = criterion(y_pred, y)
+ if t % 2000 == 1999:
+ print(t, loss.item())
+
+ # Zero gradients, perform a backward pass, and update the weights.
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+
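+# Added check (a sketch, not in the original file): because forward() samples the
+# highest order on each call, two passes over the same input may build different
+# graphs and return different values; x0 is just an illustrative probe input.
+x0 = torch.tensor([2.0])
+print('two forward passes on x0:', model(x0).item(), model(x0).item())
+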
+print(f'Result: {model.string()}')
\ No newline at end of file
diff --git a/learn_torch/basics/nd_grad.py b/learn_torch/basics/nd_grad.py
new file mode 100644
index 0000000..d8af59e
--- /dev/null
+++ b/learn_torch/basics/nd_grad.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+import numpy as np
+import math
+
+# Create random input and output data
+x = np.linspace(-math.pi, math.pi, 2000)
+y = np.sin(x)
+
+# Randomly initialize weights
+a = np.random.randn()
+b = np.random.randn()
+c = np.random.randn()
+d = np.random.randn()
+
+learning_rate = 1e-3
+for t in range(500):
+ # Forward pass: compute predicted y
+ # y = a + b x + c x^2 + d x^3
+ y_pred = a + b * x + c * x ** 2 + d * x ** 3
+
+ # Compute and print loss
+ loss = np.square(y_pred - y).mean()
+    if t % 10 == 0:
+        print(t, loss)
+
+ # Backprop to compute gradients of a, b, c, d with respect to loss
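+    # (Added note) With loss = mean((y_pred - y)^2), the chain rule gives
+    # d(loss)/d(a) = mean(2 * (y_pred - y)); each higher coefficient picks up an
+    # extra factor of x**k, e.g. d(loss)/d(b) = mean(2 * (y_pred - y) * x).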
+ grad_y_pred = 2.0 * (y_pred - y)
+
+ grad_a = grad_y_pred.mean()
+ grad_b = (grad_y_pred * x).mean()
+ grad_c = (grad_y_pred * x ** 2).mean()
+ grad_d = (grad_y_pred * x ** 3).mean()
+
+ # Update weights
+ a -= learning_rate * grad_a
+ b -= learning_rate * grad_b
+ c -= learning_rate * grad_c
+ d -= learning_rate * grad_d
+
+print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
\ No newline at end of file
diff --git a/learn_torch/basics/nn_custom.py b/learn_torch/basics/nn_custom.py
new file mode 100644
index 0000000..0d9ae8f
--- /dev/null
+++ b/learn_torch/basics/nn_custom.py
@@ -0,0 +1,69 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-3
+
+
+class Poly3(torch.nn.Module):
+ def __init__(self):
+ super(Poly3, self).__init__()
+ self.a = torch.nn.Parameter(torch.randn(()))
+ self.b = torch.nn.Parameter(torch.randn(()))
+ self.c = torch.nn.Parameter(torch.randn(()))
+ self.d = torch.nn.Parameter(torch.randn(()))
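+        # (Added comment) Wrapping each scalar in nn.Parameter registers it with the
+        # Module, so model.parameters() hands all four coefficients to the optimizer.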
+
+ def forward(self, x):
+ return self.a + self.b * x + self.c * x**2 + self.d * x**3
+
+ def __repr__(self):
+ return f'y = {self.a.item()} + {self.b.item()} x + {self.c.item()} x^2 + {self.d.item()} x^3'
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ # model.zero_grad()
+ opt.zero_grad()
+
+ loss.backward()
+
+ # with torch.no_grad():
+ # for param in model.parameters():
+ # param -= lr * param.grad
+ opt.step()
+
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
+ # p = torch.Tensor([1, 2, 3])
+ # X = X.unsqueeze(-1).pow(p)
+
+ # model = torch.nn.Sequential(
+ # torch.nn.Linear(3, 1),
+ # torch.nn.Flatten(0, 1)
+ # )
+    model = Poly3().to(device)
+
+ loss_fn = torch.nn.MSELoss(reduction='sum')
+ opt = torch.optim.RMSprop(model.parameters(), lr=lr)
+
+ train(X, y)
+ # weight_layer = model[0]
+ #
+ # print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ # weight_layer.weight[0, 0].item(),
+ # weight_layer.weight[0, 1].item(),
+ # weight_layer.weight[0, 2].item()))
+ print(model)
diff --git a/learn_torch/basics/nn_demo.py b/learn_torch/basics/nn_demo.py
new file mode 100644
index 0000000..2e0bb98
--- /dev/null
+++ b/learn_torch/basics/nn_demo.py
@@ -0,0 +1,47 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-6
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ model.zero_grad()
+ loss.backward()
+
+ with torch.no_grad():
+ for param in model.parameters():
+ param -= lr * param.grad
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
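+    # (Added comment) Build polynomial features: X goes from shape (2000,) to
+    # (2000, 3) with columns x, x^2, x^3, so a single Linear(3, 1) layer can fit
+    # y = a + b x + c x^2 + d x^3.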
+    p = torch.tensor([1, 2, 3], device=device)
+    X = X.unsqueeze(-1).pow(p)
+
+    model = torch.nn.Sequential(
+        torch.nn.Linear(3, 1),
+        torch.nn.Flatten(0, 1)
+    ).to(device)
+
+    loss_fn = torch.nn.MSELoss(reduction='sum')  # sum-reduction matches the tiny lr of 1e-6
+
+ train(X, y)
+ weight_layer = model[0]
+
+ print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ weight_layer.weight[0, 0].item(),
+ weight_layer.weight[0, 1].item(),
+ weight_layer.weight[0, 2].item()))
diff --git a/learn_torch/basics/nn_demo_optim.py b/learn_torch/basics/nn_demo_optim.py
new file mode 100644
index 0000000..38d95dc
--- /dev/null
+++ b/learn_torch/basics/nn_demo_optim.py
@@ -0,0 +1,52 @@
+
+import torch
+import math
+
+
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+dtype = torch.float
+lr = 1e-3
+
+
+def train(X, y):
+ for i in range(2000):
+ y_pred = model(X)
+ loss = loss_fn(y_pred, y)
+
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+
+ # model.zero_grad()
+ opt.zero_grad()
+
+ loss.backward()
+
+ # with torch.no_grad():
+ # for param in model.parameters():
+ # param -= lr * param.grad
+ opt.step()
+
+
+if __name__ == '__main__':
+
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+
+    p = torch.tensor([1, 2, 3], device=device)
+    X = X.unsqueeze(-1).pow(p)
+
+    model = torch.nn.Sequential(
+        torch.nn.Linear(3, 1),
+        torch.nn.Flatten(0, 1)
+    ).to(device)
+
+ loss_fn = torch.nn.MSELoss(reduction='sum')
+ opt = torch.optim.RMSprop(model.parameters(), lr=lr)
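+    # (Added comment) Compared with nn_demo.py, the optimizer replaces the manual
+    # "param -= lr * param.grad" update: opt.zero_grad() clears stored gradients and
+    # opt.step() applies the RMSprop rule to every registered parameter.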
+
+ train(X, y)
+ weight_layer = model[0]
+
+ print('y = {} + {}x + {}x^2 + {}x^3'.format(weight_layer.bias.item(),
+ weight_layer.weight[0, 0].item(),
+ weight_layer.weight[0, 1].item(),
+ weight_layer.weight[0, 2].item()))
diff --git a/learn_torch/basics/regression_v3.py b/learn_torch/basics/regression_v3.py
new file mode 100644
index 0000000..f6bd467
--- /dev/null
+++ b/learn_torch/basics/regression_v3.py
@@ -0,0 +1,40 @@
+import math
+import torch
+
+
+dtype = torch.float
+device = 'cuda:0' if torch.cuda.is_available() else 'cpu'
+
+
+lr = 1e-6
+
+
+def train(X, y):
+ a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+ d = torch.randn((), device=device, dtype=dtype, requires_grad=True)
+
+ for i in range(2000):
+ y_pred = a + b*X + c*X**2 + d*X**3
+ loss = (y_pred - y).pow(2).sum()
+ if i % 100 == 0:
+ print('{}/{}: {}'.format(i, 2000, loss.item()))
+ loss.backward()
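+        # (Added comment) backward() fills a.grad .. d.grad with d(loss)/d(param);
+        # the update below runs under no_grad() so the in-place subtraction is not
+        # itself tracked by autograd.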
+ with torch.no_grad():
+ a -= lr * a.grad
+ b -= lr * b.grad
+ c -= lr * c.grad
+ d -= lr * d.grad
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+if __name__ == '__main__':
+
+    X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+ y = torch.sin(X)
+ train(X, y)
+
diff --git a/learn_torch/basics/tensor_autograd.py b/learn_torch/basics/tensor_autograd.py
new file mode 100644
index 0000000..661620d
--- /dev/null
+++ b/learn_torch/basics/tensor_autograd.py
@@ -0,0 +1,50 @@
+
+import torch
+import math
+
+
+device = "cuda" if torch.cuda.is_available() else "cpu"
+dtype = torch.float
+
+# a = torch.randn((), device=device, dtype=dtype)
+# b = torch.randn((), device=device, dtype=dtype)
+# c = torch.randn((), device=device, dtype=dtype)
+# d = torch.randn((), device=device, dtype=dtype)
+# params = [a, b, c, d]
+
+lr = 1e-6
+
+
+def train(X, y):
+ a = torch.randn((), device=device, dtype=dtype)
+ b = torch.randn((), device=device, dtype=dtype)
+ c = torch.randn((), device=device, dtype=dtype)
+ d = torch.randn((), device=device, dtype=dtype)
+
+ for i in range(2000):
+ y_pred = a + b*X + c*X**2 + d*X**3
+ loss = (y_pred - y).pow(2).sum().item()
+ if i % 50 == 0:
+ print(i, loss)
+ loss_grad = 2*(y_pred - y)
+ a_grad = loss_grad.sum()
+ b_grad = (loss_grad * X).sum()
+ c_grad = (loss_grad * X**2).sum()
+ d_grad = (loss_grad * X**3).sum()
+ # if i % 50 == 0:
+ # print(a_grad, b_grad, c_grad, d_grad)
+
+ a -= lr * a_grad
+ b -= lr * b_grad
+ c -= lr * c_grad
+ d -= lr * d_grad
+ print('a = {}, b = {}, c = {}, d = {}'.format(a.item(), b.item(), c.item(), d.item()))
+
+if __name__ == '__main__':
+ X = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=torch.float)
+ y = torch.sin(X)
+ train(X, y)
+
+
+
diff --git a/learn_torch/basics/tensor_autograd_2.py b/learn_torch/basics/tensor_autograd_2.py
new file mode 100644
index 0000000..584d4c7
--- /dev/null
+++ b/learn_torch/basics/tensor_autograd_2.py
@@ -0,0 +1,45 @@
+# -*- coding: utf-8 -*-
+
+import torch
+import math
+
+
+dtype = torch.float
+device = torch.device("cpu")
+# device = torch.device("cuda:0") # Uncomment this to run on GPU
+
+# Create random input and output data
+x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+y = torch.sin(x)
+
+# Randomly initialize weights
+a = torch.randn(1, device=device, dtype=dtype)
+b = torch.randn(1, device=device, dtype=dtype)
+c = torch.randn(1, device=device, dtype=dtype)
+d = torch.randn(1, device=device, dtype=dtype)
+
+learning_rate = 1e-6
+for t in range(2000):
+ # Forward pass: compute predicted y
+ y_pred = a + b * x + c * x ** 2 + d * x ** 3
+
+ # Compute and print loss
+ loss = (y_pred - y).pow(2).sum().item()
+ if t % 100 == 99:
+ print(t, loss)
+
+ # Backprop to compute gradients of a, b, c, d with respect to loss
+ grad_y_pred = 2.0 * (y_pred - y)
+ grad_a = grad_y_pred.sum()
+ grad_b = (grad_y_pred * x).sum()
+ grad_c = (grad_y_pred * x ** 2).sum()
+ grad_d = (grad_y_pred * x ** 3).sum()
+
+ # Update weights using gradient descent
+ a -= learning_rate * grad_a
+ b -= learning_rate * grad_b
+ c -= learning_rate * grad_c
+ d -= learning_rate * grad_d
+
+
+print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
\ No newline at end of file
diff --git a/learn_torch/basics/v5_diff.py b/learn_torch/basics/v5_diff.py
new file mode 100644
index 0000000..5b247c0
--- /dev/null
+++ b/learn_torch/basics/v5_diff.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+import torch
+import math
+
+
+class LegendrePolynomial3(torch.autograd.Function):
+ """
+ We can implement our own custom autograd Functions by subclassing
+ torch.autograd.Function and implementing the forward and backward passes
+ which operate on Tensors.
+ """
+
+ @staticmethod
+ def forward(ctx, input):
+ """
+ In the forward pass we receive a Tensor containing the input and return
+ a Tensor containing the output. ctx is a context object that can be used
+ to stash information for backward computation. You can cache arbitrary
+ objects for use in the backward pass using the ctx.save_for_backward method.
+ """
+ ctx.save_for_backward(input)
+ return 0.5 * (5 * input ** 3 - 3 * input)
+
+ @staticmethod
+ def backward(ctx, grad_output):
+ """
+ In the backward pass we receive a Tensor containing the gradient of the loss
+ with respect to the output, and we need to compute the gradient of the loss
+ with respect to the input.
+ """
+ input, = ctx.saved_tensors
+ return grad_output * 1.5 * (5 * input ** 2 - 1)
+
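+# (Added note) The backward above applies d/dx [0.5 * (5x^3 - 3x)] = 1.5 * (5x^2 - 1),
+# multiplied by grad_output as the chain rule requires; backward() must return one
+# gradient per input of forward().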
+
+dtype = torch.float
+device = torch.device("cpu")
+# device = torch.device("cuda:0") # Uncomment this to run on GPU
+
+# Create Tensors to hold input and outputs.
+# By default, requires_grad=False, which indicates that we do not need to
+# compute gradients with respect to these Tensors during the backward pass.
+x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
+y = torch.sin(x)
+
+# Create random Tensors for weights. For this example, we need
+# 4 weights: y = a + b * P3(c + d * x), these weights need to be initialized
+# not too far from the correct result to ensure convergence.
+# Setting requires_grad=True indicates that we want to compute gradients with
+# respect to these Tensors during the backward pass.
+a = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+b = torch.full((), -1.0, device=device, dtype=dtype, requires_grad=True)
+c = torch.full((), 0.0, device=device, dtype=dtype, requires_grad=True)
+d = torch.full((), 0.3, device=device, dtype=dtype, requires_grad=True)
+
+learning_rate = 5e-6
+for t in range(2000):
+    # To apply our Function, we use the Function.apply method and alias it as 'P3'.
+ P3 = LegendrePolynomial3.apply
+
+ # Forward pass: compute predicted y using operations; we compute
+ # P3 using our custom autograd operation.
+ y_pred = a + b * P3(c + d * x)
+
+ # Compute and print loss
+ loss = (y_pred - y).pow(2).sum()
+ if t % 100 == 99:
+ print(t, loss.item())
+
+ # Use autograd to compute the backward pass.
+ loss.backward()
+
+ # Update weights using gradient descent
+ with torch.no_grad():
+ a -= learning_rate * a.grad
+ b -= learning_rate * b.grad
+ c -= learning_rate * c.grad
+ d -= learning_rate * d.grad
+
+ # Manually zero the gradients after updating weights
+ a.grad = None
+ b.grad = None
+ c.grad = None
+ d.grad = None
+
+print(f'Result: y = {a.item()} + {b.item()} * P3({c.item()} + {d.item()} x)')
\ No newline at end of file