# Experiment: from NumPy to Tensor.
# Fit y = sin(x) on [-pi, pi] with a cubic polynomial, first with hand-written
# NumPy gradients, then with PyTorch tensors, autograd and torch.optim.
import math

import matplotlib.pyplot as plt
import numpy as np

# Seed the global NumPy RNG so that a fresh "Restart & Run All" always starts
# from the same random coefficients and reproduces the same training log.
SEED = 0
np.random.seed(SEED)

# 1.1 Training data: 2000 evenly spaced samples of y = sin(x) on [-pi, pi].
# The data is deterministic; only the initial coefficients below are random.
x = np.linspace(-math.pi, math.pi, 2000)
y = np.sin(x)

plt.plot(x, y)
plt.xlabel("x")
plt.ylabel("y")
plt.title("Training data: y = sin(x)")
plt.grid(True)
plt.show()

# 1.2 Initial coefficients of the cubic y_pred = a + b x + c x^2 + d x^3,
# each drawn independently from a standard normal distribution.
a = np.random.randn()
b = np.random.randn()
c = np.random.randn()
d = np.random.randn()
# 1.3 Gradient descent with NumPy.
# The objective being minimised is the squared-error cost between the cubic
# model y_pred = a + b x + c x^2 + d x^3 and the targets y (the cubic is the
# model, not the objective). The coefficients a, b, c, d are optimised with
# full-batch gradient descent using hand-derived gradients.
learning_rate = 1e-6
for step in range(2000):
    # Forward pass: evaluate the cubic on every sample at once.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Loss is the SUM (not the mean) of squared residuals; log every 100th step.
    residual = y_pred - y
    loss = np.square(residual).sum()
    if (step + 1) % 100 == 0:
        print(step, loss)

    # Gradients of the loss with respect to a, b, c, d via the chain rule:
    # d(loss)/d(y_pred) = 2 * residual, multiplied by d(y_pred)/d(coefficient).
    grad_y_pred = 2.0 * residual
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Move each coefficient one small step against its gradient.
    a = a - learning_rate * grad_a
    b = b - learning_rate * grad_b
    c = c - learning_rate * grad_c
    d = d - learning_rate * grad_d

print(f'Result: y = {a} + {b} x + {c} x^2 + {d} x^3')
# Compare the NumPy fit with the target curve. `y_pred` is the prediction left
# over from the last iteration of the NumPy training loop.
plt.plot(x, y, label='real')
plt.plot(x, y_pred, label='pred')
plt.legend()  # without this call the labels passed above are never drawn
plt.grid(True)
plt.show()

# 2. The same fit with PyTorch tensors, still using hand-written gradients.
import math

import torch

# Fixed seed so the random initial weights (and the training log) are
# reproducible across kernel restarts.
torch.manual_seed(0)

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Training data: 2000 evenly spaced samples of y = sin(x) on [-pi, pi].
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Randomly initialise the four 0-dimensional coefficient tensors.
a = torch.randn((), device=device, dtype=dtype)
b = torch.randn((), device=device, dtype=dtype)
c = torch.randn((), device=device, dtype=dtype)
d = torch.randn((), device=device, dtype=dtype)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Sum-of-squares loss. Only convert it to a Python number when it is
    # actually printed, so the other iterations skip the .item() conversion.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Hand-derived gradients of the loss with respect to a, b, c, d.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_a = grad_y_pred.sum()
    grad_b = (grad_y_pred * x).sum()
    grad_c = (grad_y_pred * x ** 2).sum()
    grad_d = (grad_y_pred * x ** 3).sum()

    # Update weights using gradient descent.
    a -= learning_rate * grad_a
    b -= learning_rate * grad_b
    c -= learning_rate * grad_c
    d -= learning_rate * grad_d

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')
# Compare the tensor fit with the target curve. Tensors are moved to the CPU
# before conversion so the plot also works when the GPU device line in the
# previous section is enabled (Tensor.numpy() only accepts CPU tensors).
plt.plot(x.cpu().numpy(), y.cpu().numpy(), label='real')
plt.plot(x.cpu().numpy(), y_pred.cpu().numpy(), label='pred')
plt.legend()  # without this call the labels passed above are never drawn
plt.grid(True)
plt.show()

# 3. Automatic gradient computation (autograd).
# At this point `a` is still the plain tensor from the previous section, which
# was created without requires_grad, so no gradient has been stored on it.
a.grad is None

import math

import torch

# Fixed seed so the random initial weights are reproducible.
torch.manual_seed(0)

dtype = torch.float
device = torch.device("cpu")
# device = torch.device("cuda:0")  # Uncomment this to run on GPU

# Create Tensors to hold input and outputs.
# By default, requires_grad=False, which indicates that we do not need to
# compute gradients with respect to these Tensors during the backward pass.
x = torch.linspace(-math.pi, math.pi, 2000, device=device, dtype=dtype)
y = torch.sin(x)

# Create random Tensors for weights. For a third order polynomial, we need
# 4 weights: y = a + b x + c x^2 + d x^3
# Setting requires_grad=True indicates that we want to compute gradients with
# respect to these Tensors during the backward pass. This flag is the only
# change to the weight creation compared with the manual-gradient version.
a = torch.randn((), device=device, dtype=dtype, requires_grad=True)
b = torch.randn((), device=device, dtype=dtype, requires_grad=True)
c = torch.randn((), device=device, dtype=dtype, requires_grad=True)
d = torch.randn((), device=device, dtype=dtype, requires_grad=True)

learning_rate = 1e-6
for t in range(2000):
    # Forward pass: compute predicted y using operations on Tensors.
    y_pred = a + b * x + c * x ** 2 + d * x ** 3

    # Compute and print loss using operations on Tensors.
    # loss is a 0-dimensional (scalar) Tensor; loss.item() extracts its value
    # as a Python number.
    loss = (y_pred - y).pow(2).sum()
    if t % 100 == 99:
        print(t, loss.item())

    # Use autograd to compute the backward pass. This call computes the
    # gradient of loss with respect to all Tensors with requires_grad=True.
    # After this call a.grad, b.grad, c.grad and d.grad are Tensors holding
    # the gradient of the loss with respect to a, b, c, d respectively.
    loss.backward()

    # Manually update weights using gradient descent. Wrap in torch.no_grad()
    # because weights have requires_grad=True, but we don't need to track this
    # update in autograd.
    with torch.no_grad():
        a -= learning_rate * a.grad
        b -= learning_rate * b.grad
        c -= learning_rate * c.grad
        d -= learning_rate * d.grad

        # Reset the gradients after updating weights so the next backward()
        # call does not accumulate onto the values from this step.
        a.grad = None
        b.grad = None
        c.grad = None
        d.grad = None

print(f'Result: y = {a.item()} + {b.item()} x + {c.item()} x^2 + {d.item()} x^3')

# 4. Automatic optimisation with torch.optim: data preparation.
# Create Tensors to hold input and outputs.
x = torch.linspace(-math.pi, math.pi, 2000)
y = torch.sin(x)

# Prepare the input tensor (x, x^2, x^3): unsqueeze gives x a trailing axis so
# that pow(p) broadcasts against the three exponents, one column per power
# (the recorded notebook output reports xx.shape == torch.Size([2000, 3])).
p = torch.tensor([1, 2, 3])
xx = x.unsqueeze(-1).pow(p)
# 4. Automatic optimisation with torch.optim: model, loss and training loop.
# Uses `xx` (the polynomial features x, x^2, x^3) and `y` prepared just above
# in the notebook.
print(f"xx.shape:{xx.shape}")

# Fixed seed so the random initialisation of the Linear layer, and therefore
# the training log, is reproducible.
torch.manual_seed(0)

# Use the nn package to define our model and loss function. The Linear layer
# maps the three polynomial features to one output; Flatten(0, 1) merges the
# first two axes of that output so it lines up with the 1-D target y.
model = torch.nn.Sequential(
    torch.nn.Linear(3, 1),
    torch.nn.Flatten(0, 1)
)
loss_fn = torch.nn.MSELoss(reduction='sum')

# Use the optim package to define an Optimizer that will update the weights of
# the model for us. Here we use RMSprop; the optim package contains many other
# optimization algorithms. The first argument to the RMSprop constructor tells
# the optimizer which Tensors it should update.
learning_rate = 1e-3
optimizer = torch.optim.RMSprop(model.parameters(), lr=learning_rate)

for t in range(2000):
    # Forward pass: compute predicted y by passing the features to the model.
    y_pred = model(xx)

    # Compute and print loss.
    loss = loss_fn(y_pred, y)
    if t % 100 == 99:
        print(t, loss.item())

    # Before the backward pass, use the optimizer object to zero all of the
    # gradients for the variables it will update (the learnable weights of the
    # model). By default, gradients are accumulated in buffers (i.e. not
    # overwritten) whenever .backward() is called. See the documentation of
    # torch.autograd.backward for more details.
    optimizer.zero_grad()

    # Backward pass: compute gradient of the loss with respect to model
    # parameters.
    loss.backward()

    # Calling the step function on an Optimizer makes an update to its
    # parameters.
    optimizer.step()


linear_layer = model[0]
print(f'Result: y = {linear_layer.bias.item()} + {linear_layer.weight[:, 0].item()} x + {linear_layer.weight[:, 1].item()} x^2 + {linear_layer.weight[:, 2].item()} x^3')

# Summary of the four implementations in this notebook:
#   1. NumPy: forward pass, gradients and updates are all written by hand.
#   2. torch Tensors: same algorithm, but the data can be placed on a GPU.
#   3. autograd: loss.backward() computes the gradients automatically, so the
#      hand-derived gradient formulas disappear.
#   4. torch.nn + torch.optim: the model, the loss and the update rule are
#      library components; only the training loop remains.
# Takeaway: using torch's built-in functionality simplifies the code and can
# improve performance. (This note was originally typed as prose into a code
# cell, which is not valid Python and would stop "Run All"; it is kept here as
# a comment and belongs in a markdown cell.)