This repository has been archived on 2024-01-06. You can view files and clone it, but cannot push or open issues or pull requests.
justhomework/AIandML/e2_matchine_learning/e2.0_k-means.ipynb

939 lines
108 KiB
Text
Raw Normal View History

2022-11-23 03:03:21 +00:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# K-means实验"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"用pandas读取啤酒数据集`e2.0_beer.txt`。"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>calories</th>\n",
" <th>sodium</th>\n",
" <th>alcohol</th>\n",
" <th>cost</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Budweiser</td>\n",
" <td>144</td>\n",
" <td>15</td>\n",
" <td>4.7</td>\n",
" <td>0.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Schlitz</td>\n",
" <td>151</td>\n",
" <td>19</td>\n",
" <td>4.9</td>\n",
" <td>0.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Lowenbrau</td>\n",
" <td>157</td>\n",
" <td>15</td>\n",
" <td>0.9</td>\n",
" <td>0.48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Kronenbourg</td>\n",
" <td>170</td>\n",
" <td>7</td>\n",
" <td>5.2</td>\n",
" <td>0.73</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Heineken</td>\n",
" <td>152</td>\n",
" <td>11</td>\n",
" <td>5.0</td>\n",
" <td>0.77</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Old_Milwaukee</td>\n",
" <td>145</td>\n",
" <td>23</td>\n",
" <td>4.6</td>\n",
" <td>0.28</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Augsberger</td>\n",
" <td>175</td>\n",
" <td>24</td>\n",
" <td>5.5</td>\n",
" <td>0.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Srohs_Bohemian_Style</td>\n",
" <td>149</td>\n",
" <td>27</td>\n",
" <td>4.7</td>\n",
" <td>0.42</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Miller_Lite</td>\n",
" <td>99</td>\n",
" <td>10</td>\n",
" <td>4.3</td>\n",
" <td>0.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Budweiser_Light</td>\n",
" <td>113</td>\n",
" <td>8</td>\n",
" <td>3.7</td>\n",
" <td>0.40</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Coors</td>\n",
" <td>140</td>\n",
" <td>18</td>\n",
" <td>4.6</td>\n",
" <td>0.44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Coors_Light</td>\n",
" <td>102</td>\n",
" <td>15</td>\n",
" <td>4.1</td>\n",
" <td>0.46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Michelob_Light</td>\n",
" <td>135</td>\n",
" <td>11</td>\n",
" <td>4.2</td>\n",
" <td>0.50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Becks</td>\n",
" <td>150</td>\n",
" <td>19</td>\n",
" <td>4.7</td>\n",
" <td>0.76</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Kirin</td>\n",
" <td>149</td>\n",
" <td>6</td>\n",
" <td>5.0</td>\n",
" <td>0.79</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Pabst_Extra_Light</td>\n",
" <td>68</td>\n",
" <td>15</td>\n",
" <td>2.3</td>\n",
" <td>0.38</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Hamms</td>\n",
" <td>139</td>\n",
" <td>19</td>\n",
" <td>4.4</td>\n",
" <td>0.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Heilemans_Old_Style</td>\n",
" <td>144</td>\n",
" <td>24</td>\n",
" <td>4.9</td>\n",
" <td>0.43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Olympia_Goled_Light</td>\n",
" <td>72</td>\n",
" <td>6</td>\n",
" <td>2.9</td>\n",
" <td>0.46</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Schlitz_Light</td>\n",
" <td>97</td>\n",
" <td>7</td>\n",
" <td>4.2</td>\n",
" <td>0.47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name calories sodium alcohol cost\n",
"0 Budweiser 144 15 4.7 0.43\n",
"1 Schlitz 151 19 4.9 0.43\n",
"2 Lowenbrau 157 15 0.9 0.48\n",
"3 Kronenbourg 170 7 5.2 0.73\n",
"4 Heineken 152 11 5.0 0.77\n",
"5 Old_Milwaukee 145 23 4.6 0.28\n",
"6 Augsberger 175 24 5.5 0.40\n",
"7 Srohs_Bohemian_Style 149 27 4.7 0.42\n",
"8 Miller_Lite 99 10 4.3 0.43\n",
"9 Budweiser_Light 113 8 3.7 0.40\n",
"10 Coors 140 18 4.6 0.44\n",
"11 Coors_Light 102 15 4.1 0.46\n",
"12 Michelob_Light 135 11 4.2 0.50\n",
"13 Becks 150 19 4.7 0.76\n",
"14 Kirin 149 6 5.0 0.79\n",
"15 Pabst_Extra_Light 68 15 2.3 0.38\n",
"16 Hamms 139 19 4.4 0.43\n",
"17 Heilemans_Old_Style 144 24 4.9 0.43\n",
"18 Olympia_Goled_Light 72 6 2.9 0.46\n",
"19 Schlitz_Light 97 7 4.2 0.47"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Load the beer dataset from a space-delimited text file.\n",
"import pandas as pd\n",
"# NOTE: despite its name, `url` holds a relative local path (resolved against the\n",
"# kernel's working directory), not a web address.\n",
"url = 'e2.0_beer.txt'\n",
"beer = pd.read_csv(url, sep=' ')\n",
"# Leave the frame as the last expression so the notebook renders it as a table.\n",
"beer"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"去掉`name`项,保留`calories`、`sodium`、`alcohol`和`cost`数据,作为特征`X`。"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"# Define the feature matrix X from the four numeric measurements only.\n",
"# Selecting the columns by name (instead of dropping `name`) keeps this cell\n",
"# idempotent: a later cell adds a `cluster` column to `beer`, and re-running a\n",
"# drop-based version afterwards would leak those labels into the features.\n",
"FEATURE_COLUMNS = ['calories', 'sodium', 'alcohol', 'cost']\n",
"X = beer[FEATURE_COLUMNS]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"建立K-means聚类器使类别数为3并进行数据拟合。"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style>#sk-container-id-3 {color: black;background-color: white;}#sk-container-id-3 pre{padding: 0;}#sk-container-id-3 div.sk-toggleable {background-color: white;}#sk-container-id-3 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-3 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-3 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-3 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-3 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-3 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-3 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-3 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-3 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-3 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-3 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-3 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-3 div.sk-label:hover 
label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-3 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-3 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-3 div.sk-item {position: relative;z-index: 1;}#sk-container-id-3 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-3 div.sk-item::before, #sk-container-id-3 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-3 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-3 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-3 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-3 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-3 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-3 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-3 div.sk-label-container {text-align: center;}#sk-container-id-3 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. 
See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-3 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-3\" class=\"sk-top-container\
],
"text/plain": [
"KMeans(n_clusters=3)"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# K-means with 3 clusters, fitted on the raw (unscaled) features.\n",
"# The exercise requires the estimator object to be named `km`.\n",
"from sklearn.cluster import KMeans\n",
"# Fix the seed: K-means starts from randomly chosen centroids, so without it the\n",
"# cluster numbering (and the plot colors derived from it) can change between runs.\n",
"km = KMeans(n_clusters=3, random_state=1)\n",
"km.fit(X)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"将聚类结果传递给pandas数据框并按类别排序查看各个啤酒参与聚类的结果。"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>name</th>\n",
" <th>calories</th>\n",
" <th>sodium</th>\n",
" <th>alcohol</th>\n",
" <th>cost</th>\n",
" <th>cluster</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>Budweiser_Light</td>\n",
" <td>113</td>\n",
" <td>8</td>\n",
" <td>3.7</td>\n",
" <td>0.40</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11</th>\n",
" <td>Coors_Light</td>\n",
" <td>102</td>\n",
" <td>15</td>\n",
" <td>4.1</td>\n",
" <td>0.46</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>Miller_Lite</td>\n",
" <td>99</td>\n",
" <td>10</td>\n",
" <td>4.3</td>\n",
" <td>0.43</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19</th>\n",
" <td>Schlitz_Light</td>\n",
" <td>97</td>\n",
" <td>7</td>\n",
" <td>4.2</td>\n",
" <td>0.47</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Heineken</td>\n",
" <td>152</td>\n",
" <td>11</td>\n",
" <td>5.0</td>\n",
" <td>0.77</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Old_Milwaukee</td>\n",
" <td>145</td>\n",
" <td>23</td>\n",
" <td>4.6</td>\n",
" <td>0.28</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>Augsberger</td>\n",
" <td>175</td>\n",
" <td>24</td>\n",
" <td>5.5</td>\n",
" <td>0.40</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>Srohs_Bohemian_Style</td>\n",
" <td>149</td>\n",
" <td>27</td>\n",
" <td>4.7</td>\n",
" <td>0.42</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Lowenbrau</td>\n",
" <td>157</td>\n",
" <td>15</td>\n",
" <td>0.9</td>\n",
" <td>0.48</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10</th>\n",
" <td>Coors</td>\n",
" <td>140</td>\n",
" <td>18</td>\n",
" <td>4.6</td>\n",
" <td>0.44</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Schlitz</td>\n",
" <td>151</td>\n",
" <td>19</td>\n",
" <td>4.9</td>\n",
" <td>0.43</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12</th>\n",
" <td>Michelob_Light</td>\n",
" <td>135</td>\n",
" <td>11</td>\n",
" <td>4.2</td>\n",
" <td>0.50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>Becks</td>\n",
" <td>150</td>\n",
" <td>19</td>\n",
" <td>4.7</td>\n",
" <td>0.76</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14</th>\n",
" <td>Kirin</td>\n",
" <td>149</td>\n",
" <td>6</td>\n",
" <td>5.0</td>\n",
" <td>0.79</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16</th>\n",
" <td>Hamms</td>\n",
" <td>139</td>\n",
" <td>19</td>\n",
" <td>4.4</td>\n",
" <td>0.43</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17</th>\n",
" <td>Heilemans_Old_Style</td>\n",
" <td>144</td>\n",
" <td>24</td>\n",
" <td>4.9</td>\n",
" <td>0.43</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Kronenbourg</td>\n",
" <td>170</td>\n",
" <td>7</td>\n",
" <td>5.2</td>\n",
" <td>0.73</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Budweiser</td>\n",
" <td>144</td>\n",
" <td>15</td>\n",
" <td>4.7</td>\n",
" <td>0.43</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18</th>\n",
" <td>Olympia_Goled_Light</td>\n",
" <td>72</td>\n",
" <td>6</td>\n",
" <td>2.9</td>\n",
" <td>0.46</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15</th>\n",
" <td>Pabst_Extra_Light</td>\n",
" <td>68</td>\n",
" <td>15</td>\n",
" <td>2.3</td>\n",
" <td>0.38</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" name calories sodium alcohol cost cluster\n",
"9 Budweiser_Light 113 8 3.7 0.40 0\n",
"11 Coors_Light 102 15 4.1 0.46 0\n",
"8 Miller_Lite 99 10 4.3 0.43 0\n",
"19 Schlitz_Light 97 7 4.2 0.47 0\n",
"4 Heineken 152 11 5.0 0.77 1\n",
"5 Old_Milwaukee 145 23 4.6 0.28 1\n",
"6 Augsberger 175 24 5.5 0.40 1\n",
"7 Srohs_Bohemian_Style 149 27 4.7 0.42 1\n",
"2 Lowenbrau 157 15 0.9 0.48 1\n",
"10 Coors 140 18 4.6 0.44 1\n",
"1 Schlitz 151 19 4.9 0.43 1\n",
"12 Michelob_Light 135 11 4.2 0.50 1\n",
"13 Becks 150 19 4.7 0.76 1\n",
"14 Kirin 149 6 5.0 0.79 1\n",
"16 Hamms 139 19 4.4 0.43 1\n",
"17 Heilemans_Old_Style 144 24 4.9 0.43 1\n",
"3 Kronenbourg 170 7 5.2 0.73 1\n",
"0 Budweiser 144 15 4.7 0.43 1\n",
"18 Olympia_Goled_Light 72 6 2.9 0.46 2\n",
"15 Pabst_Extra_Light 68 15 2.3 0.38 2"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Attach each beer's cluster label to the DataFrame as a new `cluster` column.\n",
"# NOTE: this mutates `beer` in place, so every later cell sees the extra column.\n",
"beer['cluster'] = km.labels_\n",
"# Display the rows ordered by label; sort_values returns a sorted copy and leaves\n",
"# the row order of `beer` itself unchanged.\n",
"beer.sort_values(by='cluster')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"查看聚类结果中各个簇的中心点坐标"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[102.75 , 10. , 4.075 , 0.44 ],\n",
" [150. , 17. , 4.52142857, 0.52071429],\n",
" [ 70. , 10.5 , 2.6 , 0.42 ]])"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Inspect the fitted centroids: one row per cluster, one column per feature of X.\n",
"km.cluster_centers_"
]
},
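{
"cell_type": "markdown",
"metadata": {},
"source": [
"用pandas按`cluster`分组计算各类别样本的特征均值,并回答:结果是否和前面`km.cluster_centers_`给出的簇中心相同?\n",
"\n",
"答:相同。按`cluster`分组得到的各特征均值与`km.cluster_centers_`逐项一致,因为K-means的簇中心就是该簇内所有样本的均值。"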
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50398/58857758.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" beer.groupby('cluster').mean()\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>calories</th>\n",
" <th>sodium</th>\n",
" <th>alcohol</th>\n",
" <th>cost</th>\n",
" </tr>\n",
" <tr>\n",
" <th>cluster</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>102.75</td>\n",
" <td>10.0</td>\n",
" <td>4.075000</td>\n",
" <td>0.440000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>150.00</td>\n",
" <td>17.0</td>\n",
" <td>4.521429</td>\n",
" <td>0.520714</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>70.00</td>\n",
" <td>10.5</td>\n",
" <td>2.600000</td>\n",
" <td>0.420000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" calories sodium alcohol cost\n",
"cluster \n",
"0 102.75 10.0 4.075000 0.440000\n",
"1 150.00 17.0 4.521429 0.520714\n",
"2 70.00 10.5 2.600000 0.420000"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mean of each numeric feature per cluster, for comparison with km.cluster_centers_.\n",
"# numeric_only=True skips the text column `name` explicitly; relying on the old\n",
"# default emits a FutureWarning and raises a TypeError on pandas >= 2.0.\n",
"beer.groupby('cluster').mean(numeric_only=True)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/tmp/ipykernel_50398/1501469021.py:2: FutureWarning: The default value of numeric_only in DataFrameGroupBy.mean is deprecated. In a future version, numeric_only will default to False. Either specify numeric_only or select only columns which should be valid for the function.\n",
" centers = beer.groupby('cluster').mean()\n"
]
}
],
"source": [
"# Keep the per-cluster feature means as a DataFrame; the plotting cell reads\n",
"# `centers.calories` and `centers.alcohol` to mark the cluster centers.\n",
"# numeric_only=True skips the text column `name` explicitly; relying on the old\n",
"# default emits a FutureWarning and raises a TypeError on pandas >= 2.0.\n",
"centers = beer.groupby('cluster').mean(numeric_only=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"聚类结果可视化\n",
"\n",
"> **要求** 请运行、阅读和理解以下程序,并通过添加`注释`或者`markdown cell`,以说明每段代码的功能。"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"# Render matplotlib figures inline, directly below the cell that creates them.\n",
"%matplotlib inline\n",
"import matplotlib.pyplot as plt\n",
"# Raise the default font size to 14 for every figure drawn after this cell.\n",
"plt.rcParams['font.size'] = 14\n"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"# Color lookup table: indexing this array with integer cluster labels\n",
"# (colors[labels]) yields one color name per sample. Four entries are listed,\n",
"# although only labels 0-2 occur with the 3-cluster model.\n",
"import numpy as np\n",
"colors = np.array(['red', 'green', 'blue', 'yellow'])"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'alcohol')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkMAAAG6CAYAAAALTELXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAhUlEQVR4nO3deXwUVb7+8ac6nTRZ6CAQ9pAIAjogiyyKIKugFxTBhRkVBETEbbxXUBl0ZnBDxvkBV68zLsggIqKCgisosgsigqADCAJjCEhYIktCgIQkfX5/ZJIhZus0ne5O1+ftq1/aVaeqvn1M6IeqU6csY4wRAACATTmCXQAAAEAwEYYAAICtEYYAAICtEYYAAICtEYYAAICtEYYAAICtEYYAAICtOYNdQFXzeDxKS0tTzZo1ZVlWsMsBAABeMMbo5MmTatSokRyOqj13E/ZhKC0tTYmJicEuAwAA+GD//v1q0qRJlR4j7MNQzZo1JRV0ptvtDnI1AADAG5mZmUpMTCz6Hq9KYR+GCi+Nud1uwhAAANVMIIa4MIAaAADYGmEIAADYGmEIAADYGmEIAADYGmEIAADYGmEIAADYGmEIAADYGmEIAADYWthPuggAAPwrOy9bC7Yv0Bc/faHsvGw1u6CZ7uxwp1rWaRns0nxCGAIAAF774l9f6Hfv/U7Hso8pwoqQkZHDcui5dc9peNvheu361+RyuoJdZqUQhgAAgFfW7VunAfMGyGM8kqR8ky9JRe/f2vqWsvOy9e7N7wbkMRr+wpghAADglfFLx8tjPEXh59c8xqMFPyzQV/u/CnBl54cwBAAAKrTtyDZtOLChzCBUyOlw6pVvXwlQVf5BGAIAABXaenirV+3yPHnacnBLFVfjX4QhAABQoQhHhNdtHVb1ihfVq1oAABAUXRp3kaWKB0U7HU71SOoRgIr8hzAEAAAqlFwrWddedK0irPLPEOV58nRvp3sDVJV/EIYAAIBXnr/2edV01Sw3EE3oNkGt67UOYFXnjzAEAAC80rJOS31151dq16CdJCnCilCkI1KSFBsZqyl9p2hK3ynBLNEnTLoIAAC8dknCJfr27m+18cDGYo/juOU3tyg2KjbY5fmEMAQAACqtc+PO6ty4c7DL8AsukwEAAFsjDAEAAFsjDAEAAFsjDAEAAFsjDAEAAFsjDAEAAFsjDAEAAFsjDAEAAFsjDAEAAFtjBmoAAMLY4azD+seWf2hFygrl5OXoNwm/0d0d71bHRh2DXVrIsIwxJthFVKXMzEzFx8crIyNDbrc72OUAABAwr337mu5ffL/yTb48xiNJcjqcyvPk6ebf3Kw3h7ypGs4aQa6ydIH8/ubMEAAAYejdbe/q7k/uLrE8z5MnSVq4Y6EkacEtCwJaVyhizBAAAGHGYzx65ItHKmzz3g/vafPBzQGqKnQRhgAACDPLf1qu/Zn7K2zndDj12revBaCi0EYYAgAgzPx49EdZsipsl+fJ0/b07QGoKLQRhgAACDNREVEy8u7+KJfTVcXVhD7CEAAAYaZnUk+v2jksh/ok96niakIfYQgAgDDTqm4r9UrupQgrotx2EVaERl82OkBVhS7CEAAAYWjGdTPkdrnLDUQvD3xZ9WLrBbCq0EQYAgAgDLWo00Ib7tqgHkk9ipYVDqpOik/S/Jvnc1bo35h0EQCAMNWiTgutGLFCP/7yo1anrlZufq5a1W2lPhf2kcPifEghwhAAAGGuVd1WalW3VbDLCFnEQgAAYGucGQIABF1GdoYOnzqsmlE11SCugSyr/AkDjTE6mHVQp86eUv24+nK7eBA3fBeSZ4aSk5NlWVapr169egW7PACAn3yb9q2GLhiqOn+to1Z/a6VG0xupw6sd9MZ3b8iYkpMGeoxHs7bMUttX2qrx9MZq+beWqvPXOrr1vVv13aHvAv8BEBZC9sxQfHy8/ud//qfE8uTk5IDXAgDwv0U7Fmnoe0MlI+Wb/KLlW49s1cgPR2
p16mrNHDSzaKCvx3g0fNFwzds6r9ijJvI8eXpvx3t6f8f7WvTbRRrYcmDAPwuqN8uUFr2DrDDw7N2797z3lZmZqfj4eGVkZMjt5jQqAISCvSf2quWLLZXnySv3sRF/+6+/6f4u90uSpq+froeXPlxme0uWXE6X9vx+jxq7G1dJ3QicQH5/h+RlMgBAeHtl0yvyGE+5QciSpanrp8pjPMr35Gva+mnltjcyys3P1YxvZ1RFyQhjIXuZLCcnR7Nnz1ZaWprcbrc6d+6syy+/3KvtcnJyit5nZmZWZZkAAB/M2zqv2KWx0hgZ7T2xV98f+l45+TlKO5lW4X7zTb7mbZunJ3s/6a9SYQMhG4YOHTqkUaNGFVvWuXNnvf3222revHmZ202ZMkVPPskvAQCEsoycDK/bHs8+rpy8nIob/tuJ7BM+VAQ7C8nLZKNGjdLy5ct1+PBhnTp1Slu2bNHw4cO1ceNG9e3bVydPnixz24kTJyojI6PotX///gBWDgDwRkJMgtdt68fWr9Tzs+rH1velJNhYSIahSZMmqU+fPqpXr55iYmLUvn17zZkzR8OHD1dqaqpee+21Mrd1uVxyu93FXgCA83c697RmbZmlHq/3ULMXmqnjjI56bu1z+uX0L5Xe18j2Iyt8HIQlS23qtVGeJ0+vffuaIh2RFe7XYTk0sv3IStcDewvJu8nKsm7dOnXv3l033nij3n//fa+24W4yADh/P6T/oH5v9lPayTRZsooGMjssh2o4a+j9oe/r2ouu9Xp/h7MOq8WLLXQq95Q8xlNmu4EtBurT3Z/K6XAqz5NX7j4dlkNul1t7fr9HdWLqeF0LQhN3k5Whbt26kqRTp04FuRIAsI9fTv+iPm/00eGsw5JU7I4uj/HoTO4Z3fDODdpycIvX+6wfV1+Lb1+smMgYRVgRxdY5rYLhrH0u7KNPd38qSRUGoQgrQnFRcVpy+xKCECqtWoWhDRs2SGLiRQAIpBnfzlD66fQy7/4yMvJ4PJqydkql9tu9aXdtu3ebHrriIdVy1ZJUEGoGtBygT279RN8c+Mar/bhdbo3vOl7b7t2mK5pcUakaACkEL5Pt3LlTTZs2VUxMTInlvXv31qFDh7R69Wr16NHDq/1xmQwAzk/S/yZpX+a+CttFWBE68sgR1Y6uXeljGGOUk5+jqIgoOSyH3vz+Td3xwR1eHfPBLg9q+rXTK31MhLZAfn+H3K3177zzjqZPn64ePXooKSlJsbGx2rVrlxYvXqzc3FxNnDjR6yAEADg/xhjtz/Turtx8k6/9Gft9CkOWZamGs0bR+5QTKYp0RCrXk1vudh7j0d6MvZU+HnCukAtDvXv31o4dO7RlyxZ9+eWXOn36tOrWrasBAwbovvvuU//+/YNdIgDYhmVZioqIUk6+d/P8REdG++W4NZw1yh1YXchhOfx2TNhXyIWhnj17qmfPnsEuAwDwb9dedK0+3f1phYOYm8Y31UW1L/LLMfs3768JyyZU2C7f5Oua5tf45Ziwr2o1gBoAEHi/7/L7CoOQJUsPdnmwwrmDvNW+QXt1bdK1xJ1m53JYDl1Q4wINbT3UL8eEfRGGAADl6tusr8Z3HV/meofl0NXNrtaDlz/o1+POGTJHtWrUKjUQRVgRirAiNP+W+cXGGgG+IAwBACr0//r9P7004CU1cTcptjzeFa8J3Sbok9s+UWRExTNEV8ZFtS/SxjEbdX2r60ucceqa2FWrR67W1c2u9usxYU8hd2u9v3FrPQD4T74nX+v2r9OhrEOKd8WrR1KPgAxgPpB5QN8c+Eb5Jl9t6rXRxXUvrvJjIrgC+f1NGAIAACGHx3EAAAAESMjdWg8ACG17ju3RoaxDcrvcalOvjV/uINt7Yq9+zvxZcVFxurTepTp25ph2H9stp8OpNvXaKCYypuKdAD4iDAEAvPLhzg81+cvJ2pi2sWjZhbUu1Piu43Vv53t9CkVf/OsLPb3maX2578uiZTWcNZSTl1P0QNiaUT
U1usNo/annn3ya3RqoCGOGAAAVmvbVND38xcNyWI5iM0NbsmRkNLztcM0ePLtSgWjWllm666O75LAcZT4EtlCEFaF
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# One dot per beer: x = calories, y = alcohol; the dot color is looked up from\n",
"# the cluster label (0 -> red, 1 -> green, 2 -> blue).\n",
"plt.scatter(\n",
"    x=beer['calories'],\n",
"    y=beer['alcohol'],\n",
"    s=50,\n",
"    c=colors[beer['cluster']],\n",
")\n",
"\n",
"# Overlay the per-cluster means as large black '+' markers.\n",
"plt.scatter(\n",
"    x=centers['calories'],\n",
"    y=centers['alcohol'],\n",
"    s=300,\n",
"    c='black',\n",
"    marker='+',\n",
"    linewidths=3,\n",
")\n",
"\n",
"# Name the axes; the ylabel call stays last so the cell's displayed value is unchanged.\n",
"plt.xlabel('calories')\n",
"plt.ylabel('alcohol')"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.19454664171120434"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Silhouette coefficient (SC) for K=3, computed on standardized features.\n",
"from sklearn import metrics\n",
"from sklearn.preprocessing import StandardScaler\n",
"# Standardize every feature to zero mean and unit variance so that calories,\n",
"# which has by far the largest numeric range, does not dominate the distances.\n",
"scaler = StandardScaler()\n",
"X_scaled = scaler.fit_transform(X)\n",
"# Cluster in the same scaled space that the score is measured in. The labels in\n",
"# `km` came from the unscaled data, so scoring them against X_scaled would mix\n",
"# two different feature spaces. A separate name keeps `km` untouched.\n",
"km_scaled = KMeans(n_clusters=3, random_state=1)\n",
"km_scaled.fit(X_scaled)\n",
"metrics.silhouette_score(X_scaled, km_scaled.labels_)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"# Silhouette coefficient for K=2 through K=19 on the standardized features.\n",
"# (The score is only defined for 2 <= K <= n_samples - 1, i.e. at most 19 for 20 beers.)\n",
"k_range = range(2, 20)\n",
"scores = []\n",
"for k in k_range:\n",
"    # Use a loop-local name: rebinding `km` here would silently replace the\n",
"    # fitted 3-cluster model with the last (K=19) one.\n",
"    km_k = KMeans(n_clusters=k, random_state=1)\n",
"    km_k.fit(X_scaled)\n",
"    scores.append(metrics.silhouette_score(X_scaled, km_k.labels_))"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkUAAAG6CAYAAAAGUjKQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAABv0ElEQVR4nO3deVhUZf8G8PvMwr7vCMgi4JJbCrigIu6ZqampWa5lmZXti7aoLba8+jPbLDMBS1OLzCx3BRQFV1xyBUREERSQXWBgzu8PAiUQhmHgDHB/rovrfTnnzDP3PBzyyznPeR5BFEURRERERK2cTOoARERERPqARRERERERWBQRERERAWBRRERERASARRERERERABZFRERERABYFBEREREBABRSB2gu1Go1UlNTYW5uDkEQpI5DREREGhBFEXl5eWjTpg1kstqvBbEo0lBqairc3NykjkFERERaSElJgaura63HsCjSkLm5OYDyTrWwsJA4DaBSqbBr1y4MGzYMSqVS6jiSYT/cxb4ox34ox364i31RrrX2Q25uLtzc3Cr/Ha8NiyINVdwys7Cw0JuiyMTEBBYWFq3q5P4v9sNd7Ity7Idy7Ie72BflWns/aDL0hQOtiYiIiMCiiIiIiAgAiyIiIiIiACyKiIiIiACwKCIiIiICwKKIiIiICACLIiIiIiIALIqIiIiIALAoIiIiIgLAooiIiIgIAIsiIiIiIgAsioiIiIgAsCiiRlJYUgpRFKWOQUREpDGF1AGoZUnLKcKKvfHYdCwFfu7W+GG6HyyMWt9qzERE1PywKCKdyC4swcqoRIQevILiUjUA4HBSFqb8EIu1s3rBxtRA4oRERES14+0zapDCklJ8E5GA/p9H4PuoyyguVcPfwxrLHusGW1MD/HM9FxO/j0FaTpHUUYmIiGrFK0WkFVWZGhuOpuDLvfG4lVcMAOjgZI63RnTAwPb2EAQB3dysMPXHw0i4mY/Hvj+EdU/1RltbE4mTExER1YxFEdWLWi1i6+lU/N/uS0jOLAQAtLUxwWvDfPFI1zaQyYTKY70dzPDrnD54YvVhJGcWYsJ3h/Dz073g62guVXwiIqL7YlFEGhFFEZGXbuHzHRdx/kYuAMDOzBAvDfbGJP+2MFDUfCfW1doEvz7bB1N/PIKL6XmY9H0M1s7qhS6ulk0Zn4iIqE4siqhOx5Oz8NmOiziSlAUAMDdUYM7AdpgZ6AETg7pPIQcLI2x8tjemrzmCU9dy8PgPsfhxuh96edk2dnQiIiKNsSii+7qYlof/7byIPefTAQCGChlm9PXAnKB2sK7n02RWJgZYN7s3ngo9isNJWZi25gi+n9oTA9s7NEZ0IiKiemNRRNWkZBVi+Z5L2Bx3HaIIyGUCJvq5Yt5gHzhbGmvdrpmhAmGzAjB33Qnsu3ATs9cew4rJD2JkF2cdpiciItIOiyKqdCuvGN9EJGDd4WSoyspno364izNeHeaLdvZmOnkPI6Uc30/tiVc2nsRfp2/ghfUn8On4rpjo56aT9omIiLTFooiQV6TCD/svY3V0EgpLygAA/X3s8Mbw9ujqaqXz91PKZVgx+UGYGSqw4WgK3vztNAqKSzEz0FPn70VERKQpFkWtWJGqDD/HJuObiATcLlQBALq5WuLNER0Q6G3XqO8tlwn4ZFwXmBkqsDo6CYu3nkN+USleGOQNQRDqboCIiEjHWBS1QqVlavx+4jq+2HMJqf/ONN3O3hRvDG+P4Q84NVlRIggC3nm4I8yNlFi+5xKW7b6EvOJSzH+oAwsjIiJqciyKWhFRFLHzbBr+t/MiEm8VAACcLY3wyhBfjOvhAoW86Vd9EQQBLw3xgZmRAh/+dQ6r9l9GXlEpPhrbGXIZCyMiImo6LIpakcVbzyH00BUAgLWJEs8He+PJ3u4wUsqlDQbgqX6eMDdU4O3fT+OXI1dRUFyKZRO7QSlBoUZERK0Ti6JWori0DBuOXgUAPDewHZ4b2A
4WRkqJU1U10d8NJoZyvLzhJP48lYrCklJ8PaWHXhRtRETU8vHP8FbiRHI2ilRq2Jsb4s3h7fWuIKowqmsb/DDND4YKGfacv4mZIUeRX1wqdSwiImoFWBS1EocSMwAAfdvZ6v0g5uAODgibFQAzQwViLmfiydWHkV1YInUsIiJq4VgUtRIHE8qLosB2jfuova709rLF+tm9YGWixMmUbExeFYubeUVSxyIiohaMRVErkFekwqlrOQCAvt7NZxHWrq5W2PhMHziYG+JCWh4mfR+L69l3pI5FREQtFIuiVuBIUhbK1CLcbU3gam0idZx6ae9kjl/n9IGrtTGSMgrw2MpDuHwrX+pYRETUArEoagUOJmQCAPo2k1tn/+Vua4pf5/RBO3tTpOYUYeL3MTiXmit1LCIiamFYFLUCFYOsA5vRrbP/crY0xqZn++CBNhbIyC/B5FUxOHH1ttSxiIioBWFR1MJl5BfjQloeAKCPV/MtigDA1swQ62f3hp+7NXKLSvHk6sM4lJgpdSwiImohWBS1cBVFQydnC9iaGUqcpuEsjZVY+1QA+vvYobCkDLN/jsOZLP2eYoCIiJoHFkUt3MH45n/r7L9MDBRYPd0PIx5wQkmpGmsuyvDPdY4xIiKihmFR1MIdrJi00bt5DrK+H0OFHF9PeRCD2ttDDQFr/l3TjYiISFssilqwq5mFuHb7DhQyAQEeNlLH0TmFXIZ5g9oBAHacTefkjkRE1CAsilqwiqtED7a1gqlhy1z794E2FvAwE6EqE7HhSIrUcYiIqBljUdSCVSzt0VznJ9LUAGc1AGDd4WSoytQSpyEiouaKRVELpVaLiPn3ybPAFjae6L+62YiwMzNAem4xdp5NkzoOERE1UyyKWqiL6XnILCiBsVKO7m5WUsdpVAoZMNnPFQCw9lCyxGmIiKi5YlHUQlXcOgvwtIGBouX/mCf7u0IhE3DkShaXACEiIq20/H8tW6lDlbfOWs78RLVxtDDCiM5OAIC1MVekDUNERM0Si6IWSFWmxuHLrWM80b2m9/UAAPxx8jqyC0ukDUNERM0Oi6IW6FRKNgpKymBjaoCOThZSx2kyfu7W6ORsgSKVGpuO8fF8IiKqHxZFLdDBhPKrRH28bCGTtZ51wQRBwPS+7gCAn2KTUaYWJU5ERETNCYuiFuju0h6tYzzRvUZ3c4GlsRIpWXcQceGm1HGIiKgZYVHUwhSWlCLu6m0AQGALn7SxJsYGckz2dwMAhHHANRER1QOLohbm6JXbUJWJcLEyhrutidRxJPFkb3cIAnAgPgOJt/KljkNERM0Ei6IW5lDl0h62EITWM57oXm42JhjcwREA8FMMJ3MkIiLNsChqYSrGE7WmR/FrUjHg+rfj15BfXCpxGiIiag5YFLUg2YUlOPvvbM5927W+Qdb3CmxnBy97U+QXl2LziWtSxyEiomaARVELEpOYCVEEfBzM4GBhJHUcSclkAqb38QAAhMUkQxT5eD4REdWORVELEp3AW2f3GtfDBaYGciTczK9c9oSIiOh+WBS1IHfXO2NRBADmRkqM7+kKAAg7dEXaMEREpPdYFLUQqdl3kJRRAJkA9PKykTqO3pj27y20PefTce12obRhiIhIr7EoaiEO/nvrrKurFSyMlBKn0R/eDmbo520HtQj8HHtV6jhERKTHWBS1EHdvnbXup85qMq1P+eP5G45eRZGqTOI0RESkr1gUtQCiKFZeKWqNS3vUZXBHR7hYGSO7UIU/T6VKHYeIiPQUi6IWIPFWPm7mFcNQIUMPd2up4+gduUzA1H+vFoUdusLH84mIqEYsilqAgwnlt878PKxhpJRLnEY/TfJzg6FChrOpuTjx74K5RERE92JR1AIcrFzvjLfO7sfa1ABjurcBAIQd4npoRERUHYuiZq5MLSLmMucn0kTF4/nbztzAzdwiacMQEZHeYVHUzP2Tmou8olKYGynQxcVS6jh6rbOLJfzcrVGqFrH+CB/PJyKiqvS2KD
p69ChGjhwJKysrmJqaonfv3ti0aZPW7d2+fRsuLi4QBAEjRozQYVJpxfz7KH4fL1vIZYLEafTftL4eAIB1h6+ipFQ
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Plot the silhouette coefficient against the number of clusters K, one point\n",
"# per value in k_range, to compare the candidate cluster counts.\n",
"plt.plot(k_range, scores)\n",
"plt.xlabel('Number of clusters')\n",
"plt.ylabel('Silhouette Coefficient')\n",
"# Grid lines make it easier to read off the score at each K.\n",
"plt.grid(True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# PCA实验"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"请使用PCA算法将前文中的啤酒数据`X`降维到2维空间,绘制降维之后的数据点,并计算降维导致的重建误差。\n",
"\n",
"参见:[PCA算法文档](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html)\n",
"\n",
"> 提示:着重看文档中的示例(Examples)。"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[-11.41071341 -18.7563276 -24.2856944 -36.55439065 -19.00636267\n",
" -13.15190363 -43.12530515 -17.5095788 33.85968231 20.12053057\n",
" -7.70749236 30.40940729 -2.06904 -17.75787186 -15.55267904\n",
" 64.28684296 -6.80198502 -12.25534761 61.13554293 36.13268615]\n"
]
},
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7fd3c9bb7ac0>"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjYAAAGiCAYAAAD0qYz9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjYuMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy89olMNAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA0sklEQVR4nO3deXxU1eH+8efOTDKBhAQ0rBISlrBVhC+yaWRTAaVUrWAR2RW+qKXVgoooitZS1OLS9uvPWqmAosWF1lIFQWWTiIiyFCxIEFkEWVzIJAFCMnN+f8REY7YhJHNn7nzefeUlmTlJnpyGzMO5595rGWOMAAAAHMBldwAAAICaQrEBAACOQbEBAACOQbEBAACOQbEBAACOQbEBAACOQbEBAACOQbEBAACO4bE7QKgFAgEdOnRI9erVk2VZdscBAABBMMYoJydHzZo1k8tV8bpM1BWbQ4cOKSUlxe4YAACgGg4cOKDmzZtX+HzUFZt69epJKpqYxMREm9MAAIBg+Hw+paSklLyOVyTqik3x4afExESKDQAAEaaqbSRsHgYAAI5BsQEAAI5BsQEAAI5BsQEAAI5BsQEAAI5BsQEAAI5BsQEAAI4RddexcYICf4GO5h1VjDtGDes25NYQAAB8h2ITQQ76Durx9Y9r7ua58uX7JEnp56TrVz1+pUndJinWHWtzQgAA7GUZY4zdIULJ5/MpKSlJ2dnZEXXl4U+OfqJ+C/rp25Pfym/8JY9bKlqt6ZPaR8tGLlOdmDo2JQQAoPYE+/rNHpsIUOAv0OCXBpcpNZJkvvvfe/vf05QVU2xKCABAeKDYRIDXd76u/dn7y5SaHwqYgJ7b/Jy+OflNCJMBABBeKDYR4JVPXpHbclc57rT/tP796b9DkAgAgPBEsYkAx04cq3S1ppjbcuvrk1+HIBEAAOGJYhMBGsc3DmrFxm/8ali3YQgSAQAQnig2EWBEpxFBrdjEueN0VburQpAIAIDwRLGJAEPaDlHrBq3lcVV82SGX5dLN3W5WUlxSCJMBABBeKDYRwOPyaNnIZWpYt2GZQ1Iuq+j/wkGtB+mRAY/YEQ8AgLBBsYkQ6eema+vNWzWjz4xS+2guaHyBnrvqOS0ZsYQrDwMAoh5XHo5Axhj58n3yuDyKj423Ow4AALUu2Ndv7hUVgSzLYi8NAADl4FAUAABwDFZsgB/56sRX2nZkm4yMzm90vhrFN7I7EgAgSBQb4Duff/u5ZqyaoVc+eUWFgUJJRWekDe0wVL+79Hdqc04bmxMCAKrCoShA0qdffapuz3bTy9tfLik1klQYKNRr/31N3Z/tru1Ht9uYEAAQDIoNop4xRsNeHSZfvq/cKzz7jV85+Tka+spQRdlJhAAQcSg2iHqZBzK1/ej2Uis1P+Y3fu36epdWfr4yhMkAAGeKYoOo98auNyq9XUUxj8ujN3a9EYJEAIDqotgg6uWdzpMlK7ixBXm1nAYAcDZsKTYLFy7UpEmT1K1bN3m9XlmWpfnz51c43ufzacqUKUpNTZXX61VaWpruvPNO5ebmhi40HCslKSWou6cbY5SSmBKCRACA6rKl2MyYMUN//etftW/fPjVt2rTSsXl5eerbt6+eeOIJtW/fXr/5zW/Url07zZkzR5deeqlOnToVotRwqlEXjApqXMAENKbzmFpOAwA4G7YUm7lz52rv3r06duyYbr755krHPvroo9qyZYumTZum5cuX6+GHH9by5cs1bdo0bdy4UU888USIUsOpmtVrpgn/M6HkTunlcVkuje48Wqn1U0OYDABwpmwpNpdffrlSU6t+gTDGaO7cuUpISNB9991X6rn77rtPCQkJmjt3bm3FRBT58+A/65r210hSqY3EbsstSRqcPljPDHnGjmgAgDMQ1puHs7KydOjQIWVkZCg+vvRdrOPj45WRkaE9e/bowIEDNiWEU8S6Y/Xada/prZFv6co2V6ph3YZqWLehrmhzhd684U396/p/Kc
4TZ3dMAEAVwvqWCllZWZKk9PT0cp9PT0/X8uXLlZWVpZSU8jd15ufnKz8/v+R9n89X80HhCJZlaVCbQRrUZpDdUQAA1RTWKzbZ2dmSpKSkpHKfT0xMLDWuPLNnz1ZSUlLJW0UFCAAARL6wLjY1Yfr06crOzi5547AVAADOFdaHoopXaipakSk+rFTRio4keb1eeb3emg8HAADCTliv2BTvrSnea/NjVe3BAQAA0SXsi02zZs2UmZmpvLzSl7LPy8tTZmamWrZsyb4ZAAAgKcyLjWVZmjBhgnJzc/XQQw+Veu6hhx5Sbm6uJk6caFM6AAAQbixjjAn1F507d67WrVsnSdq2bZs2bdqkjIwMtWnTRpJ0ySWXaMKECZKKVmYyMjK0detWDRw4UF27dtWmTZu0YsUKde/eXWvWrFGdOnWC/to+n09JSUnKzs4uOasKAACEt2Bfv23ZPLxu3TotWLCg1GOZmZnKzMwseb+42MTHx2vNmjV64IEHtHjxYq1atUpNmzbV1KlTNXPmzDMqNQAAwNlsWbGxEys2AABEnmBfv8N6jw0AAMCZoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNgAAADHoNjAsbJPZevzbz/X8VPH7Y4CAAgRj90BgJq26vNV+sP7f9Bbu9+SkZEkDWg1QHdefKcGtB5gczoAQG1ixQaO8n8f/p8uff5SrfhsRUmpkaSVn6/UwIUDNef9OTamAwDUNooNHGPd/nX61bJfSZL8xl/queL373z7Tr27592QZwMAhAbFBo7xxPon5HFVfnTVbbn1+PrHQ5QIABBqFBs4wmn/af3r03+pMFBY6Ti/8WvZ7mXKPZ0bomQAgFCi2MARcvJzyhx+qoiR4UwpAHAoig0cIdGbqBhXTFBjXZZLDeIa1HIiAIAdKDZwhBh3jK77yXVV7rHxuDy6pt01io+ND1EyAEAoUWzgGL/p9RsFTKDSMf6AX1MumhKiRACAUKPYwDG6Neum+VfPl8tylVm58bg8smTp2Z89q4wWGTYlBADUNooNHGV059H6aOJHGnH+iJI9NzGuGP3iJ7/QhgkbdFPXm2xOiKjj80l//KPUoYMUFyclJkrDh0vvvWd3MsCRLGOMqXqYvdLS0rRv375yn+vbt69Wr14d9Ofy+XxKSkpSdna2EhMTayghwpE/4FdeQZ7iY+LldrntjoNotGeP1K+f9MUXRe8X/7r1eKTCQumuu6SHH5Ysy7aIQKQI9vU7Yu4VlZSUpNtvv73M42lpaSHPgsjgdrmV6KW8wib5+dLll0tffvl9oSlW+N31lh59VEpLk265JeTxAKeKmBUbSdq7d+9Zfy5WbACExEsvSSNHVj2uWTNp/37JzaoiUJlgX7/ZYwMAtWHePMkVxK/YQ4ekNWtqPw8QJSLmUFR+fr7mz5+vQ4cOKTExUd27d1fPnj3tjgUA5fviCylQ+eUHShw6VLtZgCgSMcXm8OHDGj9+fKnHunfvrr///e9q3bp1hR+Xn5+v/Pz8kvd9Pl+tZQSAEmdyqJvD4kCNiYhDUePHj9e7776rI0eOKC8vT5s3b9bo0aO1ceNGXXbZZcrJyanwY2fPnq2kpKSSt5SUlBAmBxC1hg0L7lBU3bpS//61nyecHDkivftu0duxY3
angcNExObhiowZM0YvvPCCHnvsMU2ZUv7VZMtbsUlJSWHzMIDa9fXXUosW0smTZc+KKuZySb/6lfTkkyGNZpvdu6V
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"# Project the four beer features onto their first two principal components.\n",
"pca = PCA(n_components=2)\n",
"X_pca = pca.fit_transform(X)\n",
"\n",
"# Reconstruction error: map the 2-D points back into the original feature space\n",
"# and take the mean squared distance to the original samples.\n",
"X_reconstructed = pca.inverse_transform(X_pca)\n",
"reconstruction_error = ((X.to_numpy() - X_reconstructed) ** 2).sum(axis=1).mean()\n",
"print('Reconstruction error (mean squared distance):', reconstruction_error)\n",
"\n",
"# Plot the projected beers, colored by the K-means cluster labels stored in `beer`.\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=colors[beer.cluster], s=50)\n",
"plt.xlabel('principal component 1')\n",
"plt.ylabel('principal component 2')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
},
"vscode": {
"interpreter": {
"hash": "1f0d395e06aa83586067b19165efc9b683889967164248deef4bbf1fa27cfb00"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}