{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Supervised Learning\n", "## Overfitting and Regularization\n", "\n", "Alexander Goncearenco\n", "\n", "March 7, 2019" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Recap supervised learning and regression" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from sklearn.datasets import load_boston\n", "\n", "boston = load_boston()\n", "X = boston.data\n", "y = boston.target\n", "\n", "# boston.DESCR.splitlines()\n", "# ?load_boston" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "('X:', (506, 13), 'y:', (506,))" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\"X:\", X.shape, \"y:\", y.shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7406426641094095" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import LinearRegression\n", "\n", "lr_house_price = LinearRegression().fit(X, y)\n", "lr_house_price.score(X, y)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "predicted_price = lr_house_price.predict(X)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(506,)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predicted_price.shape" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "\n", "import pandas as pd\n", "import seaborn as sb\n", "\n", "d = pd.DataFrame(X)\n", "d['predicted_price'] = pd.Series(predicted_price)\n", "d['actual_price'] = pd.Series(y)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112predicted_priceactual_price
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9830.00384324.0
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1425.02556221.6
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0330.56759734.7
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9428.60703633.4
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3327.94352436.2
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 \n", "\n", " 11 12 predicted_price actual_price \n", "0 396.90 4.98 30.003843 24.0 \n", "1 396.90 9.14 25.025562 21.6 \n", "2 392.83 4.03 30.567597 34.7 \n", "3 394.63 2.94 28.607036 33.4 \n", "4 396.90 5.33 27.943524 36.2 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "d.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### How we evaluate performance of models?" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789101112predicted_priceactual_pricesquared_error
00.0063218.02.310.00.5386.57565.24.09001.0296.015.3396.904.9830.00384324.036.046135
10.027310.07.070.00.4696.42178.94.96712.0242.017.8396.909.1425.02556221.611.734478
20.027290.07.070.00.4697.18561.14.96712.0242.017.8392.834.0330.56759734.717.076757
30.032370.02.180.00.4586.99845.86.06223.0222.018.7394.632.9428.60703633.422.972499
40.069050.02.180.00.4587.14754.26.06223.0222.018.7396.905.3327.94352436.268.169392
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 10 \\\n", "0 0.00632 18.0 2.31 0.0 0.538 6.575 65.2 4.0900 1.0 296.0 15.3 \n", "1 0.02731 0.0 7.07 0.0 0.469 6.421 78.9 4.9671 2.0 242.0 17.8 \n", "2 0.02729 0.0 7.07 0.0 0.469 7.185 61.1 4.9671 2.0 242.0 17.8 \n", "3 0.03237 0.0 2.18 0.0 0.458 6.998 45.8 6.0622 3.0 222.0 18.7 \n", "4 0.06905 0.0 2.18 0.0 0.458 7.147 54.2 6.0622 3.0 222.0 18.7 \n", "\n", " 11 12 predicted_price actual_price squared_error \n", "0 396.90 4.98 30.003843 24.0 36.046135 \n", "1 396.90 9.14 25.025562 21.6 11.734478 \n", "2 392.83 4.03 30.567597 34.7 17.076757 \n", "3 394.63 2.94 28.607036 33.4 22.972499 \n", "4 396.90 5.33 27.943524 36.2 68.169392 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Squared error for each example:\n", "\n", "d['squared_error'] = (d['predicted_price'] - d['actual_price'])**2\n", "d.head()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-21.894831181729202" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Overall performance: mean squared error:\n", "\n", "-d['squared_error'].mean()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "21.894831181729202" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# It is the same as mean_squared_error in scikit-learn\n", "\n", "from sklearn.metrics import mean_squared_error\n", "mean_squared_error(d['actual_price'], d['predicted_price'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "ename": "NameError", "evalue": "name 'np' is not defined", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in 
\u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# Sometimes Root Mean Squared Error (RMSE) is used\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# RMSE preserves the scale of the units (e.g. price in dollars)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msqrt\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmean_squared_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'actual_price'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0md\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'predicted_price'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;31mNameError\u001b[0m: name 'np' is not defined" ] } ], "source": [ "# Sometimes Root Mean Squared Error (RMSE) is used\n", "# RMSE preserves the scale of the units (e.g. price in dollars)\n", "np.sqrt(mean_squared_error(d['actual_price'], d['predicted_price']))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## But will the model be able to predict the future cases?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Remember what happens when we train and predict using KNearestNeighbors(K=1)?" 
] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.0" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.neighbors import KNeighborsRegressor\n", "\n", "predicted_price = KNeighborsRegressor(n_neighbors=1).fit(X, y).predict(X)\n", "actual_price = y\n", "mean_squared_error(actual_price, predicted_price)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Solution: split the dataset\n", "\n", "The learning algorithm uses examples only from the training set and it cannot use examples from any of the holdout sets.\n", "\n", "* Training set (70% - 95%)\n", "\n", "* Holdout sets: (5% - 30%)\n", " - Validation set\n", " - Test set\n", " \n", "\n", "We want good performance on a holdout set, using the data the model has not seen before!" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Why do we need two holdout sets?\n", "\n", "We need a validation set to choose the learning algorithm and its hyperparameters.\n", "\n", "The test set is used only at the end, to assess the chosen model on data that played no part in training or in model selection." 
] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "from sklearn.model_selection import train_test_split" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((339, 13), (339,))" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape, y_train.shape" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((167, 13), (167,))" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_test.shape, y_test.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "20.72402343733974" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Linear Regression\n", "\n", "predicted_price = LinearRegression().fit(X_train, y_train).predict(X_test)\n", "mean_squared_error(y_test, predicted_price)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "44.078263473053894" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# KNN Regression(K=1)\n", "\n", "predicted_price = KNeighborsRegressor(n_neighbors=1).fit(X_train, y_train).predict(X_test)\n", "mean_squared_error(y_test, predicted_price)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Overfitting and Underfitting\n", "\n", "\n", "* Overfitting - the model performs well on the training set but performs poorly on the test set\n", "\n", "* Underfitting - the model performs poorly even on the training set, and its performance on the test set is similarly poor\n" ] }, { "cell_type": "code", "execution_count": 
20, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import PolynomialFeatures\n", "from sklearn.pipeline import Pipeline\n", "\n", "X12 = X[:, 12].reshape(-1, 1) # only one feature only but maintain 2D array shape\n", "X12_train, X12_test, y_train, y_test = train_test_split(X12, y, test_size=0.33, random_state=42)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Examples of underfitting and overfitting:\n", "\n", "m1 - Linear Regression\n", "\n", "m2 - Polynomial regression of the second degree\n", "\n", "m10 - Polynomial regression of the tenth degree" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Model m1:\n", "\n", "$$ y = \\beta_1 x + \\beta_0 $$" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "m1 = LinearRegression(fit_intercept=True)\n", "m1.fit(X12_train, y_train)\n", "m1_train_predicted = m1.predict(X12_train)\n", "m1_test_predicted = m1.predict(X12_test)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "38.71120361270592" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_train, m1_train_predicted)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "38.410075117662345" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_test, m1_test_predicted)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_train.flatten(), y_train)\n", "sb.scatterplot(X12_train.flatten(), m1_train_predicted)" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_test.flatten(), y_test)\n", "sb.scatterplot(X12_test.flatten(), m1_test_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Model m2:\n", "\n", "$$ y = \\beta_1 x + \\beta_2 x^2 + \\beta_0 $$" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "m2 = Pipeline([('poly', PolynomialFeatures(degree=2)),\n", " ('linear', LinearRegression(fit_intercept=True))])\n", "m2.fit(X12_train, y_train)\n", "m2_train_predicted = m2.predict(X12_train)\n", "m2_test_predicted = m2.predict(X12_test)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "29.442905187577004" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_train, m2_train_predicted)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "32.51162005950131" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_test, m2_test_predicted)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_train.flatten(), y_train)\n", "sb.scatterplot(X12_train.flatten(), m2_train_predicted)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_test.flatten(), y_test)\n", "sb.scatterplot(X12_test.flatten(), m2_test_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Model m10:\n", "\n", "$$ y = \\beta_1 x + \\beta_2 x^2 + \\beta_3 x^3 + ... + \\beta_{10} x^{10} + \\beta_0 $$" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "m10 = Pipeline([('poly', PolynomialFeatures(degree=10)),\n", " ('linear', LinearRegression(fit_intercept=True))])\n", "m10.fit(X12_train, y_train)\n", "m10_train_predicted = m10.predict(X12_train)\n", "m10_test_predicted = m10.predict(X12_test)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25.73897546019498" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_train, m10_train_predicted)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "29.210589562808984" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mean_squared_error(y_test, m10_test_predicted)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXUAAAD8CAYAAACINTRsAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXl8VPW5/9/fM0symSQkhASQgCgqNUVaCCrqvS2uaKVSRcEFEDdAr7XttYpd6LW1i2j9adUiqFVZXBDRilJFpXLb6w5FrcVG3CDIkhAYsk0yy/n+/jiZyazJJGSbyfN+vXiROXPmnO8cyHOe8/l+vs+jtNYIgiAImYHR2wMQBEEQug4J6oIgCBmEBHVBEIQMQoK6IAhCBiFBXRAEIYOQoC4IgpBBSFAXBEHIICSoC4IgZBAS1AVBEDIIe0+ebNCgQXrkyJE9eUpBEIS0Z/Pmzfu01sWp7NujQX3kyJFs2rSpJ08pCIKQ9iiltqe6r8gvgiAIGYQEdUEQhAxCgrogCEIGIUFdEAQhg5CgLgiCkEFIUBcEQcggUrI0KqW+BOqAIBDQWk9QSg0EVgEjgS+B6VrrA90zTAvT1NQ0+PAFgiilsCkwDIMitxPDUHH7maZJUEMgaGIYCpfDwBfU+AImDptBSW4WdrsRdVyn3RY+XrLthzr2VI7VlecWBKH/0BGf+qla630Rr28BNmitb1dK3dLyekGXji4C09RU7K3jmuWb2HnAS2mhi0XTxrLszS/40ZmjGT04LxyIK/bWcferFVx+8hEsWPMhOw94OaushOtPO5rrHv9H+PNLZpYzuiSXT/c1RB33odkTOLo4l23V9XHbQ+c51LG3dayO7i8IghDiUOSXqcCylp+XAd879OEkp6bBxz2v/ps/TDmMv887ij9MOYzlb37OtPLhXLN8EzUNvvB+1yzfxLTy4eGADjCtfHg4oAPsPOBl/srNVNU3h4NnaPs1yzcl3R46T0fH3pFjdXR/QRCEEKlm6hp4RSmlgaVa6weBwVrr3QBa691KqZJEH1RKzQXmAowYMaLTA9VmkN+eYqfohYvAs4PhBSP47XeXsScri50HvPgCQQB8gSA7D3gpcDnCQRGIew1WsAyYOuF2f9BMuD10no4QGlOqx+ro/oIgCCFSzdRP0VqPB84B/ksp9a1UT6C1flBrPUFrPaG4OKXSBQkppJaiFy4Hzw5rg2cHRS9czmHOBkoLXTjtNgCcdhulhS48Xj+lha7w52NfA5QWurAbKuF2h81IuD10no4QGlOqx+ro/oIgCCFSCupa610tf1cBzwEnAHuVUkMBWv6u6q5BAti1vzWgh/DsIOhr4qHZEyhyOwEocjt5aPYE1myuZNG0seHguGZzJYsvGx9+HdLUS3KzeGj2hKjtD82ekHR76DwdITSmVI/V0f0FQRBCKK112zso5QYMrXVdy8+vAr8CTgdqIiZKB2qtb27rWBMmTNCdLuhVvxcePiM6sBeMwH/Fq9jyBrftfjFNDNXqfvEHTOzifhEEIU1QSm3WWk9IZd9UNPXBwHNKqdD+T2itX1ZKvQc8rZS6CtgBXNTZAadETjFc/CQ8dYkV2AtGwIzHcdjtWJJ/a8AzDEVxXlbKh062f0eP05lzdNX+giAIkEJQ11p/DnwjwfYarGy9ZzAMKCmDq14DXz3s/wzW/Qjqq6xgX1Jm7ZMCkgULgpCppNeKUsOwEvJXFkKgGc74JUz+Lbz+W2isTukQIQ/4+Yvf4JRFr3P+4jeo2FuHabYtQwmCIKQD6RXUAUwTTpwH638Kj51r/X3iPGt7CogHXBCETCb9groOwtrro6yNrL3e2p4C4gEXBCGTScOgrhNaG2nHxRNCPOCCIGQy6RfU7U7L+RJJwQhrewqIB1wQhEymRxtPdwmJrI0XP2ltTwHDUIwenMdz150i7hdBEDKO9AvqIWvj1a9BwGdl6DnFKdsZBUEQMpn0C+pgBfDcwZ3
6qJS1FQQhk+l36a1YGgVByGTSM1NPgWSrRsXSKAhCJpORQb0tiSVkaYwM7GJpFAQhU8hI+aUtiUUsjYIgZDIZmam3JbGIpVEQhEwmM4K6aVoFvVosjjnOAW1KLFLWVhCETCX95RfThKqtVgONe8bAw2dQUPcpK648vl2JxTQ11XXNfHWgkeq6ZqnUKAhC2pP+mXpjdevqUgDPDtRTlzDyqtfalFjEry4IQiaS/pl6wJewwJcK+ijOy2JYYQ7FeVlxgVr86oIgZCLpH9Q7WeBL/OqCIGQi6R/UQwW+QoG9pcCX6RrUpl4uJXgFQchE0l9TT1Dgy3QNoqKqIUovX37lCeRm2/EHTJx2G4UuBw/NnhCnqYtfXRCEdCb9gzrEFfiqqWuO0suLc7PYW9vE7Ec+jArgRxfnil9dEISMIv3llwTE6uXzJ43ipmc+jJsUPeD1tzmZKgiCkG5kZFCP1csLXI5enRQVP7wgCD1FZsgvkZgmg/Dw16tHUbHPxy9e20OjL9hrRbzEDy8IQk+SWZl6y+pS9aczcN43luNeuoBnLihg/Ih8lswsj1phumRmOYUuR7cPSfzwgiD0JJmVqYdWl+aWwOTfgqsQW/1uctyDeeH9ahZOKaPA5cDj9XPvhk/4zflju70GjPjhBUHoSTIrqAd8VkA/41fw5/nhxtS26Sv5ztjh3Lr2Y7ZUesK7/893uz+wSv12QRB6ksySX+xOOOOXVkDPLYEZK+F7D2A07KXIaGD+pFHhXXsqsEr9dkEQepKMyNRDreu0mUdx/jBUbgmc9gtYe304Wx82fSV1hYcDPRtYpX67IAg9idK65+x1EyZM0Js2berSY8a6S7bdPAZHTQVsegy+eQm4CsF7AN5/Et85d1GtB0hgFQQhrVBKbdZaT0hl37SXX2LdJZ8dMGHQaDhxHrz/pBXQ3cVw5i9x2G1tLjQSP7kgCOlO2ssvse6Sn7y8k2cvG4l6Z6kV2CMkGDVjJbgGgi3+a4ufXBCETCDtM/XY1aNbKmsxgwFLegkFdLD+XjUT6vckPI74yQVByATSPqgncpdgs1uSS4LmGQT9CY8jfnJBEDKBtJdfErlLlMsGZsCqrR4Z2AtGgGGzVp4a0fcz8ZMLgpAJpJypK6VsSqktSqkXW14foZR6Rym1TSm1SinVa8Zrw1DR1RbtdsgfZvnUI5tnXLQc3nkIGqrjjiF+ckEQMoGOZOo/AD4G8lteLwLu1lo/pZRaAlwFPNDF4+s8difkDoFLV7dk50F46z7YshJOuCpud/GTC4KQCaQU1JVSpcC5wG+A/1ZKKeA04NKWXZYBt9KXgjpA0AdPXBQvwajEkkoo4xcEQUhXUpVf7gFuBsyW10WAR2sdaHm9ExjWxWM7dBwumLo4WoKZutjaLgiCkIG0m6krpaYAVVrrzUqpSaHNCXZNuFJHKTUXmAswYsSITg6zk7gGQt5QOPcuS2N35oI2IdAEwUBCv7ogCEI6k0pUOwU4Tyn1HSAbS1O/ByhQStlbsvVSYFeiD2utHwQeBKtMQJeMOlUMAwYeCTkD4WAlLJsSXojEjJVQ8nUJ7IIgZBTtyi9a659orUu11iOBi4G/aq0vA14HLmzZ7XLg+W4b5aFgGOD3WguPYhYi6fq9UhZAEISM4lDS1AXAU0qpXwNbgD91zZC6gaA/8UIk08/5S96QsgCCIGQMHVpRqrXeqLWe0vLz51rrE7TWR2mtL9JaN3fPELsAw2idLA1RMAKUXcoCCIKQUaR9mYCUUAZ8b0m8C8bfwLjh+eHdpCyAIAjpTv+YJTTsVrZ+7l3gyLHK8W64FVVfxe+mPMfZD9cCUhZAEIT0J6ODemtHpHyK3SWoR86O2+fIAdbDipQFEAQhE8jYoB7fEek4HAkKfDkcdt5YcKqUBRAEISPIWE09viNSMH516fTlKJuTYQOy47ohSRckQRDSkYzN1BN1RHrsvEEM+N4DkDfEKr/rb4A9H0LhkdYipZZyvKEs/+5XK5h
WPpwitxOvL8BhA1zY7Rl7HxQEIQPI2AiVqCPSza/XESgcBbW74fFp8OAkWHcj1O0G7/7wvjUNPu5+tYLLTz6C217cyoVL3uLSh9+hoqpOMnZBEPo0GRfUQ7KJaZosnVUeVR999slHYgZ88Px10atLn78O/E3hY/gCQaaVD2fBmg+jfOzzVmzuUh+7SDyCIHQ1GSW/xE6OnlVWwhNXn4jNUARNza/XbWXCOUVJ2tw1hzsihSZNE7W38/qDmKY+5AlVaXQtCEJ3kFGZeuzk6Ctbq7j04XdapJgcfnP+WJTD1TpZWjrBKux15XqriUaLBFPkdlKSlxUl34CV7X9WVU/F3vZlmLaycNPU7KltkkbXgiB0ORkV1NtqHh1qgOHMHwwzHofR58Jpv4D1P4VHJlsae+0uCAYwDMVhA1xx8s2iaWO5d8O2doNvKAs/f/EbnLLodc5f/Eb4RhB6b5fHK42uBUHocjIqqMdOjkKCVaKGAfmHwVm/grXXx1RuvAzq9wJgtxscOySfVXMnsmruRBZOKeP36yvYUulpN/jGPjFEZuGh92oafO2PVRAEoYNkVFBPuXm0a6DV0i6htt6agRuGwmm3cePqD5i3YjNbKj3h47YVfNt6Ygi9t2TjZyyaNlYaXQuC0KVk1ERpys2jDcNqRp1ghSmGLVxewBcI4rAbLL/yBGY/8m7UhGai4Bv6XFBrHp1zPPdu2JbwRlBa6GJLpYffr69g4ZQyitxODitwMSQ/WyZJBUE4JJTWPWejmzBhgt60adMhHycy6HZ6eX/DPqj6uNXe2FK5MTBoNBV1WcxbsTkcxJdfeQIuhw1f0MRhMyjJzYpbhJTIzXLnhWO54+UKquubw84WQFwvgiB0CKXUZq31hFT2TbtMvcusgJH9SweMAIcLrYMQaObFLVVh+aQ4N4u9tU3c9MyHbZ4vkY5+0zMfsmruxLgbT0pPE4IgCJ0g7TT1tiYhO0Sof+mwCehAEyybgrp3HPZl3+HmcQFuPuNIAOZPGhUO6G2dL5mODsTVlQk5cYYV5sS9JwiCcCikXabe1iRkhzEM8Deinp4FR3wLTvo+GDaU1lx7YgGvVuyjwOVI6XxKKUoLXVH7lha6UEoCtiAIPUfaZeop2RY7QtBvBfQJV8MTF8H9E+Dxaai6XTx6yWgafcGUzmdTxLlZFk0bi01iuiAIPUjaBfWUbYupYnPAyT+A1bOjPetPz2KA0cTxh+endD7DMFj25hcsnFIW9rUve/MLDKNrL7HUixEEoS36r/slRDAAB3fAvePi37thCzhzMXOK2z1fT9RykXoxgtA/yWj3C7RONB4KkTeGwwwHKpFnPRiAQBOGDqZ0vqJcJ09ccyI2pXA5bRS4utbVkmyS+LnrTjnk6yEIQmaQdvJLVxBbm2XlR83o6SuiuyJdtBze/AOYQXTTgaTH2d9gSSGVBxr5vLqBHzz5PjMefJu9tc1dPu4unSQWBCEj6ZdBPTbjffaDvVY9mMvWwPWb4NLVsOlh+OJvoIPQUIMZCEQdwzQ1X9Y0ULGnjhkPvs2379zIj1d/wC3nfI3i3KyUbJYd1ce7fJJYEISMo18G9ciMd9zwAn48eTRfHWyGxhpYeQEsPtEK6N9bAq/8AtVQDY37ogLwvoZmttc0xnnYb1z9AfMnjWo3g26rkmMyunySWBCEjCMtNfVDJZTx7jzgZf6kUSxY8yHFuQ6enFZC9uXroG4XNFTDa7+AnZtg7z9Rc9Zx/tI3KM7N4obTj+bIYjc5TltCOaTA5Wg3g+6MPp5ybRtBEPot/TJTL3I7w7XSQ4uLtlTWcsmaKqtUwCOTYdVMK6CDNYFqBvmPIwfy48mjWfj8R/x7T11SD3ujL9hmBm2aGq8/0Cl9XFajCoLQFhkZ1NvTqg1DMbokjyeuPpGS/NYOR1sqa9nbqFonTMfNhOveges3owwb/3P2iHDf0iUbP6PQ7eDOC6MXHC2dVc7XD8snP9tOTYMv7twh2eWzqgbRxwV
B6HLS0qfeFh3xcpumxuP1sdvTxLyVVlXGyWXFLDkzG/XhajhuGjw9K1zFUU9fyU/eCPLU5t2ApcfffPZohg/MwdSQbTeobw7ElemNPHd1XTPnL7ZknB9PHh2+SYjnXBCEZHTEp55xQT0UNGNrsLSlVcfWTy82a1A6CI+dG+ddb579F0bf8WHCY6dy7q8ONHLKotcB66Ywf9KosAY/dIBLArogCHF0JKhnnPzSGS93SKceOsBFTb2Pz/c3gRlI2BnJSYDbpowG4t0nqZw70pa4pdLDvBWbuXH1BzjtNgnogiAcMhkX1A/Fy+3x+thzsIkmx0C0YW/V1kMUjEAZNmYe5+IfPzuV5647JUouSeXcvWFLlHoxgtB/yDj5pbP1UUxT8/Hu2rC2/u4Pv0GJ94vozkjn3Q/vLIVvL4C8wZA3pFPn7tLaNd10PQRB6Dv0a00dOhc0Y/XwccPzeXZ6CQoF3v3g91p9TZVVg53Bx0H+0C45d3fSmTmGrqKvXQtBSFcyvqBXe3Sm4JcvEKQ4N4uFU8oocDnweP1op0b56uCVn8Fpv4Dn/6s1a5+xEnIGgd1xyOfuTnqrXow8IQhC79Cupq6UylZKvauU+kAp9S+l1C9bth+hlHpHKbVNKbVKKZXWa9WznQY3nz2a217cyowH3+a2F7fylT8HbXNYcsva66Prra+aCfV7rEqOCUikY6eibXe1/t1b9WK6rO2gIAgdIpWJ0mbgNK31N4BvAmcrpSYCi4C7tdZHAweAq7pvmN2LaWoam824Oi6X/GkzVRShB45K6IQh0Ax7P4oL7Anruuyp48uahjZrvXSmHkx79Fa9GKkoKQi9Q7tBXVvUt7x0tPzRwGnAMy3blwHf65YR9gA1DT5qm/zhrkVLZ5UzbngBOw94adY2sGUldMJgBqzFSfV7ot7a19Acn6Wu2ERVXXObmWt3ZLeR9WLeWBDv2OkupKKkIPQOKVkalVI2pdT7QBXwKvAZ4NFah1LUncCw7hli19CWrGGaJoGgjpJefjx5NGeVleBy2tG5g+PrrU9fYf08+bdx52ryJ85SB+U647ZFZq7dld32Rr0YqSgpCL1DShOlWusg8E2lVAHwHHBsot0SfVYpNReYCzBixIhEu3Q77U3aBTX81xP/iMqQF6z5kMevPpFCl4OK6noG2g9j8Jx1VqNqww7/fgkKhoG72NoW8IcnTW1KhatAhigtdGGLCaaxmWtk9chk+6QLUlFSEHqHDi0+0lp7gI3ARKBAKRW6KZQCu5J85kGt9QSt9YTi4uJDGWunaU/W0FonzJANpaht9nPN8k3MX11BwAyCUvDyT2D48eAqtP5oExr2WoEdcDltcYW+7rxwLNl2o83MNdOyW6koKQg9T7uZulKqGPBrrT1KKRdwBtYk6evAhcBTwOXA89050EOhPVkjWYb8aVU9JXlZFOdmsaXSw883HOB3pw9ETZwPQR/8+dpWi+PUxeBrhKKjKHA5GZyfzW1Tx5DjtNHoCzI4P5vi3Ow2M1fJbgVBOFRSydSHAq8rpT4E3gNe1Vq/CCwA/lsp9SlQBPyp+4Z5aLQ3aZcoQ140bSz3btjGvJWbueH0owF4avNudtRpyB8Gz82Ntjg+fx0A2rsfw1CMLHIzZtgASgtdjBk2gJFFbux2o93MNZXsVpb9C4KQjHYzda31h8C4BNs/B07ojkF1NaGgHauph2SNUIa8au5Edh7w4vH6+f36CrZUegA4YpCb0kIXxblZ1BoD0OogKpHF0bsfAl7qjVwONpk47bZOV15MthpTFvUIgtAWGbmiNJZUZA3DUDjtNm5c/UGcDJOTZQt/dsaDb/PMZUcypGBEXFleGqpR639KzuUvMuuJHVTX+zsVcBMF7uVXnkButh1/wOxwGzxBEPoPGVelMRmpyBqFLgdPXH0iz8w/iaWzyjmrrITlV1oPIyH9vTg3i2ufr0TPeDza4nje/fDGPeDZgaFNFk87qtM+89iJ3eLcLPbWNnHB4jf
ZecDb5bZHkXMEIXPoF5l6KpimZlt1fVR2/NgVx9PsN5n9yJvhbXdeOJY7Xq5ge2AgI+esg9qWJtV//ZXV07RgBNRsY0juYF7+r+M5+4/vdTjgxk7szp80Krza1eP1d6ntUeQcQcgs+k2m3h6JbI+V+62VoJHbbnrmQ244/Wh+tPYLgspmedTX/7Q1oH9vCdizUX4vo/OaeGH+8R0OuLETu6Hm2ABLNn7GomkxfVFnlmMz6FSGLTVaBCGzkEy9hUS2xxynLaHUMaokl/svLafBYZCX24Ca9Weo220FeHs2PDMHPDtQBSMYM+NxtKuwQ2OJndjVEM7Ot1R6+P36Cm6bOoYji918Xt3Az//8EdX1zZ3KsKVGiyBkFhLUW0jkVW/0BcPbQv1Ei9xO7IZiSH62FTyzj4LaryzP+uTfwgs3QG6J9bOrEFW/B+UuxnSXUNMYSMl/Hjux67AZ3Hnh2LAEU13fTLbDoKq2mSseey/8uc5MmGbSKta2kNruQn9BgnoLiWyPhxfl8NDsCdz9agWXn3wEC9Z8mEB3NiB3iFVf3e+1Avppv2gt1dtSez1oy+b8xVtS1q0j67J/daCRO16uiKr1fsfLFdxyzteiPtOZDLs9u2cmIPMGQn8iIzsfdZbIbE4phU2Bw27Q5DeZvvStuGx21dyJrVmfrw6aPFD9b1h3Y5zdUc9eywP/aOKO1z4Pfz7VrDpZ96Lbpo6JytQ729Eo07PY3uz+JAhdQUc6H8lEaQSGoShyO6ltCjB96Vuc+Lu/ct79b+ALmAl1550HvOGa5wF7DtqeDQOPjA7opRNg8m9RZpBrJ+Ry8xlHhj+fKKtOZC9MtOL1rou+QaHb0SV1Yg6lRks62CFl3kDoT4j8EkMiN8gX+xoS6s4erz/sFnl63kmseKOKm0/OQ4UWJpVOiJJiVMEIrp2+gsH5x3L361/G6dZtyQQhjd3rC/Dxnjpuf+nfAGFJprTQ1enVq50lXWSN/jJvIAggmXocibK6ezdsY+nM8rjaMEs2fgZYgT8QNHng79v5yWs16BkrLS39lB/GtcFTT8/igmMcrP/+iXFZdVv2wlA27XLaue3FrWyp9LCl0sO8FZu5cfUHOO22Hg+k6WKHzLTql4LQFpKpx5Aoq6uub2ZogVVh0esP8llVfVRtmFCt9NJCF09t3s2PJ41n0MznrA8nqBGjar8ix16DKvkaGNnht1KRCfrSxGa6yBpS/VLoT0imHkOyrK7A5aQ4L4vSAhdDBmRTXd8c9X6W3QgvCrrm6U9p8gesOuuJ2uA1VKOengX1VRBozWpTaQHXW+3pEpFOLeuktrvQXxD3SwLac4Mker+mwcfPnvuQaeXDKXA50NrkmDw/hcFq1KqZrfbG8+63SgoATHvECvwOF7hLMFFpoVGHSBdNXRDSnY64XySot0Oqdr/YAHdWWQk/P7eMw5wN2INNrTVi3rjH+sCZv26tyV4wAmY8DsXHYhr2pOcLBEyq6pvxB00cNoMcp0FDc+/KCZluhxSEvoAE9QR0Jvi0l4nGHrPQ5eCA149pmuxr8DFvxWaKcx08PfNoHA174OlZVhCfs661a1KIghFwxUvgLgF7vD4eCJj8e28d81duDo/lgcvGs/HfVYwpLeCIQW5ysmwMcou0IAiZhgT1GDorE7S1aKXQ5WDXQS9Vdc3UNPhYs7mSH5x+DEMLsgmYOlwmF+Di8mH8+rujsDUdQAV8YNjg3m/Gn/CG90EZkDc0LrDv8ngTLoB66pqJXPzQ2z0qf0h2Lgg9iyw+iqGz1rtk7g7TNKmoquPSh9/hwiVvcduLW7n85CP4w4ZP+KDyIA3NgajPnVo2hEl/eI9nPwmg7U4rcCeaQFWGVRis9qvwBGpocY8/mHgBVDCiaXbs90q2MOhQFgyFbpDnL36DUxa9Hl581RcXHQlCf6RfBPXOWu+SuTuCGuat2BwVTBessSZJc5w2vtzXmLB07o3PfmwFdsNmNaqObLIxfTm8/BN4ZDI
sPw+qP0Y31BDwevj5cx/gC5gJx2Ko6Aw59L0CAZOdBxrZXtPAR7tq+dlzH1orXwPmIQXldPGmC0J/pV8E9c5a7xLZG5fOKieQJGsucjvxeP3cu2EbD1w2ntJCF+OGF1CclxXuprTyvd08W+FD5w6Gc++y9PXL1sD/3gkV66yDeXbAqpkofwPOpgMsmXoY+U54IGYB1OLLxlPr9cd9L5fTlvBJ4u5XK6iqb04YlPfUNqWUuaeLN10Q+iv9YvHRoSzYKcp18sQ1J2JXCsNQ7K1tprkla47Vtwe6nfxm3cdU1zdT3xzg6XkT2d/gZ/Yj74bPu2jaWJa9+QUnHHkMpUUOlOkHTWtAD+HZYTlmXvkZ6tsLKBk4ihJ3FuuuPxFPEwRNjc0Af1CHxxL6XgFTJ3ySWDilLKmMs8vj5cIlb7Wry8uSeyESmV/pe/SLoJ7qisLI/6Aup429tc1RN4JQK7viPCeLLxvPdY//I/ze4svGs2TjZ1TXN7No2lgefeMLfj7l62G3CrQG10fnHM8Nqz7krgvHMDzbiz3oba0XE6JgBAR94doxqsX6mD99BXkDhuMxs/E0aUYOdMd9r90HE/cxLXI7o24CIUoLXWH5pL1G1n1pRavQu8g6hb5JvwjqEF2fPBGx/0EfnXM8C5//KK6V3cIpZcxbsRmAR+ccT5bdINthI2iaTD9+OKeXDeZvFXu55Zxj0RGTmCF2HvCyv8FHdX0z2w800zwgh6OLC7HNWBm/SMkMWk03YmrHqDl/odCspdBhQ3kbKM4pwlTWAqjdB70opZI+SSzZ+BlLZ5YzL8IauWjaWH6/viJqjMkqSHq8PgbmOHhq7kRMrcl2tNooezJrS+VckkV2L8nmV6Skce/Sb4J6e8T+B03Wyq7A5QDgla1VVuAG/EGTbKfBkcVuhhe6GFbgYvYj77JwSlnC4NroC/LAzHJGDMzG7XBQVd+MzzaSw2evRdXvhewC2PArOOm6hLVjCDSj/jwPvr0ABo5CB30ow857n/uxOZyMKnbHBe4/Xmo9Sbz5eQ23fOdr4exeKcWtaz8K17EJjVEpK0iHgqBpar6saWBvbVO4A1MoMxvkzurRrC2Vc0kW2f3I/ErfpF9MlKbFD/KXAAAgAElEQVSCaZosnFLGqrkTWTqrHFPrhJOrnpaJydJCF9trGvn2nRuZ8eDbfLKnHk+jH2WosOSSqEn0A5eNJ9thcN+GTzjYGGDHgUZ2ebz8aPVHfNgwgCr7UHRWLnrSAvA3JrY+GjZLlll3I9w3HvXYd1B1uzlnpOLMYUG0GeQPGz5h4ZQynpl/EsuvPIHH396Ox+vjiatPxB8w8QdNAOw2xS3nHBtXgfLWtR9FuWJqGnxsr2kMB3SIdr70pCsmlXOJS6f7SafaP/0JydSxsrp9DT5ue3FrOKu766Jv8MdLx/FfT7S2oAtp6pE/Q6s0c9vUMYwqcYcDSahJ9MIpZRxdksu2qnp+8fy/wlnx3G+Noq4pgC9oUl3fzK1rP2b+pFEUNDkYkDWC0UOGoGashEhZ5qLlUL83rqQvqy9HTf4tav1POWrGSpZ+rxSfNvi0TrG9ppGbzx7N3rpmfr1ua8LWfM9eezI79jdS0+Dj+S1fMa18OA3NAfbUNjEkPxtfIJj06SWUmXU2a+uoTBKbIYb6xzb6AlTXWbq/ZJHdj8yv9E0kqGNldbFukRtXf8CT10zk2etOxh8wcdgN7Ibi/kvHoYHvP7ElSrLYecBLjtOGLUbP3lLp4bYXt3Lb1DEs2fiZFbRdDhp9QfxBkxynjXtf2sb9l47jQIOfHKeNRl+Q4rwstLsA5RoAc/4CZsDK0N95CEZPTizLuAoht8SScBw5ZNmzKMvzcZTLxGO6yc2y89PvlLGntoni3Kxw96Zrlm9i1dyJXLjkLcYNL+DHk0dHBf2ls8oZVpAd1Yg7RGRm1hlXTGdkkkgHTqLxPjR7AoPzs8Sl081ISeO+icgvJNcGDQUledkMK8yhJC+bApf
V6m7b3vpw6d0QpYUuinKzUAoWt3jUQ9uXzCznsIIsbj57NLe9uJUZD77Nwuc/wm4ocrPs/OK7x2I3DBY+/1H4veaAJY9gz7YaW+cUoX1eOO6C5LKMNltlmfsnwGPnomo+I+uVWyhp/JQRdg8l7ON/P97Nz849lnHDC8LfNaitsc6fNCocIEPvzVuxmVpvgKNK3Nx5YbScFMrMitxOll95Ao/OOZ5Vcyfy6JzjWX7lCe1mbZ2RSSLXDyQa7zXLNxEwtTTG6AGkpHHfQzJ12vZexzajvmb5Jopzs7jrom9w4+oPoiyNd7z8Ma9sreKsshKWX3kCB71+PI1+SvKyCARNrloW/TTw0N8/54bTj8HTGGDh8+8ndxHYHZiGneY8G9nBRnAXo6avaC0Q1oZbhrXXWz1SV82Emc+SozU3n5wPdj+rZx7BBztreXBzLdkOg4dmT4grcRAaT1VdMyOKchg9JI9VcycS1JDtMBjocoavj91QPPnudl7ZWhUOou3RGZkkMkNs9CUerz9gShYp9EskqJNcGyx0OaKkgWfmnxSWLG5/6d/cfsFxDBmQTbbDxq9e+BevbK0CLGfM1t11YfvjGwtOBaI153HDC7hp8tc46PUzfKAraWAL3VS8/gC+gGbOo/+kONfB4guOYMicv4AOorSG9T9L7pZxFVp/N9bAI5MtT/zUxdjdxZTnHeCPZw/CyHEwyJ3FntqmpD72kvxsbIpwgATipJNF08ZSXedjS6WnTXtb6HuFjt9RmSSUIVbXJf98ezbWziJWSaEvI/ILybsJHfD6o6SBmgZf+HF+S6WHmX96lzmPvkeTPxgO6CFC9sdQgIl0CoR04Csee48Ll7xF5X5vQheBw26E67R8646N3PHyxyy+bDzFedl83pDNfy7dxrPbTHRWPvqcRTBgRGJZxnsg3HEJsAL889fBwR3QXIedAEbdLgzPlwylhr9eP47JZcXhcdx10TcYOiAb09T4AiZf7mvgy5oG9jXElxxYsOZD5k8aFX7t9QcTFhQLfa/rn9iSVNJJhZ7uPyoFzYS+Tr8ovdtZvjrQyCmLXg+/TjQpt2jaWLTW3PLsP+OyxdumjqEkL4tjh+YDrVntwillYadN6Lg3nz06yv+9dFY5Qwdkc9790aV/zyor4Rff/ToXP/h21PZr//NwfvztwRh1u1GrLouWZd5ZCifOszou7Yy4/nPWgT0L/E1WkM8tsbzvxZb/HjOANhx4bAPZ7zXZc9DL8re+5IpTjiDHaWOg28l/3rEx7rqtmjuRGQ++Hb4GVzz2XtQEaE2DL6qk8bjhBdxw+tGMKsnF5eh45tuTmXNb5ZhlwY3QXXSk9K7IL20Qq7VvqfSw7M0veHreSfgCJl/sa+D3662yAUtmlsc1sGjymwwtyA4HmGQ68JZKD3e8XMGKq05Aa3DYDPKybRz0xuvFoUVPsdsf+Pt2Jh07lNtfOsjvpjzHUYV2bDYDhYJzboeXbokO6AUjrAnXnEHwzJVWQD/tF9YN4D9vRHlrwJGD8jdSWHA4+a6BrHlvL1f9x5H86f8+55ITDmdQbmKHicfrT2j7DMkxsTr6lkoPVzz2Hm8sOLVTgbG7ZJZEiFVS6OtIUG+DRFr7j84czZD8bADcWXbuv3QcTruNgmw7T1x9Yrhpxn1/3caPzhxNgatVBmhLB66ub8ZpM8h2GhxsDDDlvrdZOKWMs8pKwn1PPV4/azZXEjQT12/xeP1sqazl7IdrKS108cy8ExlsqwcUTLoFtfefrRn81MXgyIGA19o2+bfWpOrUP1rBft2N1vbR56LO/BV2fz03n5xH0B7k2CnDOUgOShG3cnXJzHIcNsVTcycmtH2Gsul0tRum89iF/oEE9TZoz4cbmx2WFubgctoZOiCb8SPGJpUBEt0sls4qx24omv2a21/6mJ0HvGzYupfrTzs6qnDYAzPLMbUZ5755YGY59234pGUc1vEG5bpQdjfVdc28t7eJc+a8BKYflA1V9xW8fLM
ltxSMaJ1MzT8MVpxv/Vw6wZJtVlqvVcEI7DNWkp9TTB4HCJqK175o4vGrTyRoahSwr95HttuBy2kktH1a5QdMls4qD68NOBQdvKcnLWXBjdDXaVdTV0oNB5YDQwATeFBr/Qel1EBgFTAS+BKYrrU+0Nax0k1T704ig1HQ1Px63dawFTBUYGv+pFFR2ju0avWFbgdoKGwJJoayyvA6bQauLINCV1ZUHZSdBxq59OF3Whbs5PPzScUcPsBOUV42ylMJvjorO7/sGfjjCdbJZqyE9T+Nrx557l1gy4Jtr6GPv5LmoCKgHHx00IndZmnthW47ew76ooLfkpnl3Lvhk7Dtc+GUr6PR2JTC5bRR4Oq4lt4b9V3E/SL0NF2tqQeAG7XW/1BK5QGblVKvAnOADVrr25VStwC3AAs6O+j+RqsUEz3xFnKQLJxSFu6YFElo5eri1z/lhtOP4TcJlv0vnVVOoSsr6lw2Q0XMDdQybUUtAO/+5FRKBmVBMICesRLVUG0F7kgrZCSeHZZs8+drYeazqOXnkd0i6Zw4YyVmfilPbNnD+COLKc7N4vGrT8RuKLIdBj959p9hl1B1nY+dBxrjioN1JCD3VpXAntTwBaGjtGtp1Frv1lr/o+XnOuBjYBgwFVjWstsy4HvdNchMJtnEW6iL0lllJSydVR4uNHZWWQker59p5cOZv3Iz08qHJ1wBGrsi02E3EtomMQxwFxPIGUx1ziiChaPQMx6PtkJGEtoe8r2Hgn5LeQJbcy0zx2RR6japrGkIFzzbdbA5an5h/qRRCYuD7WuIlmw6c+1k0lLoz3TIp66UGgmMA94BBmutd4MV+IGSrh5cfyBZpbvDClyUjyjg+6cfEy4tcNuLW7n+tKPZsHUvRW5n2AufSmCzGyrOD37nhWOxt9RB33XQywVL3mXU7/7BBc8epObidejDxqFnrIzupXre/fDGPdG+99IJreUJ7v0m6rHvMKBxB+OLfNx1geXUuXblZuZ+e1R4PMnG7Q+YVNWl1lqvrSqBh9JcWxDSmZSDulIqF1gD/FBrXduBz81VSm1SSm2qrq7uzBgzmmSLZ4bkZ6NRXBvTOem6x//BOccNpSQvi7PKShjodqZU/tTrC3LHyxXh8sILp5Rxx8sVeH1Bahp8VNU1R8kz5ff8kyN+9wFbgyOsAP/9f1ha+l9/BfVVcNEyeP9J6+Cn/DC+auSqmSjTzwVHKb74yTf4+7VlDHZbxc7GDS+gqMUOGWLc8AKemjsRX9DkX1/Vcv0TW9pd2JPs2oVWAssCIaE/ktLiI6WUA3gRWK+1/n8t2yqASVrr3UqpocBGrfXoto4jE6WJSTbxFrv4KcSG//42g/IcVO5v4t4NnyQspRurTSdbNLP2+lPw+oL4AiazWnqphkrZFrmdDHA52FvbxBuf7OW/Ty7AThCUQvmbwO+Fp2fC9x6Ax86N/2JXrodHJltZ/bSH0dmF6JxBfNlg5/aXK/j+aUdz7eP/oDg3K27xVWiyuLq+uU2NPNG1i13cFPquskBISFe6dKJUKaWAPwEfhwJ6C2uBy4HbW/5+vhNjFUg+8ZbME60U1DcFw4udqut8LJxSRpHbyWEFLobkZ8dNNiay4i2/8oRwH9bi3CzuvNDqrRp7k1h25Qk88PftPPD37QDcfMaRzCt3Y2Tnw+y1KG22Tq6GiC1LsOZq1Ll3oWx2RjqdDHTZqG8OcNvUMYwsygnfUCB6snjeis1RNXBib3yJrp1o7UJ/JhX55RRgFnCaUur9lj/fwQrmZyqltgFntrwWUiBVvbfI7WTprPK4Mr51TX52H2yKWuk6b8VmLlzyFjZluUJij52ovk1utj0c5EOrWm8++9i4idcdNY1RUskdr33Otx/8lI1fGTz7qUY7ciz7YyLtPUTINVNfhRFs5nenF7Krpg6n3aC2KXGlxVDtHJfT1iE5RTryCP2ZdjN1rfX/Ack8Zqd37XAyn454qw1DcUxxLo9ffSLVdc0McDnwNPr
50dPvJ+x/elZZCfvqfVErPCOPHZvVfnWgMW65fk19c1yAvXfDtriVow/MLCfLrlj4/EesfM/B0mlHUDznLygzAEE/vHZr4rIEgWYwA6g/X8sFM1ZCjgO0yd9vGMcNz33Glkpruqa00Orl+tDsCQRM3SHroiwQEvozUqWxh0nmrd5T25Qw8/Q0Bbjs4Xe4cMlb7G/wYWrNzgOJ+5/+7NyycOCNPHYym2CijDbU3SiS6vpmBrodPHnNRJ6ZfxILp5Rx34ZPaPQFuf+Scdxz8Xj8WQOpdRajc4qsZh0Tr43O3Kc9DK4ia3K1xRKpVs1E7dqCeuxchrOX1ZeUcnH50HDtnLKheYwenIc/YHZITklWdVMWCAn9ASkT0MMk03t3ebwc9Prjgk/k/h6vH6fN8ptH9j8tcjspyHESMHXCYzf5zYRjSZTRHl6UE7eEf9G0sfzP2n/x6/OP4/AiN0MHZHPcsDHYFBiGEbOiMhscOZjZBag568AMopQBnu3w9h9aq0VCVAs+fA3Yswv43ZmDaNTDuO/Nr7jqW0cBoGJaBEL7coosEBL6K5Kp9zDJ9N5QBh+7aChy/yUbP6PQ7Qj7zUP9T/1Bk5tWf4DRcqzYY9uSJKiJMtqRRW4GuZ1R1sffr6/gla1V+ANmuHXZYQUuBg9wJWxhZiob+1QB2wNFbA8MRCsbDBgOE+ZEl/8NteA741dQsR4CzaiADzdNLPiPAgqzDT7eU8utaz+Keyp5aPYEbAYJ5yXEoy70ZyRT72ESZcch+14iSSFy/y2VHha//im/PO/r4ZZyNgXXt1RDrG3ys/iy8VEFwO68cCwuZ8cyWsMwEtacSWWiMdGcwWNzyhnpDmDLHYyqb2kmEtmC79NXYcwF8MRF4SqSavoKbDYHL26p4pWtVXEOnyZ/MFxrPnLuAOK7MfVEPRhB6CtIk4xewDQ1e2qb2OXxUtPgY8nGz9hS6UnqpW6rgFTIf16cm8Ut53yNP/3f50wrH06R28lAtxOHTTGsIKfHCmVV1TVxweI3424IK646gaWvf8IvTh2EywhaM++hFnw5g1oDeoiCEXDZGnTAiz9rIJ/vb+Yn63ezpbKWv918Kpc+9HbcOZ677hSAfuFRl6Ji/QtpktHHMQzFkPxsDnr9/HDV++06NNrSh0OZ/J6DTeFSvKGiWaWFLp697uSUf9kjA8Xg/Cyeve5k/AEz5aBhmprG5sRzBgrF/33uoWzzbsYNz2fxtKMZcs7tVnAP+BIXDlMKtWomzktX8zWnh2cvKeXlHTbsijYnTjPdo95b1SmF9EA09V6iqxwaoeOMKnEnraWSCrG9N8+7/w1q6n0MTaKbJ6KmwccX+xoS6vo7DzSGdfEtlbVctGwrX/oLCdhcaLszceEwM2gF9yYPvPIzVM02zh7uZ6ixnwcuPi7uHLG9YGPfyxSSOahi52OE/okE9V4klIEPK8xJOXAmO47LYT+kYNYVgcIXCPLSP3ez+LLxUZOaD8wsZ3B+FoaC2y84jmfmn8TT807i8KJc7HnFqNwhMH1FtAXyouXw1n3Wz0FfuGCYum886tFzOHvwAV6Yf3z4HKGnnNh6MGeVlfD41SfS5A9QVZfYNppuyIpZoS1EfuljdFYrPdQFN10RKJx2G+ccN5T7/7otXA/e4/Vz34ZPmFY+nNte3MqdF45loNsZXcrA7oSSr8Ocv1idmYIBePMP8MXfWidTX7ghqmCYWjWTMbPX8sVPvgFao3NdUb1gQ9JRTYOPy1qag2SKTNFXW+qJzt83kInSPsShaqXt/VKlMuEaW9ArWS2ZZOffsb+RSb/fGPfeqrkTmfHg22GdvyQvO9lBrJox/gYw7FbD7JOuS14wrKHa6s40YyUUfw3sWeHv89FXB1n4/EcpTZqmU0Dqi5p6XxxTJiETpWnKoXbyaWtCtb1fulCmf/erFSlVfUx2/hxn4izS4/WHv1ObOr9hQN5gK7j76tCTFqDq9yYvGBbqzrRqJsxea73OyscXCJL
jtKX09JFuAam93rm9QW91oRLiEU29D9GdWml7mnkoUNx63pi4gl6pauumqalvDsQ147jrom+wZONn4dcpyQSGAdkDUMVl6MFjEzfrCJUcACuwm0ForgVvDXlZRsKSB4nOn44Tj101H9NViM7fd5BMvQ/RnVppKr90hqHQOnGpgVR+OWsafMx+5F2Kc7PCmnqjL4ihVNiHn6rOH9WYW+dR3agZP3utlbU3VMM7S6NLDhSMgOqPLSlm+gry8oYwaYiP9XO/zn+/sJ31W6uTnj8TA1JPy0l9Vefvj0hQ70N0Z3XBtn7pIgNAZ+qshAgFx1Cf1BB/u2kSf7v5VLIdBoPc7WeVsXLIo3OO58l3d7LwO8dQmmdDuUtg8m+sxUs7N7Vm7n/9lZWxPz0LNXst1GzDXXgES747GP/5oziocyjMiT9/RwNSX9ffe0NOksqYfQeZKO1jdDRgpLp/sl/0o4tz2VZdH95+VlkJN5x+TLgBR0cCQrLuSh3VVWOPM254AXdN/0bLU4CDn08q5tjSYlzBWpQZtDL0N+5prSlTOgHO/X+Wzt5SdoAZK9HuEvbpfIrycuImkGOvzdJZ5QxyO+MKlqWD/t5V/w4dpa/f7NKZjkyUSlBPYzoaYFJt/XZWWQm3njcGrXWHfjkPJeBFjg1a69mEeO66kzl/8ZtRn7n5jCO5doIb9di50ZOol622mmDHTqyeexfaXUJdwdfIz4l230TJPabm1+u28srWqrjv0FsBsyMka4P4xoJTGVaY0wsjEg6VjgR1mShNYzo6wZdoci2RnvzK1iq01h2ehOvsKtnY1awzHnybm88ezbjhBeF9mgNm3KTnE5t3U+sYhI5duDTwyMRlBxw5qKdnkeerBk8l1O+1XDYR18Zpt3Hpw++ESy3EXtN00N97clWtVMTse0hQT2O6asFQVwaAzrgyEt2cbnrmQ244/ejweA4f6Ipz1dx54VjmPLaFO7bYCV7+F/QN76PnrAOvJ3HZgVBzjkAz3DMGHj4DqraGAzu0f03ToQxB7Kra7tK3Y2/GbbUZ7K3g3x9vOjJRmsZ0heOgL0xwJQuko0pyeWPBqTjtNnyBIHe8bDUFGVXspnK/lztermBLpYctlR5e+MjFwillPLt5B/ecdzjZM1aiIjX10ERqwQgwA9ZJckugbhfa6QbDBo4ccpy5bV7TvnC92qOnfOypetN7ax4iHeY/ugPR1NOYrvpP29sTXKno1JH7hFanxvLXG7+NqeGm1R8wusTFb84YhM30w/7P4H8XQX0VTF8O7z0M1f+26smsvd4K/KPPhcm/RqPwG9l8f21llA0y8poeyvXq7WvdlaSq3ffWPEQ6zH+kiqwo7Sd0VUbWE63f2gpmqWS/kft4vP6E2bTNUNz41Pvh7L2iqoG7zzuCw4u/hrrgIavswDsPwZaVVlmBUEAvnWB53pdPRXl24CwYwZIZK+F742l0FOLKar2xHMp17suZY2duNqk+KfbWPESy83p9Aarr6LYbqhkMEqyvRgWb0bYsbLnFGLaek+ckqKc56dCLs71g1tbNKa7G+7UnE9SapTPLw022Swtd/PHS8XgafVTXtzbZ3lJZy8wnPuGF60+msO5T8NXBMWfBx39uLS8AcMoPWwM8hAuGce5d5LhL0CVlVFQ3HXIwrmnwcferFVHFzu5+tYLfnD+2V2+qnb3ZpCpF9dbCpGTn/XhPHbe9uLVbbqhmMIi5dyuOpy8NS3+B6U/A4LIeC+wivwjdTmcfgxMFmyUzy7l3wydU1/m44fSjOWKQG6fd4Na1H1Fd5+PHk0cnrluDBu9+aNwPni8th8yK861fvDnrEhcMm7MO/nwtes46aup9bD8Y4Ncbq9lSWUtpoYtVcycyOC8buz01v8Heg14+rW6IGt+iaWM5qtjN4AGubpNm2gva+xua+aDyIDlOGx6vnyUbP6O6vjklmSKVMfclTT3UOrKtTmOHgv/gHhyPnhlnp/Vf8SqOAUM6fVyRX4Q+RSq
P38k89LETcfNXbmbhlDLmrdjMFY+9R2mhi7XXn8KPzhzNNcs38fv1Fdw2dQxHDHKTk2WLWMGqwD0IM7uQoDMfA40x43HUqsssV0yigmEht0ztLga98jMGfXsBa2aMQttHojUoathdW8CQfHdKgT2oiaurs2DNh6yaO5G9B73sa/Axb0XHF321R1sTmkVuJ7s9TeFqlpGBLxV5JJUnxd4qQBZ5Xq8vwMd76sIBHbpHAlLB5oR2WhXsuTpCEtSFbqe9x++2VnQmuhkUuBxRr72+YEpBwzQ1FVUNXLPcaqo9/z9HcuOcV7ATQF20DFZfntgtE2rSsfZ6jND7Fy0Dw86wrADB5gAYA6wiZAkI3bD8QTPh99lX72NPbVNUs++urHLY1k21psEXlrFC2xes+ZDbpo7pUnmkt2TC0Hmr6+h0M/WOoG1ZCRMEbes5d5T41IVupz3fdKJMct6KzfhNndATHirjG3rttNtS8sfHnmfJ37/k1KUf43EUW794V7yM/v4/4Ny7rIBeX9XapCNGc7duANtRy8/DXrsD6r6Cuj3QsC9qYVOkl/vfe+oSfp89tU0UuBzdNpnYlrc+WcA/YpC7T9k0D5Uit5MVVx7PC1ccw9/nHcULVxzDiiuP7/LvaMsttjT0iMVwgelPYMst7tLztIUEdaHbaW+labLAUlPvY+ms8qibwZKZ5azZXBl+3RXdnRp9JrgHofMOw+Mcgi7+GlzwkKWpv7MUbI7EK1Qja7nveh/+dCZUfQzPzAkvbKpvasbZVM2GK0dy6pBm1s/9OpPLisPjXzRtLEs2fhZ29ERSWuhCKXXIC2fauqkmC/g5WbZed+R0JQaakcHtHPfSBQxfdgLHvXQBI4PbrbmWrjyPzYYxuAz/Fa8SuOGf+K94FaMHJ0lBJkqFPkCyidSFU8r4ZukADMMIyyqFLgcHvP5OabO7PF6mL30r7jxPzzuJwwqswBYImBxobCY/6MFmgK2xGlW/J3Etmcm/tQI6tE62Rm4ffS560i2Wbh8h2+icItAmGA4Cyob2N6HsWdiUhqAPpWw0KweGYUP56jGxUVkPNvdADi/Ktb6vaUJjNQR84HBZTxNBn9UaMKclKwy9b3OibQ7w1YNpErBlh212pqnZXlOPWV9Faa5CGTZMuwtnblGPBqJup36vdaON/Te8+jXIHdx740oRmShNAzJpEcqhUuR2snRWedQk4aJpY1n25heMH1EYp8V2Vpu1KVg0bWyc+8TWctlNU0dVrCwtdLHiyuMZedhhqBmPQ2RwDmnu0DqpCq0ZPMA3L2kN6GCtYPU1oCK0e8fUxfDhU/DNmfDc3PD27KmLwemGl26C+iqOmrqYOl8jB71HUOhyWuUNnrrEOubpt8Lz17WO7eInwZ4NK88PL6xS374Jnp4Nnh04QvuUlGEAI4PbUc9f0vr5qYvBPNxqDRh5o4iYM0ib/7+hm5+v0brZRlbz9OywbnoZhmTqvUBfXoTSWwQCJrsOeqmqa6amwceazZX86MzR4WvSFUGkuq6Znz33IdPKh4d94ms2V4Z94m1aL92Olsy32cqK1/8MKtZFB/hQbfdQpn7lenhkcusAZqy0mnjEZouXroYnLkpYVZJAs3WsltfNxceRZTdas85kxzz3Lnj8orbPe/Vr1s+JMtiZz7XeFCJuAgC6oZqAvwlfEOp9JlX1AXIHDml9iugrmGbrzS/2Zhz6t5JMXegKpJ9jPHa7QWlhDi6nnaEDshk/YmzUAqSuuAkWuZ1h62OixTKJNPfi3Cx8gSBfHQzitBdQlO/E0EH4zh1Wow6IbtYxdTFsuNX6OXdwtBMicsFTCM8Oq+5MkqqSOHKiXtu1HwKq/WM6IkrsJtsnlKUmeq9xX/TE8FOXwDWvQ90e1FOX4GjJ+N3TVzC4eACafVB7wDqva2BSJ1CPEMrO/V6o22U9zXh2WH/WXm/ddNf/1LpR5fTcBGZPIUG9F0iH8q29QTLbW1fdBNvzS8daL8cNL+Dms0c
z48G3428mA0qtg5omfPduOGcR2JxWgL7wMUuycBVZgSOUKfobE/vhzWDi7R6D/vsAAAxZSURBVP5GK1OPeG3Ys0DRun8yj72/sfV1sn3sztafEzX1jsSzwwqSoe8CVrBsrrM6TYUy4enLIdAEWltaf08H+Payc88OGDzGytBjJKVMIfO+URqQDuVbO0t3lDrtyptgW9bHWJfIDacfzU3PJG7CHf6eB5uo1gWY+aWQNxjcg6BguJWl2+yWZHHVa/CDD2DwcegZj0fXfp+6GN66D85/MH57ziBLA255rQuPRLmLrWB08ZPW9jfusfaN/OzFT0Lhka3b3n/SCrax++TEHCv03kXLrM9EUjACdDA6+J/yw1YtH1paCc6GXVvgse9YTqD9n0MwYE1UxtSw7xYaq6NvPKHs/JQftn4PZ47175OBAR0kU+8V0qF8a2forrmCnqodEpvJB9towp3y9zQMK9i3oHOK0XP+gjIDaMOOHzvGf/wYmz2bwOUvYdd+lLKhbU6UzYaa9idQNnC4UJFZb0mZlW2G3C9XvRbvfgm9b3Natsw5f7ECs90F7uLoY131GgS81rmcbjj1p7D3nzGTr67U5KTQ9uevs7R9wwbvPQLjLrOePALNkDsU7A66nIAv+Zgib2YZjAT1XqC3lk13N901V9CTN8FICai6rjnhzUQplfB7rr3+FIImbf6bGnY7Zn4plZ5GvnX7xvD2ccMLmD9pFMcOKcTlsFPkdqIMZQXfxANtf4Iv9v2cgcmPlRe9r5k1gOAVr6KCPrTNaVkglUpNTop0AjlyIOiHMRe0TgYXjIDpKywZxBYTgiKtmglcN+1idyYeU8HhGS25RJLZ364P05kOQX2d7porMAzF0cW5PD3vJP73pkk8Pe8kji7ufqdFskU7NkXCCdXdnqaUugAZhsLlsEdJcFsqPdz24lZcTnuv/38IlVM4dclWjrrjn5y6ZCsVVQ2YqNYnhB9+BEO+YTlrIqWb8+63JKHQa3+jFWhXz46RaWZB/Z7YE1t6+MNnJO1M1S6J5KSLn4T8YRktuUTSbqaulHoEmAJUaa3HtGwbCKwCRgJfAtO11ge6b5hCOtARmaQjFsVE/vGesIAme6KqafCFv2cowx5VnEvl/kaKc7PYecDb7lNKX5bg2n3iinwCyCmCK14G029NjsY6gXIHW52mEkkiQX/0tkR6+FOXdMx2aBjR0lRnsv00JxX55THgfmB5xLZbgA1a69uVUre0vF7Q9cMT0olUA1VHtffetIAmcuSEvufdr1Zw+clHxC1mClUCbOsppS9LcB164rLZYcAw62fThCl3w9m/Dc8DkF1g2QoTSSK2GE09mR7e0QVCqUhTGUy7ty+t9d+A/TGbpwLLWn5eBnyvi8clpCHt1XgJkSxI1zQk/uXtaxbQ0Pe89bwxCUvpzp80Cmh/MrevSnCddmeFtPnCkZYDyD3ICvq5Qy0NPVISmb4CcmPqi4f08EgirZdCSnT2mWSw1no3QMvfJcl2VErNVUptUkptqq6uTrabkCGkEqg6GqT7ogXUMBQ6iTumwOXolJzSHXbQztBeVc0OY3dYk6JXvAQ3vG/9nWiSNJkenuFula6m290vWusHgQfBKhPQ3ecT+j4dtSj2Vf052fcIlRboaEPqvlI6olukIZsdQgu2kp+43+vhXUFKtV+UUiOBFyMmSiuASVrr3UqpocBGrfXo9o4jtV8E6FwA64sFpLoyEHe25Z/QP+iJ2i9rgcuB21v+fr6TxxH6IZ3JBPtig+2uzGj72rxBV9IXb8iZTCqWxieBScAgpdRO4H+wgvnTSqmrgB3ARd05SCHz6EtB+lCCTld9j55aNdvT9CVZqb8gpXeFfk0gYFJRVdctDZ8h9RtGW31aDcNI2+y2N2SlTHwykNK7gpACpqnZddAbDujQtR74jmSpcXVnTM2v123lla1VfT67bSuI9rSsJE8GUiZA6MfUNPioqmvutqDTUT9+SMpx2m1c+vA7vLK
1KqXP9SaRjbUTlUfoaTtqR695JiJBXei3+ALB8JL/SLoq6HQ2S+3o53rT395eEO1yz3s7ZPKEc6qI/CL0W5x2G2s2V8b1LV06q7xLgk5nJz87WkOnN+WG9oJoRx1Ch6qHZ+qEc0eQTF3ot4Ta2y178wsWTinjmfknsXreRKvi4kHvIWe9nc1SO/K5npYbYp8KXM725RXDUBS5nTjttvDTUaLr2p6Ukwo9/WTQFxH3i9CvicwMXU4be2ubuzTr7WzmmernvjrQyCmLXo/b/saCUxlWmBO3/VBI9lSQZTeY/ci7Sa9Zqk8TXeWUEfeLIPRjYptidHU1yM762FP9XE/KDcmeCp697uQ25ZVUq2x2lR7el9ZA9AYivwhCC+k4ydaTckOy6+MPmG0WcUv1uvbFwm3piGTqgtBCOk6y9WRd9lSuTyLpI9Xr2lcLt6UboqkLQgu97STp67R3fZK9f3RxbsqdqzJRD+8KOqKpS1AXhAgkqLRNW9enrYnOUBtAua6dQyZKBaGTZOIkW1feqNq6Pm1p55l4XfsqEtQFIYPpSUkpHeckMhFxvwhCBtOTi5Nk4U/fQDJ1QchgetKm2ZNOHCE5EtQFIYPpaUlEtPPeR+QXQchgRBLpf0imLggZjEgi/Q8J6oKQ4Ygk0r8Q+UUQBCGDkKAuCIKQQUhQFwRByCAkqAuCIGQQEtQFQRAyiB6t0qiUqga2R2waBOzrsQF0Dhlj1yBj7DrSYZwyxq4hNMbDtdbFqXygR4N63MmV2pRqOcneQsbYNcgYu450GKeMsWvozBhFfhEEQcggJKgLgiBkEL0d1B/s5fOngoyxa5Axdh3pME4ZY9fQ4TH2qqYuCIIgdC29nakLgiAIXUivBHWl1NlKqQql1KdKqVt6YwypoJT6Uin1T6XU+0qpPtExWyn1iFKqSin1UcS2gUqpV5VS21r+LuyDY7xVKfVVy7V8Xyn1nV4e43Cl1OtKqY+VUv9SSv2gZXufuZZtjLHPXEulVLZS6l2l1ActY/xly/YjlFLvtFzHVUqpXqv128YYH1NKfRFxHb/ZW2OMGKtNKbVFKfViy+uOX0etdY/+AWzAZ8CRgBP4ACjr6XGkONYvgUG9PY6YMX0LGA98FLHtDuCWlp9vARb1wTHeCvy4t69fxHiGAuNbfs4DPgHK+tK1bGOMfeZaAgrIbfnZAbwDTASeBi5u2b4EuLYPjvEx4MLevoYxY/1v4AngxZbXHb6OvZGpnwB8qrX+XGvtA54CpvbCONISrfXf+P/tnT1oFEEYhp+vUBEVQkQlJEKICAoiUbCKiAQRjeIPWAgWKQQbLayUINhZ+tNZqEhQsfAPUypGayUaYySCCoIhISkkqI2oeS1mDo/z7kJSZMble2DZmbk97uVl992bb/du4UvF8H6gN7Z7gQPzKqqCGhqzQtK4pJex/Q0YAZrJyMs6GrNBge+xuyAuAjqBu3E8tY+1NGaFmbUAe4CrsW/MwccUod4MfC7rj5LZjlqGgEdmNmBmx1KLqcMqSeMQggBYmVhPLU6Y2VAszyQtEZVjZq3AJsI3uCy9rNAIGXkZSwaDwCTwmDATn5L0K26S/Biv1Cip5OO56ONFM0v9p/OXgFPAdOwvZw4+pgj1ao9cye6sGemQtBnYDRw3s22pBf3HXAbWAO3AOHA+rZyAmS0F7gEnJX1NracaVTRm5aWk35LagRbCTHx9tc3mV1XFh1doNLMNQA+wDtgCNAKnU+kzs73ApKSB8uEqm87oY4pQHwVWl/VbgLEEOmZE0lhcTwIPCDtsjkyYWRNAXE8m1vMPkibigTUNXCEDL81sASEsb0m6H4ez8rKaxhy9BJA0BTwj1KsbzKz0ZLVsjvEyjbtieUuSfgDXSetjB7DPzD4RStKdhG/us/YxRai/ANbGq7oLgcNAXwIddTGzJWa2rNQGdgLD9d+VjD6gO7a7gYcJtVSlFJSRgyT2MtYrrwEjki6UvZSNl7U05uSlma0ws4bYXgz
sINT+nwKH4mapfaym8V3ZydsItepkPkrqkdQiqZWQif2SjjAXHxNd4e0iXMn/CJxJdaV5Bo1thDtzXgNvc9EJ3CZMuX8SZj1HCbW3J8D7uG7MUOMN4A0wRAjOpsQatxKmskPAYFy6cvKyjsZsvAQ2Aq+ilmHgbBxvA54DH4A7wKIMNfZHH4eBm8Q7ZFIvwHb+3v0yax/9F6WO4zgFwn9R6jiOUyA81B3HcQqEh7rjOE6B8FB3HMcpEB7qjuM4BcJD3XEcp0B4qDuO4xQID3XHcZwC8QeUfB0d/qQ4KAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_train.flatten(), y_train)\n", "sb.scatterplot(X12_train.flatten(), m10_train_predicted)" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "sb.scatterplot(X12_test.flatten(), y_test)\n", "sb.scatterplot(X12_test.flatten(), m10_test_predicted)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Let us summarize the models' errors" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "M1 train 39.0\n", "M1 test 38.0\n", "M2 train 29.0\n", "M2 test 33.0\n", "M10 train 26.0\n", "M10 test 29.0\n" ] } ], "source": [ "print(\"M1 train\", round(mean_squared_error(y_train, m1_train_predicted)))\n", "print(\"M1 test\", round(mean_squared_error(y_test, m1_test_predicted)))\n", " \n", "print(\"M2 train\", round(mean_squared_error(y_train, m2_train_predicted)))\n", "print(\"M2 test\", round(mean_squared_error(y_test, m2_test_predicted)))\n", "\n", "print(\"M10 train\", round(mean_squared_error(y_train, m10_train_predicted)))\n", "print(\"M10 test\", round(mean_squared_error(y_test, m10_test_predicted)))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will talk about model selection and feature selection in more detail in one of the next classes." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Is there only one way to split the dataset? Cross-validation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Cross-validation reuses the dataset to create multiple train-holdout subset pairs.\n", "\n", "The major assumption is that our whole dataset is a representative sample. By taking random subsamples from the whole dataset we can estimate the performance of the model on previously unseen data." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### LeaveOneOut\n", "\n", "LeaveOneOut (or LOO) is a simple cross-validation. Each learning set is created by taking all the samples except one, the test set being the sample left out. 
Thus, for n samples, we have n different training sets and n different test sets. This cross-validation procedure does not waste much data as only one sample is removed from the training set." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### KFold\n", "\n", "KFold divides all the samples into k groups of samples, called folds (if k = n, this is equivalent to the Leave One Out strategy), of equal sizes (if possible). The prediction function is learned using \n", "k−1 folds, and the fold left out is used for test.\n", "\n", "![](https://scikit-learn.org/stable/_images/sphx_glr_plot_cv_indices_0041.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### ShuffleSplit\n", "\n", "The ShuffleSplit iterator will generate a user defined number of independent train / test dataset splits. Samples are first shuffled and then split into a pair of train and test sets.\n", "\n", "![](https://scikit-learn.org/stable/_images/sphx_glr_plot_cv_indices_0061.png)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### TimeSeriesSplit\n", "\n", "TimeSeriesSplit is a variation of k-fold which returns first \n", "k folds as train set and the (k+1)th fold as test set. Note that unlike standard cross-validation methods, successive training sets are supersets of those that come before them. 
Also, it adds all surplus data to the first training partition, which is always used to train the model.\n", "\n", "![](https://scikit-learn.org/stable/_images/sphx_glr_plot_cv_indices_0101.png)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.716098217736928" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "KNeighborsRegressor().fit(X, y).score(X, y)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7079649368669324" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXQAAAD8CAYAAABn919SAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAADwFJREFUeJzt3W+MHPV9x/HPJyFRI44apw4r11A2lQiKhVuQV1GkPMheUVqKJQhNU2G1CBSSi6pC8sCt5KYPioqQrCoJT5ontCBQpHCiUVqIQaXUZYNSkajnYDCORUjpNbWxIBCwOJT+cfTtgxviq7nzzO3s7N5+5/2STrczO/ub7/fW+/Hc7OzvHBECAEy/d0y6AADAaBDoAJAEgQ4ASRDoAJAEgQ4ASRDoAJAEgQ4ASRDoAJAEgQ4ASZwzzp1t2bIlut1u7XHefPNNnXvuufULmjJt7Vtqb+9t7Vtqb++r9X3w4MFXIuJ9ZY8da6B3u10tLCzUHmcwGKjf79cvaMq0tW+pvb23tW+pvb2v1rft/6jyWE65AEASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASY/2kKICNo7v34Ynte3HfrontOzOO0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgCQIdAJIg0AEgidJAt32R7cdtH7V9xPbni/W32T5u+1DxdXXz5QIA1lLlT9CdkrQnIr5n+zxJB20/Vtx3Z0R8sbnyAABVlQZ6RJyQdKK4/Ybto5K2NV0YAGB91nUO3XZX0hWSvlususX2M7bvsb15xLUBANbBEVFtQ3tG0rck3RER37DdkfSKpJB0u6StEfGpVR43J2lOkjqdzs75+fnaRS8tLWlmZqb2ONOmrX1L7e29yb4PHz/ZyLhV7Ni2qXQbnvPTZmdnD0ZEr+yxlQLd9rsk7Zf0aER8eZX7u5L2R8RlZxun1+vFwsJC6f7KDAYD9fv92uNMm7b2LbW39yb77u59uJFxq1jct6t0G57z02xXCvQqV7lY0t2Sjq4Mc9tbV2x2naRnqxYMABi9Kle5fETSDZIO2z5UrPuCpN22L9fyKZdFSZ9tpEIAQCVVrnL5tiSvctcjoy8HADAsPikKAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIE
OAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQBIEOAEkQ6ACQRGmg277I9uO2j9o+Yvvzxfr32n7M9vPF983NlwsAWEuVI/RTkvZExAclfVjSH9neLmmvpAMRcYmkA8UyAGBCSgM9Ik5ExPeK229IOippm6RrJd1XbHafpI83VSQAoNy6zqHb7kq6QtJ3JXUi4oS0HPqSLhh1cQCA6hwR1Ta0ZyR9S9IdEfEN269HxPkr7n8tIt52Ht32nKQ5Sep0Ojvn5+drF720tKSZmZna40ybtvYttbf3Jvs+fPxkI+NWsWPbptJteM5Pm52dPRgRvbLHVgp02++StF/SoxHx5WLdc5L6EXHC9lZJg4i49Gzj9Hq9WFhYKN1fmcFgoH6/X3ucadPWvqX29t5k3929DzcybhWL+3aVbsNzfprtSoFe5SoXS7pb0tG3wrzwkKQbi9s3SnqwasEAgNE7p8I2H5F0g6TDtg8V674gaZ+kB2zfLOlHkj7ZTIkAgCpKAz0ivi3Ja9x95WjLAQAMi0+KAkASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJEGgA0ASBDoAJFEa6Lbvsf2y7WdXrLvN9nHbh4qvq5stEwBQpsoR+r2Srlpl/Z0RcXnx9choywIArFdpoEfEE5J+MoZaAAA11DmHfovtZ4pTMptHVhEAYCiOiPKN7K6k/RFxWbHckfSKpJB0u6StEfGpNR47J2lOkjqdzs75+fnaRS8tLWlmZqb2ONOmrX1L7e29yb4PHz/ZyLhV7Ni2qXQbnvPTZmdnD0ZEr+yxQwV61fvO1Ov1YmFhoXR/ZQaDgfr9fu1xpk1b+5ba23uTfXf3PtzIuFUs7ttVug3P+Wm2KwX6UKdcbG9dsXidpGfX2hYAMB7nlG1g+35JfUlbbB+T9OeS+rYv1/Ipl0VJn22wRgBABaWBHhG7V1l9dwO1AABq4JOiAJAEgQ4ASZSecgHQrLNdbbJnxyndNMGrUTBdOEIHgCQIdABIgkAHgCQIdABIgkAHgCQIdABIgkAHgCS4Dh3QZGcebKMqP+8mrsGvMsvjNOMIHQCSINABIAkCHQCSINABIAkCHQCSINABIAkuW8SGstblbEwjC5TjCB0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASCJ0kC3fY/tl20/u2Lde20/Zvv54vvmZssEAJSpcoR+r6Srzli3V9KBiLhE0oFiGQAwQaWBHhFPSPrJGauvlXRfcfs+SR8fcV0AgHUa9hx6JyJOSFLx/YLRlQQAGIYjonwjuytpf0RcViy/HhHnr7j/tYhY9Ty67TlJc5LU6XR2zs/P1y56aWlJMzMztceZNm3o+/Dxk6uu77xHeumnYy5mA2hr31Izve/Ytmm0AzZgtdf57OzswYjolT122PnQX7K9NSJO2N4q6eW1NoyIuyTdJUm9Xi/6/f6QuzxtMBhoFONMmzb0vdac53t2nNKXDrdv+v629i010/vi7/dHOl4T6rzOhz3l8pCkG4vbN0p6cMhxAAAjUuWyxfslPSnpUtvHbN8saZ+kj9l+XtLHimUAwASV/j4TEbvXuOvKEdcCAKiBT4oCQBIEOgAkQaADQBIEOgAkQaADQBIEOgAkQaADQBIEOgAkQaADQBIEOgAk0c5p3AC0UneN2TzHYXHfrsb3wRE6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRBoANAEgQ6ACRR6y8W2V6U9Iakn0k6FRG9URQFAFi/UfwJutmIeGUE4wAAauCUCwAkUTfQQ9I/2j5oe24UBQEAhuOIGP7
B9i9HxIu2L5D0mKRbI+KJM7aZkzQnSZ1OZ+f8/HydeiVJS0tLmpmZqT3OtBlX34ePn2x8H+vVeY/00k8nXcX4tbVvKV/vO7ZtqrTdaq/z2dnZg1Xeo6wV6P9vIPs2SUsR8cW1tun1erGwsFB7X4PBQP1+v/Y402ZcfXf3Ptz4PtZrz45T+tLhUbzlM13a2reUr/fFfbsqbbfa69x2pUAf+pSL7XNtn/fWbUm/KenZYccDANRT57+/jqS/s/3WOF+LiH8YSVUAgHUbOtAj4gVJvz7CWgAANXDZIgAkQaADQBJ53kJO6MyrTfbsOKWbNuAVKAA2Bo7QASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkiDQASAJAh0Akpia6XNXTiU77mlkq/5xVwCYJI7QASAJAh0AkiDQASAJAh0AkiDQASAJAh0AkpiayxYnqTvGSyQBYFgcoQNAEgQ6ACRBoANAErUC3fZVtp+z/UPbe0dVFABg/YYOdNvvlPQVSb8tabuk3ba3j6owAMD61DlC/5CkH0bECxHxP5LmJV07mrIAAOtVJ9C3SfrPFcvHinUAgAlwRAz3QPuTkn4rIj5dLN8g6UMRcesZ281JmisWL5X03PDl/twWSa+MYJxp09a+pfb23ta+pfb2vlrfF0fE+8oeWOeDRcckXbRi+UJJL565UUTcJemuGvt5G9sLEdEb5ZjToK19S+3tva19S+3tvU7fdU65/KukS2y/3/a7JV0v6aEa4wEAahj6CD0iTtm+RdKjkt4p6Z6IODKyygAA61JrLpeIeETSIyOqZT1GegpnirS1b6m9vbe1b6m9vQ/d99BvigIANhY++g8ASWzoQK86tYDt37UdtlO8I17Wt+2bbP/Y9qHi69OTqLMJVZ5z279n+/u2j9j+2rhrbEKF5/zOFc/3D2y/Pok6m1Ch91+x/bjtp2w/Y/vqSdQ5ahX6vtj2gaLnge0LSweNiA35peU3Wv9N0q9KerekpyVtX2W78yQ9Iek7knqTrnscfUu6SdJfTbrWCfV+iaSnJG0uli+YdN3j6PuM7W/V8kUIE699TM/5XZL+sLi9XdLipOseU99/K+nG4vZvSPpq2bgb+Qi96tQCt0v6S0n/Nc7iGtTmKRWq9P4ZSV+JiNckKSJeHnONTVjvc75b0v1jqax5VXoPSb9Y3N6kVT7vMoWq9L1d0oHi9uOr3P82GznQS6cWsH2FpIsiYv84C2tY1SkVPlH8KvZ12xetcv80qtL7ByR9wPa/2P6O7avGVl1zKk+jYftiSe+X9M9jqGscqvR+m6Q/sH1My1fV3arpV6XvpyV9orh9naTzbP/S2QbdyIHuVdb9/JIc2++QdKekPWOraDzO2nfhm5K6EfFrkv5J0n2NVzUeVXo/R8unXfpaPlL9G9vnN1xX06r0/ZbrJX09In7WYD3jVKX33ZLujYgLJV0t6avF63+aVen7jyV91PZTkj4q6bikU2cbdCP/UMqmFjhP0mWSBrYXJX1Y0kMJ3hgtnVIhIl6NiP8uFv9a0s4x1da0KtNJHJP0YET8b0T8u5bnBrpkTPU1pdI0GoXrled0i1St95slPSBJEfGkpF/Q8nwn06zK6/zFiPidiLhC0p8V606ebdCNHOhnnVogIk5GxJaI6EZEV8tvil4TEQuTKXdkSqdUsL11xeI1ko6Osb4mVZlO4u8lzUqS7S1aPgXzwlirHL1K02jYvlTSZklPjrm+JlXp/UeSrpQk2x/UcqD/eKxVjl6V1/mWFb+J/Kmke8oG3bCBHhGnJL01tcBRSQ9ExBHbf2H7mslW15yKfX+uuGTvaUmf0/JVL1OvYu+PSnrV9ve1/EbRn0TEq5OpeDTW8W99t6T5KC57yKBi73skfab4936/pJum/WdQse++pOds/0BSR9IdZePySVEASGLDHqEDANaHQAeAJAh0AEiCQAeAJAh0AEiCQAeAJAh0AEi
CQAeAJP4Pl98x+WeoV8oAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.model_selection import ShuffleSplit\n", "from sklearn.model_selection import cross_val_score\n", "\n", "reg = LinearRegression()\n", "cv = ShuffleSplit(n_splits=100, test_size=0.1, random_state=0)\n", "\n", "# no `scoring` argument is passed here, so the estimator's default scorer is used (R^2 for regressors)\n", "# sklearn scorers are always maximized; that is why error metrics are exposed as neg_* (score = - cost_function)\n", "s = cross_val_score(reg, X, y, cv=cv)\n", "pd.Series(s).hist()\n", "s.mean() # R^2" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "['accuracy',\n", " 'adjusted_mutual_info_score',\n", " 'adjusted_rand_score',\n", " 'average_precision',\n", " 'balanced_accuracy',\n", " 'brier_score_loss',\n", " 'completeness_score',\n", " 'explained_variance',\n", " 'f1',\n", " 'f1_macro',\n", " 'f1_micro',\n", " 'f1_samples',\n", " 'f1_weighted',\n", " 'fowlkes_mallows_score',\n", " 'homogeneity_score',\n", " 'mutual_info_score',\n", " 'neg_log_loss',\n", " 'neg_mean_absolute_error',\n", " 'neg_mean_squared_error',\n", " 'neg_mean_squared_log_error',\n", " 'neg_median_absolute_error',\n", " 'normalized_mutual_info_score',\n", " 'precision',\n", " 'precision_macro',\n", " 'precision_micro',\n", " 'precision_samples',\n", " 'precision_weighted',\n", " 'r2',\n", " 'recall',\n", " 'recall_macro',\n", " 'recall_micro',\n", " 'recall_samples',\n", " 'recall_weighted',\n", " 'roc_auc',\n", " 'v_measure_score']" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import sklearn.metrics\n", "sorted(sklearn.metrics.SCORERS.keys())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Bias - Variance Tradeoff\n", "\n", "The **bias** is an error from erroneous assumptions in the learning algorithm. 
High bias can cause an algorithm to miss the relevant relations between features and target outputs (underfitting).\n", "\n", "\n", "The **variance** is an error from sensitivity to small fluctuations in the training set. High variance can cause an algorithm to model the random noise in the training data, rather than the intended outputs (overfitting)." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Let's plot some learning curves" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "scrolled": true, "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "#From http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html\n", "\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "from sklearn.naive_bayes import GaussianNB\n", "from sklearn.svm import SVC\n", "from sklearn.datasets import load_digits\n", "from sklearn.model_selection import learning_curve\n", "from sklearn.model_selection import ShuffleSplit\n", "\n", "def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,\n", " n_jobs=None, train_sizes=np.linspace(.3, 1.0, 10)):\n", " plt.figure()\n", " plt.title(title)\n", " if ylim is not None:\n", " plt.ylim(*ylim)\n", " plt.xlabel(\"Training examples\")\n", " plt.ylabel(\"Score\")\n", " train_sizes, train_scores, test_scores = learning_curve(\n", " estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes, scoring=\"neg_mean_squared_error\")\n", " train_scores_mean = np.mean(train_scores, axis=1)\n", " train_scores_std = np.std(train_scores, axis=1)\n", " test_scores_mean = np.mean(test_scores, axis=1)\n", " test_scores_std = np.std(test_scores, axis=1)\n", " plt.grid()\n", "\n", " plt.fill_between(train_sizes, train_scores_mean - train_scores_std,\n", " train_scores_mean + train_scores_std, alpha=0.1,\n", " color=\"r\")\n", " plt.fill_between(train_sizes, test_scores_mean - test_scores_std,\n", " test_scores_mean + 
test_scores_std, alpha=0.1, color=\"g\")\n", " plt.plot(train_sizes, train_scores_mean, 'o-', color=\"r\",\n", " label=\"Training score\")\n", " plt.plot(train_sizes, test_scores_mean, 'o-', color=\"g\",\n", " label=\"Cross-validation score\")\n", "\n", " plt.legend(loc=\"best\")\n", " return plt" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "scrolled": true, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html\n", "title = \"Learning Curves\"\n", "\n", "# Create the CV iterator\n", "cv_iterator = KFold(n_splits=5, shuffle=True, random_state=10)\n", "model = LinearRegression()\n", "# model = KNeighborsRegressor(n_neighbors=2)\n", "\n", "plot_learning_curve(model, title, X, y, cv=cv_iterator, n_jobs=4)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "#### Lasso Regression\n", "\n", "“when you have two competing theories that make exactly the same predictions, the simpler one is the better.” - William of Ockham\n", "\n", "So for a regression model, LASSO (least absolute shrinkage and selection operator), more commonly referred to as L1 regularization, can be used to penalize a large number of parameters.\n", "\n", "* L1 regularization (the last term of the equation) favors a sparse model with features having coefficients equal to zero or close to zero:\n", "\n", "$$ Loss = ||y - Xw||^2_2 + \\alpha * ||w||_1$$\n", "\n", "L1 norm $||w||_1$ is simply a sum of absolute values of coefficients and $\\alpha$ regulates the strength of regularization. 
A zero coefficient for a feature essentially means that the feature is eliminated.\n", "\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "scrolled": true, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "34.634124343427146" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.linear_model import Lasso, LinearRegression\n", "from sklearn.model_selection import cross_val_score, KFold\n", "from sklearn.metrics import mean_squared_error\n", "\n", "llr = Lasso(alpha=0.5)\n", "llr.fit(X, y)\n", "preds = llr.predict(X)\n", "\n", "# Create the CV iterator\n", "cv_iterator = KFold(n_splits=5, shuffle=True, random_state=10)\n", "\n", "# Note: default in sklearn: higher return values are better than lower return values\n", "cross_val_score(llr, X, y, cv=cv_iterator, scoring=\"neg_mean_squared_error\")\n", "cross_val_score(llr, X, y, cv=5, scoring=\"neg_mean_squared_error\")\n", "abs(np.mean(cross_val_score(llr, X, y, cv=5, scoring=\"neg_mean_squared_error\")))" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "scrolled": true, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# http://scikit-learn.org/stable/auto_examples/model_selection/plot_learning_curve.html\n", "title = \"Learning Curves\"\n", "\n", "# Create the CV iterator\n", "cv_iterator = KFold(n_splits=5, shuffle=True, random_state=10)\n", "llr = Lasso(alpha=0.5)\n", "\n", "plot_learning_curve(llr, title, X, y, cv=cv_iterator, n_jobs=4)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Ridge regression addresses some of the problems of Ordinary Least Squares by imposing a penalty on the size of coefficients. The ridge coefficients minimize a penalized residual sum of squares,\n", " \n", "\n", "$$ Loss = ||y - Xw||^2_2 + \\alpha * ||w||^2_2$$\n", "\n", "Here, $\\alpha \\geq 0$ is a complexity parameter that controls the amount of shrinkage: the larger the value of $\\alpha$, the greater the amount of shrinkage and thus the coefficients become more robust to collinearity." ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from sklearn.linear_model import Ridge\n", "\n", "title = \"Learning Curves\"\n", "\n", "# Create the CV iterator\n", "cv_iterator = KFold(n_splits=5, shuffle=True, random_state=10)\n", "llr = Ridge(alpha=0.5)\n", "\n", "plot_learning_curve(llr, title, X, y, cv=cv_iterator, n_jobs=4)\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Optimizing hyperparameters with GridSearchCV\n" ] }, { "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "GridSearchCV(cv=KFold(n_splits=5, random_state=None, shuffle=False),\n", " error_score='raise-deprecating',\n", " estimator=KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=None, n_neighbors=5, p=2,\n", " weights='uniform'),\n", " fit_params=None, iid=False, n_jobs=None,\n", " param_grid={'n_neighbors': [1, 3, 5, 7, 9]},\n", " pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',\n", " scoring=None, verbose=0)" ] }, "execution_count": 84, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import GridSearchCV\n", "\n", "# Hyperparameter optimization:\n", "# (Hyper)parameter grid\n", "p_grid = {\n", " \"n_neighbors\": [1, 3, 5, 7, 9]\n", "}\n", "\n", "# Note that GridSearchCV requires a cross-validation scenario, in this case KFold\n", "\n", "knnr = KNeighborsRegressor()\n", "grid_search = GridSearchCV(estimator=knnr, param_grid=p_grid, cv=KFold(n_splits=5), iid=False)\n", "grid_search.fit(X, y)" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
mean_fit_timestd_fit_timemean_score_timestd_score_timeparam_n_neighborsparamssplit0_test_scoresplit1_test_scoresplit2_test_scoresplit3_test_score...mean_test_scorestd_test_scorerank_test_scoresplit0_train_scoresplit1_train_scoresplit2_train_scoresplit3_train_scoresplit4_train_scoremean_train_scorestd_train_score
00.0005030.0002540.0009520.0001171{'n_neighbors': 1}-2.627907-0.236199-0.600097-0.076035...-1.0271720.95897751.0000001.0000001.0000001.0000001.0000001.0000000.000000
10.0003260.0000150.0010070.0000283{'n_neighbors': 3}-1.3100980.217386-0.466775-0.026002...-0.3805460.52129340.8082600.7983740.7804390.7908680.7420800.7840040.022857
20.0003330.0000090.0010820.0001385{'n_neighbors': 5}-1.1092120.149350-0.425920-0.014744...-0.3150160.44013030.7497610.7100130.7300120.7314330.6520180.7146470.033748
30.0003430.0000060.0010800.0000807{'n_neighbors': 7}-1.0081520.166533-0.4385160.031234...-0.3053350.41191920.6986210.6665510.6581430.6958480.6216030.6681530.028160
40.0003580.0000110.0011420.0001219{'n_neighbors': 9}-1.0082670.156244-0.3901390.039706...-0.2742950.41162510.6612070.6358980.6117500.6513730.5798080.6280070.029319
\n", "

5 rows × 21 columns

\n", "
" ], "text/plain": [ " mean_fit_time std_fit_time mean_score_time std_score_time \\\n", "0 0.000503 0.000254 0.000952 0.000117 \n", "1 0.000326 0.000015 0.001007 0.000028 \n", "2 0.000333 0.000009 0.001082 0.000138 \n", "3 0.000343 0.000006 0.001080 0.000080 \n", "4 0.000358 0.000011 0.001142 0.000121 \n", "\n", " param_n_neighbors params split0_test_score split1_test_score \\\n", "0 1 {'n_neighbors': 1} -2.627907 -0.236199 \n", "1 3 {'n_neighbors': 3} -1.310098 0.217386 \n", "2 5 {'n_neighbors': 5} -1.109212 0.149350 \n", "3 7 {'n_neighbors': 7} -1.008152 0.166533 \n", "4 9 {'n_neighbors': 9} -1.008267 0.156244 \n", "\n", " split2_test_score split3_test_score ... mean_test_score \\\n", "0 -0.600097 -0.076035 ... -1.027172 \n", "1 -0.466775 -0.026002 ... -0.380546 \n", "2 -0.425920 -0.014744 ... -0.315016 \n", "3 -0.438516 0.031234 ... -0.305335 \n", "4 -0.390139 0.039706 ... -0.274295 \n", "\n", " std_test_score rank_test_score split0_train_score split1_train_score \\\n", "0 0.958977 5 1.000000 1.000000 \n", "1 0.521293 4 0.808260 0.798374 \n", "2 0.440130 3 0.749761 0.710013 \n", "3 0.411919 2 0.698621 0.666551 \n", "4 0.411625 1 0.661207 0.635898 \n", "\n", " split2_train_score split3_train_score split4_train_score \\\n", "0 1.000000 1.000000 1.000000 \n", "1 0.780439 0.790868 0.742080 \n", "2 0.730012 0.731433 0.652018 \n", "3 0.658143 0.695848 0.621603 \n", "4 0.611750 0.651373 0.579808 \n", "\n", " mean_train_score std_train_score \n", "0 1.000000 0.000000 \n", "1 0.784004 0.022857 \n", "2 0.714647 0.033748 \n", "3 0.668153 0.028160 \n", "4 0.628007 0.029319 \n", "\n", "[5 rows x 21 columns]" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# exploring the results of hyperparameter search using 5-fold cross-validation\n", "\n", "import warnings\n", "\n", "with warnings.catch_warnings():\n", " warnings.filterwarnings(\"ignore\",category=FutureWarning)\n", " df_cv_results = pd.DataFrame(grid_search.cv_results_)\n", 
"\n", "df_cv_results" ] }, { "cell_type": "code", "execution_count": 73, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'n_neighbors': 9}" ] }, "execution_count": 73, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_search.best_params_" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "-0.27429538641415063" ] }, "execution_count": 75, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_search.best_score_" ] }, { "cell_type": "code", "execution_count": 76, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "KNeighborsRegressor(algorithm='auto', leaf_size=30, metric='minkowski',\n", " metric_params=None, n_jobs=None, n_neighbors=9, p=2,\n", " weights='uniform')" ] }, "execution_count": 76, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grid_search.best_estimator_" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 22.333333\n", "1 22.088889\n", "2 23.122222\n", "3 24.600000\n", "4 23.922222\n", "dtype: float64" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "best_model = grid_search.best_estimator_\n", "pd.Series(best_model.predict(X)).head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Nested GridSearchCV cross-validation" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "scrolled": true, "slideshow": { "slide_type": "slide" } }, "outputs": [ { "data": { "text/plain": [ "array([0.56587668, 0.47906321, 0.46745653, 0.67292231, 0.52349235,\n", " 0.72262197, 0.68445332, 0.46320908, 0.39704074, 0.37277133])" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# CV iterators\n", "inner_cv_iterator = ShuffleSplit(n_splits=20, random_state=10)\n", "outer_cv_iterator = KFold(n_splits=10, shuffle=True, random_state=10)\n", "\n", "# Hyperparameter 
optimization:\n", "\n", "# (Hyperparameter) parameter grid\n", "p_grid = {\n", " \"n_neighbors\": [1, 3, 5, 7, 9]\n", "}\n", "\n", "# Bind the nested search to its own name: cross_val_score only fits clones of the\n", "# estimator it is given, so reusing `grid_search` here would leave that name pointing\n", "# at an unfitted GridSearchCV and break the best_params_ / best_score_ /\n", "# best_estimator_ cells above when they are re-run.\n", "knnr = KNeighborsRegressor()\n", "nested_grid_search = GridSearchCV(estimator=knnr, param_grid=p_grid, cv=inner_cv_iterator)\n", "\n", "# Outer loop: one score per outer fold, each from a model tuned by the inner search\n", "cross_val_score(estimator=nested_grid_search, X=X, y=y, cv=outer_cv_iterator)" ] } ], "metadata": { "celltoolbar": "Slideshow", "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" }, "livereveal": { "start_slideshow_at": "selected" } }, "nbformat": 4, "nbformat_minor": 2 }