{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Chapter 7: Problems with the predictors\n",
"\n",
"Load the packages:"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'%.7f'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import scipy as sp\n",
"import statsmodels.api as sm\n",
"import statsmodels.formula.api as smf\n",
"%precision 7\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Errors in the predictors\n",
"\n",
"Load in the data:"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" speed | \n",
" dist | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 4 | \n",
" 2 | \n",
"
\n",
" \n",
" 1 | \n",
" 4 | \n",
" 10 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 4 | \n",
"
\n",
" \n",
" 3 | \n",
" 7 | \n",
" 22 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 | \n",
" 16 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" speed dist\n",
"0 4 2\n",
"1 4 10\n",
"2 7 4\n",
"3 7 22\n",
"4 8 16"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cars = pd.read_csv(\"data/cars.csv\")\n",
"cars.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up the plot and the random seed"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEKCAYAAAAIO8L1AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAGUJJREFUeJzt3X+UZGV95/H3JwNqi3oaMq0LPTMZzLKTsI5xsEXcSVyiJKOGI51JjBIxE0N2kj2QSHbDCupZzFlzmAQTTUzCZoKYMXFBojiQo3uQ8EPIbkB6GJbhhyMc5Mf8WKY5ZILILDDjd/+o29L03K6qrupbz33qfl7nzKmqp25VfefpOv3t+31+XEUEZmZmc/1Q6gDMzKyenCDMzKyUE4SZmZVygjAzs1JOEGZmVsoJwszMSjlBmJlZKScIMzMr5QRhZmaljkgdQD+WLl0aK1euTB2GmVlWtm3b9kREjHU6LusEsXLlSqamplKHYWaWFUmPdHOcS0xmZlbKCcLMzEo5QZiZWSknCDMzK+UEYWZmpSpLEJIul7RP0j2z2i6R9C1Jd0v6iqTRWc9dKOlBSTslrasqLjOz1LZu383aTTdy/AVfZe2mG9m6fXfqkEpVeQbx18A75rRdD7wuIl4PfBu4EEDSicD7gH9bvOYvJC2pMDYzsyS2bt/NhVfvYPf+AwSwe/8BLrx6Ry2TRGUJIiJuAZ6c0/b1iDhYPLwNWFbcPwO4MiKejYjvAA8CJ1cVm5lZKpdct5MDzx96UduB5w9xyXU7E0U0v5RjEL8G/M/i/jjw2KzndhVth5G0UdKUpKnp6emKQzQzW1x79h9YUHtKSRKEpI8CB4EvzDSVHBZlr42IzRExERETY2MdV4qbmdXKcaMjC2pPaeAJQtIG4HTg/RExkwR2ActnHbYM2DPo2MzMqnb+ulWMHPniIdaRI5dw/rpViSKa30AThKR3AB8G3h0Rz8x66lrgfZJeKul44ATgm4OMzcxsECbXjHPx+tWMj44gYHx0hIvXr2ZyTWlVPanKNuuTdAVwKrBU0i7gIlqzll4KXC8J4LaI+M2IuFfSVcB9tEpP50TEofJ3NjPL2+Sa8VomhLn0QpUnPxMTE+HdXM3MFkbStoiY6HScV1KbmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKOUGYmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKOUGYmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMDOzUk4QZmZWygnCzMxKVZYgJF0uaZ+ke2a1HSPpekkPFLdHF+2S9KeSHpR0t6STqorLzMy6U+UZxF8D75jTdgFwQ0ScANxQPAZ4J3BC8W8jcGmFcZmZWRcqSxARcQvw5JzmM4Atxf0twOSs9s9Hy23AqKRjq4rNzMw6G/QYxGsiYi9Acfvqon0ceGzWcbuKNjMzS6Qug9QqaYvSA6WNkqYkTU1PT1cclplZcw06QTw+UzoqbvcV7buA5bOOWwbsKXuDiNgcERMRMTE2NlZpsGZmTTboBHEtsKG4vwG4Zlb7rxSzmU4B/mWmFGVmZmkcUdUbS7oCOBVYKmkXcBGwCbhK0tnAo8B7isO/BrwLeBB4BvhgVXGZmVl3KksQEXHmPE+9veTYAM6pKhYzM1u4ugxSm5lZzThBmJlZKScIMzMrVdkYhJmZldu6fTeXXLeTPfsPcNzoCOevW8XkmvqtDXaCMDMboK3bd3Ph1Ts48PwhAHbvP8CFV+8AqF2ScInJzGyALrlu5w+Sw4wDzx/ikut2Jopofk4QZmYDtHv/gQW1p+QEYWY2QEtUtvXc/O0pOUGYmQ3QoSjdh3Te9pScIMzMBmh8dGRB7Sk5QZiZDdD561YxcuSSF7WNHLmE89etShTR/DzN1cxsgGamsnodhJmZHWZyzXgtE8JcLjGZmVkpJwgzMyvlBGFmZqWcIMzMrJQHqc3MMjLInWCdIMzMMjHonWCdIMysVC7XLGiSdjvBOkGY2UDkdM2CJtkzz46v87X3y4PUZnaYnK5Z0CTHzbNf03zt/XKCMLPDDPovVevOoPdxcoIws8MM+i9V687kmnEuXr+a8dERRGsH2IvXr/YsJjMbnPPXrXrRGATUd8fRphnkPk5JEoSk3wF+HQhgB/BB4FjgSuAY4E7gAxHxXIr4zNppwuyenHYcteooBnwVI0njwD8CJ0bEAUlXAV8D3gVcHRFXSvrvwP+JiEvbvdfExERMTU1VH7RZYe7sHmj9ZV3lab7ZYpO0LSImOh2XagziCGBE0hHAy4G9wNuALxXPbwEmE8VmNi/P7rEmGXiCiIjdwCeBR2klhn8BtgH7I+JgcdguoPTPMUkbJU1Jmpqenh5EyGY/4Nk91iQDTxCSjgbOAI4HjgOOAt5Zcmhp7SsiNkfERERMjI2NVReoWQnP7rEmSVFiOg34TkRMR8TzwNXAvwNGi5ITwDJgT4LYzNrK6XrCZv1KkSAeBU6R9HJJAt4O3AfcBPxiccwG4JoEsZm1Neh56GYpDXwWE4Ck3wPeCxwEttOa8jrOC9NctwNnRcSz7d7Hs5jMqtOE6bxN1e0spiTrICLiIuCiOc0PAScnCMfM5vBmfQbeasPMSng6r4EThJmV8HReAycIMyvh6bwGThBmVsLTeQ28m6uZlfBmfQZOEGY2j0FuK2311HWJSdKPSDqtuD8i6ZXVhWVmZql1lSAk/QdaO63+ZdG0DNhaVVBmZpZetyWmc2gtYrsdICIekPTqyqIyMxtiuaxS7zZBPBsRz7W2ToJiU73B79FhZpa5nFapdzsG8Q1JH6F1kZ+fAf4O+PvqwjIzG045rVLv9gziAuBsWteP/g1alwi9rKqgzCxvuZRQUshplXq3CWIEuDwi/gpA0pKi7ZmqAjOzPOVUQknhuNERdpckgzquUu+2xHQDrYQwYwT4h8UPx8xyl1MJJYWcVql3ewbxsoh4euZBRDwt6eUVxWRmGcuphJJCTqvUu00Q35N0UkTcCSDpjYB/2mZ2mJxKKJBmvCSXVerdlpjOA/5O0q2SbgW+CJxbXVhmlqucSigz4yW79x8geGG8ZOv23alDq4WuziAi4g5JPwasAgR8KyKerzQyM8tSTiWUduMldYx30BayWd+bgJXFa9ZIIiI+X0lUZpa1Xksogy73eLykva4ShKS/AX4UuAuYSbcBOEGY2aJIMT02t/GSQev2DGICODEivL2GmVUiRbnn/HWrXpSUoL7jJSl0myDuAf4VsLfCWMyswVKUe3IaL0mh2wSxFLhP0jeBZ2caI+LdlURlZo2TqtyTy5TTFLpNEB+vMggzM5d76qfbaa7fWMwPlTRKa7O/19Ea7P41YCet9RUrgYeBX4qIf17MzzWz+nK5p37UzbizpFOAzwA/DrwEWAJ8LyJe1dOHSluAWyPiMkkvAV4OfAR4MiI2SboAODoiPtzufSYmJmJqaqqXEMx65p1KLXeStkXERKfjul1J/WfAmcADtDbq+/WirZfAXgW8FfgsQEQ8FxH7gTOALcVhW4DJXt7frEpeeWtN0m2CICIeBJZExKGI+Bxwao+f+VpgGvicpO2SLpN0FPCaiNhbfNZewJc0tdrxTqXWJN0miGeKUtBdkv5Q0u8AR/X4mUcAJwGXRsQa4Hu0LkjUFUkbJU1Jmpqenu4xBLPeeOWtNUm3CeIDxbHn0vqFvhxY3+Nn7gJ2RcTtxeMv0UoYj0s6FqC43Vf24ojYHBETETExNjbWYwhmvZlvyqVX3tow6jZBTEbE/4uIpyLi9yLiPwGn9/KBEfF/gcckzcxdeztwH3AtsKFo2wBc08v7m1Upp51KzfrV7TqIDcCfzGn71ZK2bv0W8IWibPUQ8EFayeoqSWcDjwLv6fG9zSrTpKmYKWZreYZYvbSd5irpTOCXgZ8Ebp311KuAgxFxWrXhtedprmbVmLtxHrTOlC5ev7qyX9gpPrOpup3m2ukM4n/T2n9pKfBHs9q/C9zde3hmVmcpNs7ztRnqp22CiIhHgEcknQYciIjvS/o3wI8BOwYRoJkNXorZWp4hVj/djkHcAvyUpKOBG4Ap4L3A+6sKzGwY5VJjT7FxXo7XZsjl59mrbmcxKSKeoTW19TMR8fPAidWFZTZ8clqFnWK2Vm4zxHL6efaq6wQh6S20zhi+WrQt5HKlZo2X0yrsyTXjXLx+NeOjIwgYHx2pfLA4xWf2I6efZ6+6/SV/HnAh8JWIuFfSa4GbqgvLbPjkVmP3dRLay+3n2YuFbPf9jVmPHwJ+u6qgzIZRjjX2QUpxTep+NOHn2bbEJOnTxe3fS7p27r/BhGg2HHKrsQ9abiWbJvw8O51B/E1x+8mqAzEbdk1ahd2L3Eo2Tfh5dloHsa24/YakseK+t1A161GKun4uUzFzLNkM+zhNpxKTJH1c0hPAt4BvS5qW9F8HE56Z9SOnqZhNKNnkptM01/OAtcCbIuKHI+Jo4M3A2uKaEGZWYznV9XOb5toEnRLErwBnRsR3ZhqKGUxnFc+ZWY3lVte3eumUII6MiCfmNhbjEEdWE5KZLZacLnCUUzmsKToliOd6fM7MaiCnun5O5bCm6DTN9SckPVXSLuBlFcRjNjC5zO7pR6qpmL30rcth9dNpmuuSds+b5Sq3Vbv9GPRUzF77NsdprsOu2836zIaKyxnV6bVvcyqHNYV3ZLVGcjmjOr32bRNWJufGCcIayeWM6vTTt8O+Mjk3LjFZI7mcUR337fDwGYQ1kssZ1XHfDg9FROoYejYxMRFTU1OpwzAbSk2YBtxUkrZFxESn43wGYWaHadI0YJufxyDM7DCeBmyQ8AxC0hJgCtgdEadLOh64EjgGuBP4QER4Ow8z+iv3eFWz9SrlGcSHgPtnPf4D4FMRcQLwz8DZSaIyq5l+NrHr9bU5bfJn1UmSICQtA34OuKx4LOBtwJeKQ7YAkyliM6ubfso9XtVs/UhVYvo08F+AVxaPfxjYHxEHi8e7gNJzYEkbgY0AK1asqDhMs/T6Kfd4VbP1Y+AJQtLpwL6I2Cbp1JnmkkNL599GxGZgM7SmuVYSpFmN9LMy2auarR8pSkxrgXdLepjWoPTbaJ1RjEqaSVjLgD0JYjOrnX7KPS4VWT8GniAi4sKIWBYRK4H3ATdGxPuBm4BfLA7bAFwz6NjM6qifazX7Os/Wj6QrqYsS0+8W01xfywvTXLcDZ0XEs+1e75XUloJXGHfmPqq3LFZSR8TNwM3F/YeAk1PGY9aJVxh35j4aHl5JbbYAXmHcmftoeHgvJls0uZUVPrZ1B1fc/hiHIlgiceabl/OJydVtX+MVxp25j4aHzyBsUfSz2jeFj23dwd/e9iiHijG4QxH87W2P8rGtO9q+ziuMO3MfDQ8nCFsUuZUVrrj9sQW1z/C00c7cR8PDJSZbFLmVFQ7NM3tvvvYZXmHcmftoeDhB2KLI7RrPS6TSZLBEZYv6XyzFCuNB7+bar177KLdxrGHnEpMtitzKCme+efmC2lNKsZtrCjnF2hROELYoclux+4nJ1Zx1yoofnDEskTjrlBUdZzGlkGI31xRyirUpXGKyRZPb5m6fmFxdy4QwV4rdXFPIKdam8BmEWc31M200pymnOcXaFE4QZjXXlN1cc4q1KVxislrodfZKE2a99DNtNKcppznF2hRJd3Ptl3dzHQ5zN3eD1l+OnQa5e32dWdN1u5urS0yWXK+zVzzrxaxaThCWXK+zVzzrxaxaHoOwRdPreECvq7D7Xb2dYtyjCWMmNjx8BmGLop9VsL3OXuln1kuv8TZlVbMZOEHYIulnPKDXVdj9rN5OMe7hMRPLjUtMtij6HQ8Y9CrsFOMeHjOx3PgMwhZFilWw/ZRseo23KauazcAJwhZJilWw/ZRsUox7eKWw5cYlpprLZdZLilWw/ZRseo23KauazcArqWvNK4XbW7vpxtJpruOjI/yvC96WICKzPHgl9RDwrJf2XLIxq9bAE4Sk5ZJuknS/pHslfahoP0bS9ZIeKG6PHnRsdeNZL+3ldpEis9ykOIM4CPzniPhx4BTgHEknAhcAN0TECcANxeNG86yX4bN1+27WbrqR4y/4Kms33ehFclZrA08QEbE3Iu4s7n8XuB8YB84AthSHbQEmBx1b3biE0l5uK5Nzi9cs6RiEpJXAGuB24DURsRdaSQR4dbrI6sEllPZyG6PJLV6zZNNcJb0C+DJwXkQ8peLi8V28biOwEWDFihXVBVgTOV3nedBTcnMbo8ktXrMkZxCSjqSVHL4QEVcXzY9LOrZ4/lhgX9lrI2JzRExExMTY2NhgAraOUpRPchujyS1esxSzmAR8Frg/Iv541lPXAhuK+xuAawYdm/Wu3/JJL4O3uY3R5BavWYoS01rgA8AOSXcVbR8BNgFXSTobeBR4T4LYhkZO5Z65CwJnzj6AtjHntjI5t3jNBp4gIuIfgfkGHN4+yFiGVa+/cPvRz8V72p19dLP1RU6/YHOL15rNK6mHUIrZMv2UTzx4a1ZPThBDKMUv3H6m5Hrw1qyevJvrEOr3Ws296rV8cv66VaWbEnZ76VDX9M2q4TOIIZTbbJlezz68MtmsWj6DGEI5zpbp5eyjn8FtM+vMCWJINWG2jAe3zarlEpNly4PbZtVygrBs5TbWYpabRpaYPPOlvVz6J8exFrOcNC5BpFhlnJPc+qcJYy1mqTSuxOQ9+dtz/5jZjMYlCM98ac/9Y2YzGpcgPPOlPfePmc1oXILwzJf23D9mNqNxg9Se+dKe+8fMZigiUsfQs4mJiZiamkodhplZViRti4iJTsc1rsRkZmbdcYIwM7NSThBmZlaqcYPUucll2wszGz5OEDWW27YXZjZcXGKqMW97YWYpNfIMIpeyjbe9MLOUGncGkdN1jL3thZml1LgEkVPZxttemFlKtUsQkt4haaekByVdsNjvn1PZZnLNOBevX8346AgCxkdHuHj96lqWw8xs+NRqDELSEuDPgZ8BdgF3SLo2Iu5brM84bnSE3SXJoK5lG18Qx8xSqdsZxMnAgxHxUEQ8B1wJnLGYH+CyjZlZd+qWIMaBx2Y93lW0/YCkjZKmJE1NT08v+ANctjEz606tSkyAStpetN1sRGwGNkNrN9dePsRlGzOzzup2BrELWD7r8TJgT6JYzMwarW4J4g7gBEnHS3oJ8D7g2sQxmZk1Uq1KTBFxUNK5wHXAEuDyiLg3cVhmZo1UqwQBEBFfA76WOg4zs6arW4nJzMxqIutrUkuaBh5J8NFLgScSfG4u3D+duY/ac/901k8f/UhEjHU6KOsEkYqkqW4u+N1U7p/O3EftuX86G0QfucRkZmalnCDMzKyUE0RvNqcOoObcP525j9pz/3RWeR95DMLMzEr5DMLMzEo5QSyQpIcl7ZB0l6Sp1PGkJulySfsk3TOr7RhJ10t6oLg9OmWMqc3TRx+XtLv4Ht0l6V0pY0xJ0nJJN0m6X9K9kj5UtPt7RNv+qfw75BLTAkl6GJiICM/RBiS9FXga+HxEvK5o+0PgyYjYVFwV8OiI+HDKOFOap48+DjwdEZ9MGVsdSDoWODYi7pT0SmAbMAn8Kv4eteufX6Li75DPIKwvEXEL8OSc5jOALcX9LbS+zI01Tx9ZISL2RsSdxf3vAvfTug6Mv0e07Z/KOUEsXABfl7RN0sbUwdTUayJiL7S+3MCrE8dTV+dKursoQTWyfDKXpJXAGuB2/D06zJz+gYq/Q04QC7c2Ik4C3gmcU5QPzBbqUuBHgTcAe4E/ShtOepJeAXwZOC8inkodT92U9E/l3yEniAWKiD3F7T7gK7Suo20v9nhRN52pn+5LHE/tRMTjEXEoIr4P/BUN/x5JOpLWL78vRMTVRbO/R4Wy/hnEd8gJYgEkHVUMEiHpKOBngXvav6qRrgU2FPc3ANckjKWWZn7xFX6eBn+PJAn4LHB/RPzxrKf8PWL+/hnEd8izmBZA0mtpnTVA61oa/yMifj9hSMlJugI4ldbOko8DFwFbgauAFcCjwHsiorGDtPP00am0SgMBPAz8xky9vWkk/SRwK7AD+H7R/BFadfbGf4/a9M+ZVPwdcoIwM7NSLjGZmVkpJwgzMyvlBGFmZqWcIMzMrJQThJmZlXKCMOtA0keLXTTvLnbNfHOFn3WzJF+L2WrhiNQBmNWZpLcApwMnRcSzkpYCL0kcltlA+AzCrL1jgSci4lmAiHgiIvYU1wX5A0nfLP79awBJY5K+LOmO4t/aov2oYkO1OyRtl3RG0T4i6cri7OSLwEiq/6jZXE4QZu19HVgu6duS/kLSv5/13FMRcTLwZ8Cni7Y/AT4VEW8CfgG4rGj/KHBj0f7TwCXFdi3/EXgmIl4P/D7wxur/S2bdcYnJrI2IeFrSG4GfovWL/YvFxWsArph1+6ni/mnAia3tcwB4VbF/188C75b0u0X7y2htIfFW4E+Lz7pb0t1V/n/MFsIJwqyDiDgE3AzcLGkHL2wgN3ufmpn7PwS8JSIOzH6PYsO1X4iInXPa576PWW24xGTWhqRVkk6Y1fQG4JHi/ntn3f5Tcf/rwLmzXv+G4u51wG8ViQJJa4r2W4D3F22vA16/2P8Hs175DMKsvVcAn5E0ChwEHgQ20prZ9FJJt9P6Q+vM4vjfBv68KBUdQSsB/Cbw32iNU9xdJImHi/e4FPhccfxdwDcH9P8y68i7uZr1QNLDwEREPJE6FrOquMRkZmalfAZhZmalfAZhZmalnCDMzKyUE4SZmZVygjAzs1JOEGZmVsoJwszMSv1/XbxYJ4mT8HMAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots()\n",
"ax.scatter(cars.speed, cars.dist)\n",
"plt.xlabel(\"Speed\")\n",
"plt.ylabel(\"Distance\")\n",
"xr = np.array(ax.get_xlim())\n",
"np.random.seed(123)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do the four fits with increasing amounts of error. Since we only need univariate regression np.polyfit is adequate."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"\n",
"est = np.polyfit(cars.speed, cars.dist, 1)\n",
"ax.plot(xr, est[1] + est[0] * xr)\n",
"est1 = np.polyfit(cars.speed + np.random.normal(size=50), cars.dist, 1)\n",
"ax.plot(xr, est1[1] + est1[0] * xr, c='red')\n",
"est2 = np.polyfit(cars.speed + np.random.normal(scale=2,size=50), cars.dist, 1)\n",
"ax.plot(xr, est2[1] + est2[0] * xr, c='green')\n",
"est5 = np.polyfit(cars.speed + np.random.normal(scale=5,size=50), cars.dist, 1)\n",
"ax.plot(xr, est5[1] + est5[0] * xr, c='cyan')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([ 3.9324088, -17.5790949]),\n",
" array([ 3.7603449, -14.9792162]),\n",
" array([ 3.47154 , -10.7660126]),\n",
" array([2.0647813, 9.9521318]))"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"est, est1, est2, est5"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"vv = np.repeat(np.array([0.1, 0.2, 0.3, 0.4, 0.5]), [1000, 1000, 1000, 1000, 1000])\n",
"slopes = np.zeros(5000)\n",
"for i in range(5000):\n",
" slopes[i] = np.polyfit(cars.speed+np.random.normal(scale=np.sqrt(vv[i]), size=50), cars.dist, 1)[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([-0.1275732, 3.994792 ])"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"betas = np.reshape(slopes, (5, 1000)).mean(axis=1)\n",
"betas = np.append(betas,est[0])\n",
"variances = np.array([0.6, 0.7, 0.8, 0.9, 1.0, 0.5])\n",
"gv = np.polyfit(variances, betas,1)\n",
"gv"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzt3Xd4VVX6/v/3kwIJEHpATJBeBWkREYGEgFJGUaw4oqIooyMWBEYZZ8Y+fj8CKnZRUVHHAhYUFaSFJi1IryK9SJMOAZKs3x/n4A9jYk7gJDvJuV/XlcudtdfZeZbAnX3W3mcvc84hIiKhIczrAkREpOAo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREKIQl9EJIQo9EVEQkjAoW9m4Wa2yMzGZ7OvpJl9YmbrzGyemdU8bd8Qf/saM+sSnLJFRORMROSh7/3AKqBsNvv6Avucc3XNrBfwf8ANZtYY6AWcD5wLTDaz+s65jJx+SOXKlV3NmjXzUJaIiCxcuHCPcy42t34Bhb6ZxQN/AZ4GHsymy5XAY/7tscDLZmb+9o+dc8eBDWa2DmgNzMnpZ9WsWZPU1NRAyhIRET8z2xRIv0Cnd14A/gFk5rA/DtgC4JxLBw4AlU5v99vqbxMREQ/kGvpmdjmwyzm38M+6ZdPm/qQ968/oZ2apZpa6e/fu3EoSEZEzFMiZ/iVADzPbCHwMJJvZB1n6bAWqA5hZBFAO+PX0dr94YHvWH+CcG+mcS3DOJcTG5jolJSIiZyjX0HfODXHOxTvnauK7KDvVOdc7S7evgFv929f6+zh/ey//3T21gHrA/KBVLyIieZKXu3d+x8yeAFKdc18BbwPv+y/U/orvlwPOuRVm9imwEkgH7vmzO3dERCR/WWFbRCUhIcHl+e6dWS9AXEuo1eH/b9swA7b9CO0eCG6BIiKFkJktdM4l5NaveHwiN64ljOnjC3rw/XdMH1+7iIj85oyndwqVWh1w177DsQ9vJq35rVRc+QFc9+7vz/xFRKSYnOkDG8smMDq9ExVTRzCr/JUcPret1yWJiBQ6xSb0ax1aSL/oFCbH3kqjbWMY/OxLjFu8jcJ2zUJExEvFI/T9c/hh179L53teZF/3kfy/zOf4+NMPueGNuazacdDrCkVECoXiEfrbfvzdHH7di7oT0/sDHmp6lJ92HeLyl2bx2FcrOHDspLd1ioh4rHjcsvkn9h89wfDv1/LhvE1UKFWCh7o15NqW8YSFZfeECBGRoim0btn8E+VLleDJq5rwVf921Kxcmn+MXco1r//Asq0HvC5NRKTAFfvQP6VJXDnG3nUxw69rxpZfj9HjlVn884tl7DtywuvSREQKTMiEPoCZcU2reKYOSuT2S2rxyYItdByewgdzN5GRWbimuURE8kNIhf4pZaMi+ffljfnu/vY0PCeGf325nCtfmcXCTfu8Lk1EJF+FZOifUr9qDB/d2YaXbmzBnkMnuOa1Hxg0Zgm7Dx33ujQRkXwR0qEPvimfK5qdy5SBidydVIdxi7eRPDyFd2ZvID0jp4XCRESKppAP/VNKl4zgoa4NmfBAB5pXL8/jX6/kLy/OYu76vV6XJiISNAr9LOrElmH07a154+ZWHD6eTq+Rc7nvo0X8ciDN69JERM6aQj8bZkaX889h8oOJ3N+pHhNW/EKn4Sm8Mf1nTqRrykdEii6F/p+ILhHOgEvrM3lAIhfXqcwz362m64gZzPxJi7eLSNGk0A/AeZVK8datCbzT50IyMx03vz2fu95fyNZ9R70uTUQkTxT6edCxYRUmDujA4C4NSFm7i87PTeelKT+RdlLL/opI0ZBr6JtZlJnNN7MlZrbCzB7Ppk8NM5tiZkvNLMXM4k/b96z/davM7EUzK9JPOisZEc49HesyZWASnRpWZfiktVz2/AymrNrpdWkiIrkK5Ez/OJDsnGsGNAe6mlmbLH2GAaOdcxcATwDPAJhZW+AS4AKgCXAhkBik2j0VVz6aV25qyYd3XESJiDD6vpdK33cXsGnvEa9LExHJUa6h73wO+7+N9H9lfVBNY2CKf3sacOWplwNRQAmgpP+1xeqU+JK6lfn2vvY80r0Rc9fv5dLnZjD8+zUcO6EpHxEpfAKa0zezcDNbDOwCJjnn5mXpsgS4xr/dE4gxs0rOuTn4fgns8H9NdM6tCk7phUeJiDDu7FCbaYOS+MsF1Xhp6jo6PzedCct3aLlGESlUAgp951yGc645EA+0NrMmWboMAhLNbBG+6ZttQLqZ1QUa+V8XBySbWYesxzezfmaWamapu3cX3dshq5SN4vkbmvPp3y4mJiqCuz74kVtGzWfdrsO5v1hEpADkeeUsM3sUOOKcG5bD/jLAaudcvJkNBqKcc0/69/0HSHPOPZvT8YO9cpZX0jMy+XDeZob5p3r6tqvFvZ3qUaZkhNeliUgxFLSVs8ws1szK+7ejgc7A6ix9KpvZqWMNAUb5tzfjewcQYWaR+N4FFLvpnexEhIdxa9uaTBuUxNUt43hjxno6DU9h3OJtmvIREc8EMr1TDZhmZkuBBfjm9Meb2RNm1sPfJwlYY2ZrgarA0/72scDPwDJ88/5LnHNfB3MAhV3lMiV59tpmfPH3tlSJieL+jxdzw8i5rP7loNeliUgIKvYLoxcmGZmOTxZs4dmJqzmUls7NbWow4NL6lIuO9Lo0ESnitDB6IRQeZvz1ovOYNjCJG1tX5705G+k0PIUxqVvI1HKNIlIAFPoeqFC6BE9d1ZSv+7fjvIqlGDx2Kde8/gPLth7wujQRKeYU+h5qEleOsXe1Zdh1zdjy61F6vDKLR75Yxr4jJ7wuTUSKKYW+x8LCjGtbxTN1UBK3ta3Fxwu20HF4Ch/O20SGpnxEJMgU+oVE2ahI/nNFY769rz0NqsbwyBfLufKVWSzctM/r0kSkGFHoFzINzonh435tePHGFuw+dJxrXvuBwWOWsOfwca9LE5FiQKFfCJkZPZqdy9SBSdyVWIcvF2+j47AU3pm9gfQMLdcoImdOoV+IlS4ZwcPdGjLhgQ40r16ex79eyeUvzWLe+r1elyYiRZRCvwioE1uG0be35vXerTiUls4NI+dy/8eL2HkwzevSRKSIUegXEWZG1ybnMPnBRO7rVI/vlv9C8rAU3pj+MyfSNeUjIoFR6Bcx0SXCefDS+kwa0IGL61Time9W023EDGb+VHQfSS0iBUehX0TVqFSat269kFF9EkjPdNz89nzu/mAh2/Yf87o0ESnE9HD3Ii65YVXa1qnM27M28NLUn5i2Zhf9O9bljva1iYoM97o8ESlkdKZfDERFhnNPx7pMGZhEcsMqDPt+LV1emMHU1cVqOWIRCQKFfjESVz6aV29qxQd9LyIizLj93VT6vruATXuPeF2aiBQSCv1iqF29ynx3fwf+2b0hc9fv5dLnZ/Ccf9lGEQltCv1iqkREGP061GHqoCS6NzmHF6euo/Nz05mwfIeWaxQJYQr9Yq5q2She6NWCT/q1ISYqgrs++JFbRs3n592HvS5NRDyg0A8RF9WuxPh72/HYFY1ZvGU/XV+YwTPfreLw8XSvSxORAqTQDyER4WH0uaQW0wYl0bNFHG9MX0+n4SmMW7xNUz4iISLX0DezKDObb2ZLzGyFmT2eTZ8aZjbFzJaaWYqZxZ+27zwz+97MVpnZSjOrGdwhSF5VLlOSZ69txud/b0uVmCju/3gxvUbOZfUvB70uTUTyWSBn+seBZOdcM6A50NXM2mTpMwwY7Zy7AHgCeOa0faOBoc65RkBrYNfZly3B0PK8Cnx5zyX8t2dT1uw8xF9enMXjX6/gwLGTXpcmIvkk19B3Pqeu+kX6v7LOBTQGpvi3pwFXAphZYyDCOTfJf6zDzrmjwShcgiM8zPjrRecxbWASN7auzrs/bKTT8BTGpG4hU8s1ihQ7Ac3pm1m4mS3Gd5Y+yTk3L0uXJcA1/u2eQIyZVQLqA/vN7HMzW2RmQ83sD88GMLN+ZpZqZqm7d+vBYV6oULoET13VlK/7t6N6xVIMHruUa1//geXbDnhdmogEUUCh75zLcM41B+KB1mbWJEuXQUCimS0CEoFtQDq+Z/u09++/EKgN9Mnm+COdcwnOuYTY2NgzHYsEQZO4cnx2V1uGXdeMzb8e5YqXZ/HIF8vYd+SE16WJSBDk6e4d59x+IAXomqV9u3PuaudcC+ARf9sBYCuwyDm33jmXDnwJtAxG4ZJ/wsKMa1vFM3VQEn3a1uTjBVvoODyFD+dtIkNTPiJFWiB378SaWXn/djTQGVidpU9lMzt1rCHAKP/2AqCCmZ06fU8GVgajcMl/ZaMiefSK8/nmvnY0qBrDI18s56pXZvPj5n1elyYiZyiQM/1qwDQzW4ovxCc558ab2RNm1sPfJwlYY2ZrgarA0+CbFsI3tTPFzJYBBrwZ5DFIPmt4Tlk+7teGF29swa5DaVz96g8MHrOEPYePe12aiOSRFbYP5SQkJLjU1FSvy5AcHD6ezktTf2LUrA1ERYYz8NL69G5Tg4hwfc5PxEtmttA5l5BbP/1LlTwpUzKCId0a8d39HWhevTyPfb2Sy1+axbz1e70uTUQCoNCXM1K3ShlG396a13u34lBaOjeMnMsDHy9i58E0r0sTkT+h0JczZmZ0bXIOkx9M5L7kuny7/BeSh6UwcsbPnEjP9Lo8EcmGQl/OWnSJcB68rAGTBnTg4jqV+O+3q+k2YgazftrjdWkikoVCX4KmRqXSvHXrhYzqk0B6pqP32/O4+4OFbNt/zOvSRMQvwusCpPhJbliVtnUq89bM9bw8bR3T1uyif8e63NG+NlGRf3gKh4gUIJ3pS76Iigynf3I9pgxMomODKgz7fi1dXpjB1NU7vS5NJKQp9CVfxZWP5rXerXi/b2siwozb303ljvcWsGnvEa9LEwlJCn0pEO3rxfLd/R34Z/eGzPl5L5c+P4Pnvl/DsRMZXpcmElIU+lJgSkSE0a9DHaYMTKJbk3N4ceo6Oj83nQnLf9FyjSIFRKEvBe6cclGM6NWCT/q1ISYqgrs+WMgto+bz8+7Dub9YRM6KQl88c1HtSoy/tx2PXdGYxVv20/WFGTzz3SqOHE/3ujSRYkuhL56KCA+jzyW1mDowiauax/HG9PUkD0/hqyXbNeUjkg8U+lIoxMaUZOh1zfj8722JjSnJfR8totfIuaz55ZDXpYkUKwp9KVRanleBcfe04+meTViz8xDdX5zJ41+v4GDaSa9LEykWFPpS6ISHGTddVINpA5PodWF13v1hI8nDUhi7cCuZWq5R5Kwo9KXQqlC6BE/3bMrX/dtRvWIpBo1ZwrWv/8DybQe8Lk2kyFLoS6HXJK4cn93VlqHXXsDmX49yxcuz+NeXy9h/9ITXpYkUOQp9KRLCwozrEqozZWASfdrW5KP5W+g4LIX/zdtMhqZ8RAKWa+ibWZSZzTezJWa2wswez6ZPDTObYmZLzSzFzOKz7C9rZtvM7OVgFi+hp1x0JI9ecT7j721Hvaox/POLZfR8dTaLNu/zujSRIiGQM/3jQLJzrhnQHOhqZm2y9BkGjHbOXQA8ATyTZf+TwPSzLVbklEbVyvJJvzaM6NWcnQfT6PnqD/xj7BL2HD7udWkihVquoe98Tn0+PtL/lfX9dGNgin97GnDlqR1m1gqoCnx/1tWKnMbMuLJ5HFMGJvG3xNp8/uM2Og5L4d3ZG0jP0HKNItkJaE7fzMLNbDGwC5jknJuXpcsS4Br/dk8gxswqmVkYMBwYnMvx+5lZqpml7t69O28jkJBXpmQEQ7o1YsIDHWgWX57Hvl7J5S/NYv6GX70uTaTQCSj0nXMZzrnmQDzQ2syaZOkyCEg0s0VAIrANSAf+DnzrnNuSy/FHOucSnHMJsbGxeR6ECEDdKmV4v29rXu/dkkNp6Vz/xhwe+HgROw+meV2aSKFheX2+iZk9Chxxzg3LYX8ZYLVzLt7MPgTaA5lAGaAE8Kpz7uGcjp+QkOBSU1PzVJNIVsdOZPBqyjremL6eyHDj/s71uO2SWkSG64Y1KZ7MbKFzLiG3foHcvRNrZuX929FAZ2B1lj6V/VM5AEOAUQDOuZucc+c552riezcw+s8CXyRYokuEM/CyBnw/oANtalfiv9+uptuImcz6aY/XpYl4KpDTnmrANDNbCizAN6c/3syeMLMe/j5JwBozW4vvou3T+VKtSB7VrFyat/tcyNu3JnAiPZPeb8/j7x8uZNv+Y16XJuKJPE/v5DdN70h+STuZwZsz1vNKyjoMo39yXe5oX4uSEeFelyZy1oI2vSNSXERFhnNvp3pMfjCRpAaxDJ24hi7Pz2Da6l1elyZSYBT6EnLiK5Titd6teL9va8LCjNveXcAd7y1g896jXpcmku80vSMh7UR6Ju/M3sCIKT+Rnum4K7EOdyfWIbqEb8rny0XbGDpxDdv3H+Pc8tEM7tKAq1rEeVy1yB8FOr2j0BcBfjmQxjPfrWLc4u3ElY/m35c35tiJdP75xXKOncz4rV90ZDjPXN1UwS+FjkJf5AzMXb+XR8etYM3OQ5SMCON4+h8f5xBXPprZDyd7UJ1IznQhV+QMtKldiW/ua8ejVzTONvABtut2TynCFPoiWUSEh3HbJbU4p2xUtvvPLR9dwBWJBI9CXyQHD3drSHTk7+/hDzPofdF5HlUkcvYivC5ApLA6dbF26MQ1bNt/jPLRkZzIyGTYpLXsPnyCBy6tR9moSI+rFMkbXcgVyYN9R04w9Ps1fDR/M5VKl2RIt4b0bBFHWJh5XZqEOF3IFckHFUqX4L89m/LVPe2IrxDNwDFLuO6NOSzfdsDr0kQCotAXOQNN48vx+d1tGXrtBWzcc4QeL8/iX18uY//RE16XJvKnFPoiZygszLguoTpTByVxy8U1+d+8zXQclsJH8zeTkVm4pk1FTlHoi5ylctGRPNbjfL65rz31qsYw5PNl9Hx1Nos27/O6NJE/UOiLBEmjamX5pF8bRvRqzi8H0uj56g/8Y+wS9h4+7nVpIr9R6IsEkZlxZfM4pg5K4m8davP5j9voOCyF937YSHpG9p/wFSlICn2RfFCmZARDujdiwgMduCC+PI9+tYLLX5rF/A2/el2ahDiFvkg+qlulDO/3bc1rN7Xk4LGTXP/GHAZ8sphdB9O8Lk1ClEJfJJ+ZGd2aVmPKwCTuTa7LN0t30HFYCm/OWM9JTflIAcs19M0syszmm9kSM1thZo9n06eGmU0xs6VmlmJm8f725mY2x/+6pWZ2Q34MQqQoiC4RzsDLGvD9gA5cVLsST3+7im4jZjJ73R6vS5MQEsiZ/nEg2TnXDGgOdDWzNln6DANGO+cuAJ4AnvG3HwVucc6dD3QFXjCz8sEpXaRoqlm5NKP6XMjbtyZwIj2Tm96axz0f/qhHNkuByPWBa873cJ7D/m8j/V9ZP3nSGBjg354GfOl/7drTjrPdzHYBscD+sytbpOjr1Kgql9StzJsz1vNKyjqmrt5F/+S63NG+FiUjwrVUo+SLgB64ZmbhwEKgLvCKc+6hLPv/B8xzzo0ws6uBz4DKzrm9p/VpDbwHnO+cy8zy+n5AP4Dzzjuv1aZNm85uVCJFzNZ9R3lq/ComrPiFmpVKcdn55/D+nE1aqlECFtQHrjnnMpxzzYF4oLWZNcnSZRCQaGaLgERgG5B+WjHVgPeB27IGvv/4I51zCc65hNjY2EBKEilW4iuU4vWbWzH69taEhRkjZ6z/XeADHDuZwdCJazyqUIqLPN2945zbD6Tgm58/vX27c+5q51wL4BF/2wEAMysLfAP8yzk3NxhFixRXHerHMuH+Djnu17y/nK1A7t6JPXXx1cyigc7A6ix9KpvZqWMNAUb520sAX+C7yDsmmIWLFFclIsKIy2FJxmrlsl/CUSRQgZzpVwOmmdlSYAEwyTk33syeMLMe/j5JwBozWwtUBZ72t18PdAD6mNli/1fz4A5BpPgZ3KXBH5ZqBCgbHcn63YezeYVIYLRylkghdfrdO9XKRXFR7UpMXrmTtPQM7mhfm/4d61K6pFY8FZ9AL+Qq9EWKkN2HjvN/E1YzduFWzikbxSN/acTlF1TDTMs1hjotlyhSDMXGlGTYdc347O62VI4pwb0fLeLGN+ey5pdDXpcmRYRCX6QIalWjAuPuacdTVzVh1Y5DdH9xJk+OX8nBtJNelyaFnEJfpIgKDzN6t6nBtEFJ3HBhdUbN3kDysOl8tnArmVquUXKg0Bcp4iqWLsF/ezZl3D2XEF8hmoFjlnDdG3NYsf2A16VJIaTQFykmLogvz+d3t+XZay9g454jXPHSLP795XL2Hz3hdWlSiCj0RYqRsDDj+oTqTB2UxC0X1+TDeZvoOCyFj+Zv1pSPAAp9kWKpXHQkj/U4n2/ua0+9KjEM+XwZPV+dzeItesBtqFPoixRjjaqV5ZO/tWFEr+bsOJDGVa/M5qGxS9l7+LjXpYlHFPoixZyZcWXzOKYOSqJfh9p89uNWOg5LYfScjaRrucaQo9AXCRFlSkbwz+6NmPBAe5rGl+M/41ZwxcuzWbDxV69LkwKk0BcJMXWrxPBB34t47aaWHDh6guten8OATxaz62Ca16VJAVDoi4QgM6Nb02pMHphI/451+WbpDpKHT+etmes5qSmfYk2hLxLCSpWIYFCXBnw/oAMX1qzAU9+sotuImcxet8fr0iSf6CmbIvKbySt38tBnS9l7xPeBrqplSzKkWyOty1sE6CmbIpJnh4+nc+T4b8tbs/PgcQaNWcLY1C0eViXBpNAXkd8MnbiGtPTfz+mnZzoe+nwZ09bs8qgqCSaFvoj8JqeF1zMyHbe9s4A7R6ey5dejBVyVBJNCX0R+c24OC7KfWy6Kh7s1ZPa6PXR6bjrPT1pL2smMAq5OgiHX0DezKDObb2ZLzGyFmT2eTZ8aZjbFzJaaWYqZxZ+271Yz+8n/dWuwByAiwZPdguzRkeH8o2tD7kqsw9SBSXQ9/xxGTPmJzs9NZ+KKXyhsN4PIn8v17h3zLb5Z2jl32MwigVnA/c65uaf1GQOMd869Z2bJwG3OuZvNrCKQCiQADlgItHLO7cvp5+nuHRFvnb4g+7nloxncpcEf7t6Z8/NeHv1qOWt3HiaxfiyPXtGY2rFlPKpYIJ8WRjezUvhC/27n3LzT2lcAXZxzW/2/JA4458qa2Y1AknPub/5+bwApzrmPcvoZCn2RouFkRibvz9nkm+pJz+CO9rXp37EupUtGeF1aSArqLZtmFm5mi4FdwKTTA99vCXCNf7snEGNmlYA44PR7vbb620SkiIsMD+P2drWYOiiJHs3ieC3lZzo/N53xS7dryqcQCyj0nXMZzrnmQDzQ2syaZOkyCEg0s0VAIrANSAcsu8NlbTCzfmaWamapu3fvztMARMRbsTElGX59Mz67+2Iqli5B//8t4q9vzmPtzkNelybZyNPdO865/UAK0DVL+3bn3NXOuRbAI/62A/jO7Kuf1jUe2J7NcUc65xKccwmxsbF5G4GIFAqtalTkq/7teOqqJqzccZBuI2by5PiVHEo76XVpcppA7t6JNbPy/u1ooDOwOkufymZ26lhDgFH+7YnAZWZWwcwqAJf520SkGAoPM3q3qcG0QUlcn1CdUbM30HHYdD7/caumfAqJQM70qwHTzGwpsADfnP54M3vCzHr4+yQBa8xsLVAVeBrAOfcr8KT/dQuAJ/xtIlKMVSxdgmeubsq4ey4hvkI0D366hOten8OK7Qe8Li3k6YFrIpKvMjMdYxdu5f8mrGbf0RP0blODBy+tT/lSJbwurVjRA9dEpFAICzOuv7A6UwcmccvFNflg7iaSh0/n4/mbycwsXCedoUChLyIFolypSB7rcT7j721P3dgyPPz5Mnq+OpvFW/Z7XVpIUeiLSIFqfG5ZPvlbG164oTnbD6TR89XZPPzZUvYePu51aSFBoS8iBc7MuKpFHFMHJnJn+9qMXbiVjsNSGD1nI+larjFfKfRFxDMxUZH8s3sjJjzQnqbx5fjPuBVc8fJsUjfqJr/8otAXEc/VrRLDB30v4tWbWnLg6AmufX0OD36ymF0H07wurdhR6ItIoTBu8Xae/mYV2w+kEVMygnFLtpM8fDpvzVzPSU35BI1CX0Q89+WibQz5fBnb/Ct3HTqeTkSYUb1iNE99s4ruI2byw7o9HldZPCj0RcRzQyeu4ViWlbiOp2dy4OhJ3rolgbT0DP761jzu+d+POS7pKIFR6IuI53IK8h0H0ujcuCqTBiTy4KX1mbxyJ52GT+eVaes4nq7lGs+EQl9EPJfj2rz+9qjIcO7rVI/JDybSoX5lhk5cQ9cXZpKyZldBllksKPRFxHM5rc07uEuD37VVr1iKN25O4L3bW2NAn3cWcOfoVLb8erQAqy3aFPoi4rmrWsTxzNVNiSsfjQFx5aN55uqmf1ib95TE+rFMeKADD3VtyOx1e+j83HTfso0nNeWTGz1lU0SKtB0HjvHfb1fz9ZLtxFeI5j+XN+bSxlXxLdcdOvSUTREJCdXKRfPSjS346M42lCoRTr/3F9LnnQVs2HPE69IKJYW+iBQLF9epxDf3tefflzfmx0376PL8DJ6dsJqjJ9K9Lq1QUeiLSLERGR5G33a1mDIokSuancurKT/Tafh0xi/druUa/TSnLyLFVurGX/nPuBWs3HGQtnUq8XiP86lXNeZ3fb5ctI2hE9ewff8xzi0fzeAuDXK8gFyYaU5fREJeQs2KfH1vO568qgkrth+k24iZPDV+JYfSTgK/f/yDA7btP8aQz5fx5aJt3haej3INfTOLMrP5ZrbEzFaY2ePZ9DnPzKaZ2SIzW2pm3f3tkWb2npktM7NVZjYkPwYhIpKT8DDj5jY1mDYoiesSqvP27A0kD5/OF4u28uyE1X94/MOxkxkMnbjGo2rzXyBn+seBZOdcM6A50NXM2mTp8y/gU+dcC6AX8Kq//TqgpHOuKdAK+JuZ1QxG4SIieVGxdAmeubopX/79Es4tH82AT5aw/UD2j24uzs/3yTX0nc9h/7eR/q+sFwIcUNa/XQ7Yflp7aTOLAKKBE8DBsy1aRORMNateni/ubsuz11xAWA638uf0WIjiIKA5fTMLN7PFwC5gknNuXpaUArRCAAAJgUlEQVQujwG9zWwr8C1wr799LHAE2AFsBoY557Qkjoh4KizMuP7C6jx1VRPCsyR/VETYHx7/UJwEFPrOuQznXHMgHmhtZk2ydLkReNc5Fw90B943szCgNZABnAvUAgaaWe2sxzezfmaWamapu3fvPovhiIgE7q8X1WD4dc2oElPyt7YqZaOoVbm0h1XlrzzfsmlmjwJHnHPDTmtbAXR1zm3xf78eaAM8Csx1zr3vbx8FTHDOfZrT8XXLpoh4wTnnW73r21XsOXycXhdWZ3CXhlQsXcLr0gIStFs2zSzWzMr7t6OBzsDqLN02A538fRoBUcBuf3uy+ZTG94sg62tFRDxnZlzVIo6pAxO5o10txqRupeOwFN6fs5GMzML1eaazEcj0TjVgmpktBRbgm9Mfb2ZPmFkPf5+BwJ1mtgT4COjjfG8hXgHKAMv9r33HObc06KMQEQmSmKhIHvlLY767vz3nn1uWf49bwRUvzSJ1Y/G4HKlP5IqI5MA5x3fLf+Gp8SvZfiCNq1vG8XC3hlSJifK6tD/QJ3JFRM6SmdG9aTUmD0zkno51GL9kB8nDpvPWzPWczMj0urwzotAXEclFqRIRDO7SkIkDOpBQswJPfbOKv7w4kx9+3uN1aXmm0BcRCVCtyqV5p8+FvHlLAsdOZvDXN+fR/38/suNA0fkEr0JfRCQPzIxLG1dl0oBEBnSuz6SVO0keNp1XU9ZxPL3wL9eo0BcROQNRkeHc37kekx9MpH29yjw7YQ3dXpjJ9LWF+wOmCn0RkbNQvWIpRt6SwLu3XYgDbh01n36jU9ny61GvS8uWQl9EJAiSGlRhwgPteahrQ2at20Pn56YzYvJPpJ0sXFM+Cn0RkSApGRHO3Ul1mDIwkUsbV+X5yWu59PnpTFq5s9As16jQFxEJsmrlonn5ry35350XERURzp2jU7nt3QVs2HPE69L0iVwRkfx0MiOT937YyAuTf+JEeiZ3dqjFPR3r8v2KnUFdmzfQT+Qq9EVECsCuQ2n8v+9W8/mP2ygfHcnRExmcOO1TvdGR4TxzddMzDn49hkFEpBCpEhPFc9c3Z+xdF3PkRPrvAh8Kbm1ehb6ISAFKqFmRkxnZz7AUxNq8Cn0RkQIWl8MavAWxNq9CX0SkgA3u0oDoyPDftUVHhhfI2rwR+f4TRETkd05drA3m3TuBUuiLiHjgqhZxBRLyWWl6R0QkhCj0RURCSK6hb2ZRZjbfzJaY2QozezybPueZ2TQzW2RmS82s+2n7LjCzOf7XLjOzwre4pIhIiAhkTv84kOycO2xmkcAsM/vOOTf3tD7/Aj51zr1mZo2Bb4GaZhYBfADc7JxbYmaVgJPBHoSIiAQm19B3vuc0HPZ/G+n/yvrJAgeU9W+XA7b7ty8DljrnlviPtfdsCxYRkTMX0Jy+mYWb2WJgFzDJOTcvS5fHgN5mthXfWf69/vb6gDOziWb2o5n9I0h1i4jIGQgo9J1zGc655kA80NrMmmTpciPwrnMuHugOvG9mYfjeSbQDbvL/t6eZdcp6fDPrZ2apZpa6e3fhXmpMRKQoy9PdO865/UAK0DXLrr7Ap/4+c4AooDKwFZjunNvjnDuK711Ay2yOO9I5l+CcS4iNjc3zIEREJDCB3L0Ta2bl/dvRQGdgdZZum4FO/j6N8IX+bmAicIGZlfJf1E0EVgavfBERyYtA7t6pBrxnZuH4fkl86pwbb2ZPAKnOua+AgcCbZjYA30XdPv4LwPvM7Dlggb/9W+fcN/kyEhERyZUWURERKQa0iIqIiPyBQl9EJIQo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREKIQl9EJIQo9EVEQohCX0QkhCj0RURCiEJfRCSEKPRFREJIoXvKppntBjadxSEqA3uCVE5RoPEWf6E2Zo33zNRwzuW6ClWhC/2zZWapgTxetLjQeIu/UBuzxpu/NL0jIhJCFPoiIiGkOIb+SK8LKGAab/EXamPWePNRsZvTFxGRnBXHM30REclBkQx9M+tqZmvMbJ2ZPZzN/pJm9ol//zwzq1nwVQZXAGN+0MxWmtlSM5tiZjW8qDNYchvvaf2uNTNnZkX6bo9Axmtm1/v/jFeY2f8KusZgC+Dv9HlmNs3MFvn/Xnf3os5gMbNRZrbLzJbnsN/M7EX//4+lZtYyXwpxzhWpLyAc+BmoDZQAlgCNs/T5O/C6f7sX8InXdRfAmDsCpfzbdxflMQcyXn+/GGAGMBdI8LrufP7zrQcsAir4v6/idd0FMOaRwN3+7cbARq/rPssxdwBaAstz2N8d+A4woA0wLz/qKIpn+q2Bdc659c65E8DHwJVZ+lwJvOffHgt0MjMrwBqDLdcxO+emOeeO+r+dC8QXcI3BFMifMcCTwLNAWkEWlw8CGe+dwCvOuX0AzrldBVxjsAUyZgeU9W+XA7YXYH1B55ybAfz6J12uBEY7n7lAeTOrFuw6imLoxwFbTvt+q78t2z7OuXTgAFCpQKrLH4GM+XR98Z0xFFW5jtfMWgDVnXPjC7KwfBLIn299oL6ZzTazuWbWtcCqyx+BjPkxoLeZbQW+Be4tmNI8k9d/52ckItgHLADZnbFnvQUpkD5FScDjMbPeQAKQmK8V5a8/Ha+ZhQHPA30KqqB8FsifbwS+KZ4kfO/iZppZE+fc/nyuLb8EMuYbgXedc8PN7GLgff+YM/O/PE8USG4VxTP9rUD1076P549v+37rY2YR+N4a/tnbqsIukDFjZp2BR4AezrnjBVRbfshtvDFAEyDFzDbim//8qghfzA307/Q459xJ59wGYA2+XwJFVSBj7gt8CuCcmwNE4XtOTXEV0L/zs1UUQ38BUM/MaplZCXwXar/K0ucr4Fb/9rXAVOe/UlJE5Tpm/3THG/gCv6jP9/7peJ1zB5xzlZ1zNZ1zNfFdw+jhnEv1ptyzFsjf6S/xXazHzCrjm+5ZX6BVBlcgY94MdAIws0b4Qn93gVZZsL4CbvHfxdMGOOCc2xHsH1Lkpnecc+lm1h+YiO8OgFHOuRVm9gSQ6pz7Cngb31vBdfjO8Ht5V/HZC3DMQ4EywBj/NevNzrkenhV9FgIcb7ER4HgnApeZ2UogAxjsnNvrXdVnJ8AxDwTeNLMB+KY5+hTlkzcz+wjf9Fxl/3WKR4FIAOfc6/iuW3QH1gFHgdvypY4i/P9QRETyqChO74iIyBlS6IuIhBCFvohICFHoi4iEEIW+iEgIUeiLiIQQhb6ISAhR6IuIhJD/D7yjfi12Dc7hAAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(variances, betas)\n",
"xr = np.array([0,1])\n",
"plt.plot(xr, np.array(gv[1] + gv[0]*xr))\n",
"plt.plot([0], [gv[1]], marker='x', markersize=6)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do not believe there is a `simex` package for Python.\n",
"\n",
"## Changes of Scale\n",
"\n",
"Read in the data and fit the model:"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | sr | R-squared: | 0.338 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.280 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 5.756 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 0.000790 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -135.10 | \n",
"
\n",
"\n",
" No. Observations: | 50 | AIC: | 280.2 | \n",
"
\n",
"\n",
" Df Residuals: | 45 | BIC: | 289.8 | \n",
"
\n",
"\n",
" Df Model: | 4 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 28.5661 | 7.355 | 3.884 | 0.000 | 13.753 | 43.379 | \n",
"
\n",
"\n",
" pop15 | -0.4612 | 0.145 | -3.189 | 0.003 | -0.753 | -0.170 | \n",
"
\n",
"\n",
" pop75 | -1.6915 | 1.084 | -1.561 | 0.126 | -3.874 | 0.491 | \n",
"
\n",
"\n",
" dpi | -0.0003 | 0.001 | -0.362 | 0.719 | -0.002 | 0.002 | \n",
"
\n",
"\n",
" ddpi | 0.4097 | 0.196 | 2.088 | 0.042 | 0.015 | 0.805 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.866 | Durbin-Watson: | 1.934 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.649 | Jarque-Bera (JB): | 0.493 | \n",
"
\n",
"\n",
" Skew: | 0.241 | Prob(JB): | 0.782 | \n",
"
\n",
"\n",
" Kurtosis: | 3.064 | Cond. No. | 2.04e+04 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 2.04e+04. This might indicate that there are
strong multicollinearity or other numerical problems."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: sr R-squared: 0.338\n",
"Model: OLS Adj. R-squared: 0.280\n",
"Method: Least Squares F-statistic: 5.756\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n",
"Time: 15:57:32 Log-Likelihood: -135.10\n",
"No. Observations: 50 AIC: 280.2\n",
"Df Residuals: 45 BIC: 289.8\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 28.5661 7.355 3.884 0.000 13.753 43.379\n",
"pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170\n",
"pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491\n",
"dpi -0.0003 0.001 -0.362 0.719 -0.002 0.002\n",
"ddpi 0.4097 0.196 2.088 0.042 0.015 0.805\n",
"==============================================================================\n",
"Omnibus: 0.866 Durbin-Watson: 1.934\n",
"Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n",
"Skew: 0.241 Prob(JB): 0.782\n",
"Kurtosis: 3.064 Cond. No. 2.04e+04\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 2.04e+04. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"savings = pd.read_csv(\"data/savings.csv\",index_col=0)\n",
"lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + dpi + ddpi', data=savings).fit()\n",
"lmod.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Rescale one of the predictors:"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | sr | R-squared: | 0.338 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.280 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 5.756 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 0.000790 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -135.10 | \n",
"
\n",
"\n",
" No. Observations: | 50 | AIC: | 280.2 | \n",
"
\n",
"\n",
" Df Residuals: | 45 | BIC: | 289.8 | \n",
"
\n",
"\n",
" Df Model: | 4 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 28.5661 | 7.355 | 3.884 | 0.000 | 13.753 | 43.379 | \n",
"
\n",
"\n",
" pop15 | -0.4612 | 0.145 | -3.189 | 0.003 | -0.753 | -0.170 | \n",
"
\n",
"\n",
" pop75 | -1.6915 | 1.084 | -1.561 | 0.126 | -3.874 | 0.491 | \n",
"
\n",
"\n",
" I(dpi / 1000) | -0.3369 | 0.931 | -0.362 | 0.719 | -2.212 | 1.538 | \n",
"
\n",
"\n",
" ddpi | 0.4097 | 0.196 | 2.088 | 0.042 | 0.015 | 0.805 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.866 | Durbin-Watson: | 1.934 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.649 | Jarque-Bera (JB): | 0.493 | \n",
"
\n",
"\n",
" Skew: | 0.241 | Prob(JB): | 0.782 | \n",
"
\n",
"\n",
" Kurtosis: | 3.064 | Cond. No. | 503. | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: sr R-squared: 0.338\n",
"Model: OLS Adj. R-squared: 0.280\n",
"Method: Least Squares F-statistic: 5.756\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n",
"Time: 15:57:32 Log-Likelihood: -135.10\n",
"No. Observations: 50 AIC: 280.2\n",
"Df Residuals: 45 BIC: 289.8\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"=================================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"---------------------------------------------------------------------------------\n",
"Intercept 28.5661 7.355 3.884 0.000 13.753 43.379\n",
"pop15 -0.4612 0.145 -3.189 0.003 -0.753 -0.170\n",
"pop75 -1.6915 1.084 -1.561 0.126 -3.874 0.491\n",
"I(dpi / 1000) -0.3369 0.931 -0.362 0.719 -2.212 1.538\n",
"ddpi 0.4097 0.196 2.088 0.042 0.015 0.805\n",
"==============================================================================\n",
"Omnibus: 0.866 Durbin-Watson: 1.934\n",
"Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n",
"Skew: 0.241 Prob(JB): 0.782\n",
"Kurtosis: 3.064 Cond. No. 503.\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + I(dpi/1000) + ddpi', data=savings).fit()\n",
"lmod.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Standardize all the variables:"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | sr | R-squared: | 0.338 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.280 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 5.756 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 0.000790 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -60.617 | \n",
"
\n",
"\n",
" No. Observations: | 50 | AIC: | 131.2 | \n",
"
\n",
"\n",
" Df Residuals: | 45 | BIC: | 140.8 | \n",
"
\n",
"\n",
" Df Model: | 4 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 1.665e-16 | 0.121 | 1.37e-15 | 1.000 | -0.244 | 0.244 | \n",
"
\n",
"\n",
" pop15 | -0.9420 | 0.295 | -3.189 | 0.003 | -1.537 | -0.347 | \n",
"
\n",
"\n",
" pop75 | -0.4873 | 0.312 | -1.561 | 0.126 | -1.116 | 0.141 | \n",
"
\n",
"\n",
" dpi | -0.0745 | 0.206 | -0.362 | 0.719 | -0.489 | 0.340 | \n",
"
\n",
"\n",
" ddpi | 0.2624 | 0.126 | 2.088 | 0.042 | 0.009 | 0.516 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.866 | Durbin-Watson: | 1.934 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.649 | Jarque-Bera (JB): | 0.493 | \n",
"
\n",
"\n",
" Skew: | 0.241 | Prob(JB): | 0.782 | \n",
"
\n",
"\n",
" Kurtosis: | 3.064 | Cond. No. | 5.42 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: sr R-squared: 0.338\n",
"Model: OLS Adj. R-squared: 0.280\n",
"Method: Least Squares F-statistic: 5.756\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000790\n",
"Time: 15:57:32 Log-Likelihood: -60.617\n",
"No. Observations: 50 AIC: 131.2\n",
"Df Residuals: 45 BIC: 140.8\n",
"Df Model: 4 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 1.665e-16 0.121 1.37e-15 1.000 -0.244 0.244\n",
"pop15 -0.9420 0.295 -3.189 0.003 -1.537 -0.347\n",
"pop75 -0.4873 0.312 -1.561 0.126 -1.116 0.141\n",
"dpi -0.0745 0.206 -0.362 0.719 -0.489 0.340\n",
"ddpi 0.2624 0.126 2.088 0.042 0.009 0.516\n",
"==============================================================================\n",
"Omnibus: 0.866 Durbin-Watson: 1.934\n",
"Prob(Omnibus): 0.649 Jarque-Bera (JB): 0.493\n",
"Skew: 0.241 Prob(JB): 0.782\n",
"Kurtosis: 3.064 Cond. No. 5.42\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scsav = savings.apply(sp.stats.zscore)\n",
"lmod = smf.ols(formula = 'sr ~ pop15 + pop75 + dpi + ddpi', data=scsav).fit()\n",
"lmod.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Set up data frame and make a plot of the estimates and confidence intervals:"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYYAAAD8CAYAAABzTgP2AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAEChJREFUeJzt3X1sXfV9x/HPp0kIhsEcGhjgLhhKZaCgYdWLBGnZSrqFTjwE1gnCxNOYAtVaVeualZQ/FqYhgphU/kFrsio8rBpFZMFNtQcGZEAbHoKDKS4P4akk4LTgFgxqsXhIvvvDx/R+HSc+Jr73XMfvl2Sd49/53d/9+qfjfO7vnnNjR4QAABjxsaoLAAA0F4IBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAACSmVUX8FHMnTs32tvbqy4DwF56aeA3kqRjDj2w4kqmh82bN/8yIg4dr9+UDIb29nb19PRUXQaAvXT+qoclSXdccUrFlUwPtreW6cdbSQCAhGAAACQEAwAgIRgAAAnBAABI6hIMtlfY/saotnbbPy3x2IfqURMAoJymu101Ik6tugYA01t3b79uuHuLtg8O6cjWFi1b1KHFnW1Vl9UwkxYMtq+WdLGkVyQNSNps+zOS1kh6R9KPa/peKulcSbMlHS3p3yPimuLYryPidyarLgCYiO7efi1f16eh93dIkvoHh7R8XZ8kTZtwmJRgKALgAkmdxZiPS9os6WZJX42IB2zfMOph8yWdqOHQeMz2f0YEn1oDppn3tm3T1ov+peoyfmvboFZ8sGPX9gdnaOu81sbXM8rs44/T4d/6Vl2fY7KuMXxO0l0R8U5EvC1pvaQDJbVGxANFn38b9Zh7IuJXETEkaZ2kz+7pCWwvtd1ju2dgYGCSygaA7N2xQmEP7fuiybzGEKO+/80YbXvqv6e+iojVklZLUldX1x77Apg69ps3T0dde1vVZXzowpUb1D84tEt7W2uLzr/q9AoqarzJWjE8KOlc2y22D5J0VtH+lu2RlcBfjnrMn9g+xHaLpMWSNk5SLQDwkS1b1KGWWTNSW8usGVq2qKOiihpvUlYMEfG47TskPSFpq6QfFYcuk7TG9juS7h71sB9r+O2lYzV88ZnrCwAqN3KBmbuSJkFEXCvp2jEO/UHN/oqa/dcj4itjjMMdSQAqtbizbVoFwWh88hkAkFTyAbeIuEXSLVU8NwBgz1gxAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEhmNvLJbK+Q9OuI+OfdHD9b0gkRsbKRdQGYHN29/brh7i3aPjikI1tbtGxRhxZ3tlVdFiaoocEwnohYL2l91XUAmLju3n4tX9enofd3SJL6B4e0fF2fJBEOU0zdg8H21ZIulvSKpAFJm23fL+kJSfMlHSzpryJik+1LJXVFxFfqXRdQb9dvul7PvvFs1WU0TO+2QemIHWoZ1b7isRn6wWutYz5myxt/JEm67H9W17m6+jvukOP0zfnfrLqMSVHXYLD9GUkXSOosnutxSZuLwwdGxKm2T5O0RtKJ44y1VNJSSZo3b17dagbw0bz3wY4JtaN51XvF8DlJd0XEO5Jku/ZtotslKSIetH2w7bFfUhQiYrWk1ZLU1dUVdaoXmDT7yqvHshas3KD+waFd2ttaW3TzGaeP+Zjztz4sSbr5jEvrWRomqBF3Je3uH/HR7fxjD0xhyxZ1qGXWjNTWMmuGli3qqKgifFT1DoYHJZ1ru8X2QZLOqjl2viTZ/qyktyLirTrXAqCOFne26brzTlJba4us4ZXCdeedxIXnKaiubyVFxOO279Dwheatkn5Uc/hN2w+puPhczzoANMbizjaCYB9Q97uSIuJaSdfWttk+U9J/RMTyUX1vkXRLvWsCAOwen3wGACSVfMAtIv64iucFAIyPFQMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJDMbNQT2f62pM8X3x4g6bCIaC2O7ZDUVxzbFhFnN6ouAEDWsGCIiL8d2bf9VUmdNYeHIuLkRtUC1Ft3b79uuHuLtg8O6cjWFi1b1KHFnW1VlwWUMu5bSbbbbT9r+1bbT9pea/sA2wtt99rus73G9uyi/8u2r7e9qfg6doxhl0i6fbJ/GKAZdPf2a/m6PvUPDikk9Q8Oafm6PnX39lddGlBK2RVDh6TLI2Kj7TWSvi7pCkkLI+I527dJ+rKkG4v+b0fEfNsXF21njgxk+yhJR0vaUDP+/rZ7JH0gaWVEdO/VT4Vd/fdV0i/6xu+HvTZv25u62Tul/XL7fus/Jj0xp5qimtUvzhne3vxPkzvu4SdJX1w5uWNOI2UvPr8SERuL/e9JWijpZxHxXNF2q6TTavrfXrM9ZdRYF0haGxE7atrmRUSXpAsl3Wj7k6MLsL3Udo/tnoGBgZJlA4333o6dE2oHmk3ZFUNMcNzYzb40HAx/kzpHbC+2L9m+X8PXH14c1We1pNWS1NXVNdF6wKunhvm7lRvUPzi0S3tba4s2XnZ6BRU1sVUPD28vW1ptHUjKrhjm2R555b9E0r2S2muuH1wk6YGa/ufXbB8eabTdIWnOqLY5Ndcn5kpaIOnpCf4cQNNYtqhDLbNmpLaWWTO0bFFHRRUBE1N2xfCMpEtsr5L0vKSvSXpE0p22Z0p6TNJ3avrPtv2ohoNnSU37Eknfj4jaV/zHS1ple2fRf2VEEAyYskbuPuKuJExVZYNhZ0RcOartPuVbTmvdFBHXjG6MiBVjtD0k6aSSdQBTwuLONoIAUxaffAYAJOOuGCLiZUknlh0wItr3oh4AQMVYMQAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACAhGAAACcEAAEgIBgBAQjAAABKCAQCQEAwAgIRgAAAkBAMAICEYAAAJwQAASAgGAEBCMAAAEoIBAJAQDACAhGAAACQEAwAgIRgAAEnDgsH2abYft/2B7S+NOrbD9hPF1/pG1QQA2NXMBj7XNkmXSvrGGMeGIuLkBtaCOuju7dcNd2/R9sEhHdnaomWLOrS4s63qsgBM0LgrBtvttp+1favtJ22vtX2A7YW2e2332V5je3bR/2Xb19veVHwdK0kR8XJEPClpZ51/JlSgu7dfy9f1qX9wSCGpf3BIy9f1qbu3v+rSAExQ2RVDh6TLI2Kj7TWSvi7pCkkLI+I527dJ+rKkG4v+b0fEfNsXF21njjP+/rZ7JH0gaWVEdE/4Jynpmh8+pae3v12v4aet3m2Dem9Hzvyh93fo79c+qds3bauoqn3TCUcerH8469NVl4F9WNlrDK9ExMZi/3uSFkr6WUQ8V7TdKum0mv6312xPKTH+vIjoknShpBttf3J0B9tLbffY7hkYGChZNhpldCiM1w6geZVdMcQEx43d7I/dOWJ7sX3J9v2SOiW9OKrPakmrJamrq2ui9XyIV1r1sWDlBvUPDu3S3tbaojuuKPPaAECzKLtimGd75Ld7iaR7JbWPXD+QdJGkB2r6n1+zfXhPA9ueU3N9Yq6kBZKeLlkXmsSyRR1qmTUjtbXMmqFlizoqqgjAR1V2xfCMpEtsr5L0vKSvSXpE0p22Z0p6TNJ3avrPtv2ohoNniSTZ/kNJd0maI+ks29dExKclHS9ple2dRf+VEUEwTDEjdx9xVxIw9ZUNhp0RceWotvs0/JbPWG6KiGtqGyLiMUmfGN0xIh6SdFLJOtDEFne2EQTAPoBPPgMAknFXDBHxsqQTyw4YEe17UQ8AoGKsGAAACcEAAEgIBgBAQjAAABJHfOQPEVfG9oCkrZM87FxJv5zkMfdFzFM5zFN5zFU5kzFPR0XEoeN1mpLBUA+2e4r/rwl7wDyVwzyVx1yV08h54q0kAEBCMAAAEoLht1ZXXcAUwTyVwzyVx1yV07B54hoDACBhxQAASKZtMNj+C9tP2d5pe7dX+ou/Yd1n+4niz49OKxOYpzNsb7H9gu2rGlljM7B9iO17bD9fbOfspt+O4lx6wvb6RtdZlfHOD9uzbd9RHH/Udnvjq6xeiXm61PZAzTn01/WoY9oGg6SfSjpP0oMl+n4+Ik6eprfUjTtPtmdIuknSFyWdIGmJ7RMaU17TuErSfRHxKQ3/l/S7C8eh4lw6OSLOblx51Sl5flwu6c2IOFbStyVd39gqqzeB36M7as6h79ajlmkbDBHxTERsqbqOZldynuZLeiEiXoqI9yR9X9I59a+uqZyj4b99rmK7uMJamk2Z86N2/tZKWmjbDayxGTTN79G0DYYJCEn/a3uz7aVVF9Ok2iS9UvP9q0XbdPJ7EfFzSSq2h+2m3/62e2w/Ynu6hEeZ8+PDPhHxgaS3JH28IdU1j7K/R39u+0nba23/fj0KKfsX3KYk2/dKOnyMQ1dHxA9KDrMgIrbbPkzSPbafjYgybz9NGZMwT2O9stvnbnfb0zxNYJh5xfl0jKQNtvsi4sXJqbBplTk/psU5NI4yc/BDSbdHxLu2r9TwKuv0yS5knw6GiPjCJIyxvdi+bvsuDS/39qlgmIR5elVS7SuXT0javpdjNp09zZPt12wfERE/t32EpNd3M8bI+fSS7fs1/Odx9/VgKHN+jPR5tfg78r8r6Y3GlNc0xp2niPhVzbf/qjpdi+GtpD2wfaDtg0b2Jf2phi/GIntM0qdsH217P0kXSJo2d9wU1ku6pNi/RNIuKy3bc2zPLvbnSlog6emGVVidMudH7fx9SdKGmH4fshp3nooXHSPOlvRMXSqJiGn5JelcDSf0u5Jek3R30X6kpP8q9o+R9JPi6ykNv7VSee3NNk/F938m6TkNv/qdjvP0cQ3fjfR8sT2kaO+S9N1i/1RJfcX51Cfp8qrrbuD87HJ+SPpHSWcX+/tLulPSC5I2STqm6pqbdJ6uK/4t+omk/5N0XD3q4JPPAICEt5IAAAnBAABICAYAQEIwAAASggEAkBAMAICEYAAAJAQDACD5f0BsJFew6IE6AAAAAElFTkSuQmCC\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"edf = pd.concat([lmod.params, lmod.conf_int()],axis=1).iloc[1:,]\n",
"edf.columns = ['estimate','lb','ub']\n",
"npreds = edf.shape[0]\n",
"fig, ax = plt.subplots()\n",
"ax.scatter(edf.estimate,np.arange(npreds))\n",
"for i in range(npreds):\n",
" ax.plot([edf.lb[i], edf.ub[i]], [i, i])\n",
"ax.set_yticks(np.arange(npreds))\n",
"ax.set_yticklabels(edf.index)\n",
"ax.axvline(0)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do Gelman's rescaling:"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | sr | R-squared: | 0.325 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.281 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 7.374 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 0.000391 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -135.61 | \n",
"
\n",
"\n",
" No. Observations: | 50 | AIC: | 279.2 | \n",
"
\n",
"\n",
" Df Residuals: | 46 | BIC: | 286.9 | \n",
"
\n",
"\n",
" Df Model: | 3 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 6.8176 | 1.011 | 6.746 | 0.000 | 4.783 | 8.852 | \n",
"
\n",
"\n",
" age | 5.2841 | 1.585 | 3.334 | 0.002 | 2.094 | 8.474 | \n",
"
\n",
"\n",
" dpis | -1.5485 | 1.593 | -0.972 | 0.336 | -4.755 | 1.658 | \n",
"
\n",
"\n",
" ddpis | 2.4433 | 1.097 | 2.227 | 0.031 | 0.235 | 4.651 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 2.962 | Durbin-Watson: | 2.035 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.227 | Jarque-Bera (JB): | 1.987 | \n",
"
\n",
"\n",
" Skew: | 0.433 | Prob(JB): | 0.370 | \n",
"
\n",
"\n",
" Kurtosis: | 3.451 | Cond. No. | 4.92 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: sr R-squared: 0.325\n",
"Model: OLS Adj. R-squared: 0.281\n",
"Method: Least Squares F-statistic: 7.374\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 0.000391\n",
"Time: 15:57:32 Log-Likelihood: -135.61\n",
"No. Observations: 50 AIC: 279.2\n",
"Df Residuals: 46 BIC: 286.9\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 6.8176 1.011 6.746 0.000 4.783 8.852\n",
"age 5.2841 1.585 3.334 0.002 2.094 8.474\n",
"dpis -1.5485 1.593 -0.972 0.336 -4.755 1.658\n",
"ddpis 2.4433 1.097 2.227 0.031 0.235 4.651\n",
"==============================================================================\n",
"Omnibus: 2.962 Durbin-Watson: 2.035\n",
"Prob(Omnibus): 0.227 Jarque-Bera (JB): 1.987\n",
"Skew: 0.433 Prob(JB): 0.370\n",
"Kurtosis: 3.451 Cond. No. 4.92\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"savings['age'] = np.where(savings.pop15 > 35, 0, 1)\n",
"savings['dpis'] = sp.stats.zscore(savings.dpi)/2\n",
"savings['ddpis'] = sp.stats.zscore(savings.ddpi)/2\n",
"smf.ols(formula='sr ~ age + dpis + ddpis', data=savings).fit().summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Collinearity\n",
"\n",
"Read in the data:"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Weight | \n",
" HtShoes | \n",
" Ht | \n",
" Seated | \n",
" Arm | \n",
" Thigh | \n",
" Leg | \n",
" hipcenter | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 46 | \n",
" 180 | \n",
" 187.2 | \n",
" 184.9 | \n",
" 95.2 | \n",
" 36.1 | \n",
" 45.3 | \n",
" 41.3 | \n",
" -206.300 | \n",
"
\n",
" \n",
" 1 | \n",
" 31 | \n",
" 175 | \n",
" 167.5 | \n",
" 165.5 | \n",
" 83.8 | \n",
" 32.9 | \n",
" 36.5 | \n",
" 35.9 | \n",
" -178.210 | \n",
"
\n",
" \n",
" 2 | \n",
" 23 | \n",
" 100 | \n",
" 153.6 | \n",
" 152.2 | \n",
" 82.9 | \n",
" 26.0 | \n",
" 36.6 | \n",
" 31.0 | \n",
" -71.673 | \n",
"
\n",
" \n",
" 3 | \n",
" 19 | \n",
" 185 | \n",
" 190.3 | \n",
" 187.4 | \n",
" 97.3 | \n",
" 37.4 | \n",
" 44.1 | \n",
" 41.0 | \n",
" -257.720 | \n",
"
\n",
" \n",
" 4 | \n",
" 23 | \n",
" 159 | \n",
" 178.0 | \n",
" 174.1 | \n",
" 93.9 | \n",
" 29.5 | \n",
" 40.1 | \n",
" 36.9 | \n",
" -173.230 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Age Weight HtShoes Ht Seated Arm Thigh Leg hipcenter\n",
"0 46 180 187.2 184.9 95.2 36.1 45.3 41.3 -206.300\n",
"1 31 175 167.5 165.5 83.8 32.9 36.5 35.9 -178.210\n",
"2 23 100 153.6 152.2 82.9 26.0 36.6 31.0 -71.673\n",
"3 19 185 190.3 187.4 97.3 37.4 44.1 41.0 -257.720\n",
"4 23 159 178.0 174.1 93.9 29.5 40.1 36.9 -173.230"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seatpos = pd.read_csv(\"data/seatpos.csv\")\n",
"seatpos.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Fit a model with all the predictors:"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | hipcenter | R-squared: | 0.687 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.600 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 7.940 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 1.31e-05 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -186.73 | \n",
"
\n",
"\n",
" No. Observations: | 38 | AIC: | 391.5 | \n",
"
\n",
"\n",
" Df Residuals: | 29 | BIC: | 406.2 | \n",
"
\n",
"\n",
" Df Model: | 8 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 436.4321 | 166.572 | 2.620 | 0.014 | 95.755 | 777.109 | \n",
"
\n",
"\n",
" Age | 0.7757 | 0.570 | 1.360 | 0.184 | -0.391 | 1.942 | \n",
"
\n",
"\n",
" Weight | 0.0263 | 0.331 | 0.080 | 0.937 | -0.651 | 0.703 | \n",
"
\n",
"\n",
" HtShoes | -2.6924 | 9.753 | -0.276 | 0.784 | -22.640 | 17.255 | \n",
"
\n",
"\n",
" Ht | 0.6013 | 10.130 | 0.059 | 0.953 | -20.117 | 21.319 | \n",
"
\n",
"\n",
" Seated | 0.5338 | 3.762 | 0.142 | 0.888 | -7.160 | 8.228 | \n",
"
\n",
"\n",
" Arm | -1.3281 | 3.900 | -0.341 | 0.736 | -9.305 | 6.649 | \n",
"
\n",
"\n",
" Thigh | -1.1431 | 2.660 | -0.430 | 0.671 | -6.583 | 4.297 | \n",
"
\n",
"\n",
" Leg | -6.4390 | 4.714 | -1.366 | 0.182 | -16.080 | 3.202 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.543 | Durbin-Watson: | 1.769 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.762 | Jarque-Bera (JB): | 0.664 | \n",
"
\n",
"\n",
" Skew: | 0.157 | Prob(JB): | 0.717 | \n",
"
\n",
"\n",
" Kurtosis: | 2.434 | Cond. No. | 8.44e+03 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 8.44e+03. This might indicate that there are
strong multicollinearity or other numerical problems."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hipcenter R-squared: 0.687\n",
"Model: OLS Adj. R-squared: 0.600\n",
"Method: Least Squares F-statistic: 7.940\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 1.31e-05\n",
"Time: 15:57:32 Log-Likelihood: -186.73\n",
"No. Observations: 38 AIC: 391.5\n",
"Df Residuals: 29 BIC: 406.2\n",
"Df Model: 8 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 436.4321 166.572 2.620 0.014 95.755 777.109\n",
"Age 0.7757 0.570 1.360 0.184 -0.391 1.942\n",
"Weight 0.0263 0.331 0.080 0.937 -0.651 0.703\n",
"HtShoes -2.6924 9.753 -0.276 0.784 -22.640 17.255\n",
"Ht 0.6013 10.130 0.059 0.953 -20.117 21.319\n",
"Seated 0.5338 3.762 0.142 0.888 -7.160 8.228\n",
"Arm -1.3281 3.900 -0.341 0.736 -9.305 6.649\n",
"Thigh -1.1431 2.660 -0.430 0.671 -6.583 4.297\n",
"Leg -6.4390 4.714 -1.366 0.182 -16.080 3.202\n",
"==============================================================================\n",
"Omnibus: 0.543 Durbin-Watson: 1.769\n",
"Prob(Omnibus): 0.762 Jarque-Bera (JB): 0.664\n",
"Skew: 0.157 Prob(JB): 0.717\n",
"Kurtosis: 2.434 Cond. No. 8.44e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 8.44e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lmod = smf.ols(formula = 'hipcenter ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n",
"lmod.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Look at the correlations of the predictors:"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Age | \n",
" Weight | \n",
" HtShoes | \n",
" Ht | \n",
" Seated | \n",
" Arm | \n",
" Thigh | \n",
" Leg | \n",
"
\n",
" \n",
" \n",
" \n",
" Age | \n",
" 1.000 | \n",
" 0.081 | \n",
" -0.079 | \n",
" -0.090 | \n",
" -0.170 | \n",
" 0.360 | \n",
" 0.091 | \n",
" -0.042 | \n",
"
\n",
" \n",
" Weight | \n",
" 0.081 | \n",
" 1.000 | \n",
" 0.828 | \n",
" 0.829 | \n",
" 0.776 | \n",
" 0.698 | \n",
" 0.573 | \n",
" 0.784 | \n",
"
\n",
" \n",
" HtShoes | \n",
" -0.079 | \n",
" 0.828 | \n",
" 1.000 | \n",
" 0.998 | \n",
" 0.930 | \n",
" 0.752 | \n",
" 0.725 | \n",
" 0.908 | \n",
"
\n",
" \n",
" Ht | \n",
" -0.090 | \n",
" 0.829 | \n",
" 0.998 | \n",
" 1.000 | \n",
" 0.928 | \n",
" 0.752 | \n",
" 0.735 | \n",
" 0.910 | \n",
"
\n",
" \n",
" Seated | \n",
" -0.170 | \n",
" 0.776 | \n",
" 0.930 | \n",
" 0.928 | \n",
" 1.000 | \n",
" 0.625 | \n",
" 0.607 | \n",
" 0.812 | \n",
"
\n",
" \n",
" Arm | \n",
" 0.360 | \n",
" 0.698 | \n",
" 0.752 | \n",
" 0.752 | \n",
" 0.625 | \n",
" 1.000 | \n",
" 0.671 | \n",
" 0.754 | \n",
"
\n",
" \n",
" Thigh | \n",
" 0.091 | \n",
" 0.573 | \n",
" 0.725 | \n",
" 0.735 | \n",
" 0.607 | \n",
" 0.671 | \n",
" 1.000 | \n",
" 0.650 | \n",
"
\n",
" \n",
" Leg | \n",
" -0.042 | \n",
" 0.784 | \n",
" 0.908 | \n",
" 0.910 | \n",
" 0.812 | \n",
" 0.754 | \n",
" 0.650 | \n",
" 1.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Age Weight HtShoes Ht Seated Arm Thigh Leg\n",
"Age 1.000 0.081 -0.079 -0.090 -0.170 0.360 0.091 -0.042\n",
"Weight 0.081 1.000 0.828 0.829 0.776 0.698 0.573 0.784\n",
"HtShoes -0.079 0.828 1.000 0.998 0.930 0.752 0.725 0.908\n",
"Ht -0.090 0.829 0.998 1.000 0.928 0.752 0.735 0.910\n",
"Seated -0.170 0.776 0.930 0.928 1.000 0.625 0.607 0.812\n",
"Arm 0.360 0.698 0.752 0.752 0.625 1.000 0.671 0.754\n",
"Thigh 0.091 0.573 0.725 0.735 0.607 0.671 1.000 0.650\n",
"Leg -0.042 0.784 0.908 0.910 0.812 0.754 0.650 1.000"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seatpos.iloc[:,:-1].corr().round(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Do the eigendecomposition. Gives similar but not identical results to R calculation."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([3.6536714e+06, 2.1479480e+04, 9.0432253e+03, 2.9895260e+02,\n",
" 1.4839482e+02, 7.2982092e+00, 8.1173974e+01, 5.3361943e+01])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X = seatpos.iloc[:,:-1]\n",
"XTX = np.dot(X.T,X)\n",
"evals, evecs = np.linalg.eig(XTX)\n",
"evals"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Calculate the condition numbers;"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1.0000000e+00, 1.7010055e+02, 4.0402304e+02, 1.2221574e+04,\n",
" 2.4621286e+04, 5.0062574e+05, 4.5010379e+04, 6.8469608e+04])"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"evals[0]/evals"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compute the variance inflation factors:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.49948233386392404"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from patsy import dmatrix\n",
"X = dmatrix(\"Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg\", data=seatpos, return_type='dataframe')\n",
"lmod = sm.OLS(X['Age'],X.drop('Age',axis=1)).fit()\n",
"lmod.rsquared"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.9979314770642473"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"1/(1-lmod.rsquared)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Get them all:"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Intercept 741.029693\n",
"Age 1.997931\n",
"Weight 3.647030\n",
"HtShoes 307.429378\n",
"Ht 333.137832\n",
"Seated 8.951054\n",
"Arm 4.496368\n",
"Thigh 2.762886\n",
"Leg 6.694291\n",
"dtype: float64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from statsmodels.stats.outliers_influence import variance_inflation_factor\n",
"vif = [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]\n",
"pd.Series(vif, X.columns)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Change in coefficients due to perturbing the response"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Intercept | \n",
" Age | \n",
" Weight | \n",
" HtShoes | \n",
" Ht | \n",
" Seated | \n",
" Arm | \n",
" Thigh | \n",
" Leg | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 436.432128 | \n",
" 0.775716 | \n",
" 0.026313 | \n",
" -2.692408 | \n",
" 0.601345 | \n",
" 0.533752 | \n",
" -1.328069 | \n",
" -1.143119 | \n",
" -6.439046 | \n",
"
\n",
" \n",
" 1 | \n",
" 389.146330 | \n",
" 0.751927 | \n",
" -0.001767 | \n",
" -2.779268 | \n",
" 1.629064 | \n",
" -0.167238 | \n",
" -0.757036 | \n",
" -1.740402 | \n",
" -7.524041 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Intercept Age Weight HtShoes Ht Seated Arm \\\n",
"0 436.432128 0.775716 0.026313 -2.692408 0.601345 0.533752 -1.328069 \n",
"1 389.146330 0.751927 -0.001767 -2.779268 1.629064 -0.167238 -0.757036 \n",
"\n",
" Thigh Leg \n",
"0 -1.143119 -6.439046 \n",
"1 -1.740402 -7.524041 "
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"seatpos['hiperb'] = seatpos.hipcenter+np.random.normal(scale=10,size=38)\n",
"lmod = smf.ols(formula = 'hipcenter ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n",
"lmodp = smf.ols(formula = 'hiperb ~ Age+Weight+HtShoes+Ht+Seated+Arm+Thigh+Leg', data=seatpos).fit()\n",
"pd.DataFrame([lmod.params, lmodp.params])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Change in R-squared"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(0.6865534760253379, 0.6520350104273814)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"lmod.rsquared, lmodp.rsquared"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Correlations of the length variables"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" HtShoes | \n",
" Ht | \n",
" Seated | \n",
" Arm | \n",
" Thigh | \n",
" Leg | \n",
"
\n",
" \n",
" \n",
" \n",
" HtShoes | \n",
" 1.000 | \n",
" 0.998 | \n",
" 0.930 | \n",
" 0.722 | \n",
" 0.710 | \n",
" 0.896 | \n",
"
\n",
" \n",
" Ht | \n",
" 0.998 | \n",
" 1.000 | \n",
" 0.929 | \n",
" 0.724 | \n",
" 0.720 | \n",
" 0.898 | \n",
"
\n",
" \n",
" Seated | \n",
" 0.930 | \n",
" 0.929 | \n",
" 1.000 | \n",
" 0.603 | \n",
" 0.576 | \n",
" 0.803 | \n",
"
\n",
" \n",
" Arm | \n",
" 0.722 | \n",
" 0.724 | \n",
" 0.603 | \n",
" 1.000 | \n",
" 0.670 | \n",
" 0.723 | \n",
"
\n",
" \n",
" Thigh | \n",
" 0.710 | \n",
" 0.720 | \n",
" 0.576 | \n",
" 0.670 | \n",
" 1.000 | \n",
" 0.626 | \n",
"
\n",
" \n",
" Leg | \n",
" 0.896 | \n",
" 0.898 | \n",
" 0.803 | \n",
" 0.723 | \n",
" 0.626 | \n",
" 1.000 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" HtShoes Ht Seated Arm Thigh Leg\n",
"HtShoes 1.000 0.998 0.930 0.722 0.710 0.896\n",
"Ht 0.998 1.000 0.929 0.724 0.720 0.898\n",
"Seated 0.930 0.929 1.000 0.603 0.576 0.803\n",
"Arm 0.722 0.724 0.603 1.000 0.670 0.723\n",
"Thigh 0.710 0.720 0.576 0.670 1.000 0.626\n",
"Leg 0.896 0.898 0.803 0.723 0.626 1.000"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame.corr(X.iloc[3:,3:]).round(3)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Recommended smaller model:"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | hipcenter | R-squared: | 0.656 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.626 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 21.63 | \n",
"
\n",
"\n",
" Date: | Tue, 25 Sep 2018 | Prob (F-statistic): | 5.13e-08 | \n",
"
\n",
"\n",
" Time: | 15:57:32 | Log-Likelihood: | -188.49 | \n",
"
\n",
"\n",
" No. Observations: | 38 | AIC: | 385.0 | \n",
"
\n",
"\n",
" Df Residuals: | 34 | BIC: | 391.5 | \n",
"
\n",
"\n",
" Df Model: | 3 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" Intercept | 528.2977 | 135.313 | 3.904 | 0.000 | 253.309 | 803.287 | \n",
"
\n",
"\n",
" Age | 0.5195 | 0.408 | 1.273 | 0.212 | -0.310 | 1.349 | \n",
"
\n",
"\n",
" Weight | 0.0043 | 0.312 | 0.014 | 0.989 | -0.629 | 0.638 | \n",
"
\n",
"\n",
" Ht | -4.2119 | 0.999 | -4.216 | 0.000 | -6.242 | -2.182 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 0.932 | Durbin-Watson: | 1.742 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.628 | Jarque-Bera (JB): | 0.868 | \n",
"
\n",
"\n",
" Skew: | -0.341 | Prob(JB): | 0.648 | \n",
"
\n",
"\n",
" Kurtosis: | 2.713 | Cond. No. | 5.36e+03 | \n",
"
\n",
"
Warnings:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.36e+03. This might indicate that there are
strong multicollinearity or other numerical problems."
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hipcenter R-squared: 0.656\n",
"Model: OLS Adj. R-squared: 0.626\n",
"Method: Least Squares F-statistic: 21.63\n",
"Date: Tue, 25 Sep 2018 Prob (F-statistic): 5.13e-08\n",
"Time: 15:57:32 Log-Likelihood: -188.49\n",
"No. Observations: 38 AIC: 385.0\n",
"Df Residuals: 34 BIC: 391.5\n",
"Df Model: 3 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 528.2977 135.313 3.904 0.000 253.309 803.287\n",
"Age 0.5195 0.408 1.273 0.212 -0.310 1.349\n",
"Weight 0.0043 0.312 0.014 0.989 -0.629 0.638\n",
"Ht -4.2119 0.999 -4.216 0.000 -6.242 -2.182\n",
"==============================================================================\n",
"Omnibus: 0.932 Durbin-Watson: 1.742\n",
"Prob(Omnibus): 0.628 Jarque-Bera (JB): 0.868\n",
"Skew: -0.341 Prob(JB): 0.648\n",
"Kurtosis: 2.713 Cond. No. 5.36e+03\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"[2] The condition number is large, 5.36e+03. This might indicate that there are\n",
"strong multicollinearity or other numerical problems.\n",
"\"\"\""
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"smf.ols(formula = 'hipcenter ~ Age+Weight+Ht', data=seatpos).fit().summary()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"application/json": {
"Software versions": [
{
"module": "Python",
"version": "3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)]"
},
{
"module": "IPython",
"version": "6.5.0"
},
{
"module": "OS",
"version": "Darwin 17.7.0 x86_64 i386 64bit"
},
{
"module": "pandas",
"version": "0.23.4"
},
{
"module": "numpy",
"version": "1.15.1"
},
{
"module": "matplotlib",
"version": "2.2.3"
},
{
"module": "seaborn",
"version": "0.9.0"
},
{
"module": "scipy",
"version": "1.1.0"
},
{
"module": "patsy",
"version": "0.5.0"
},
{
"module": "statsmodels",
"version": "0.9.0"
}
]
},
"text/html": [
"Software | Version |
---|
Python | 3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)] |
IPython | 6.5.0 |
OS | Darwin 17.7.0 x86_64 i386 64bit |
pandas | 0.23.4 |
numpy | 1.15.1 |
matplotlib | 2.2.3 |
seaborn | 0.9.0 |
scipy | 1.1.0 |
patsy | 0.5.0 |
statsmodels | 0.9.0 |
Tue Sep 25 15:57:32 2018 BST |
"
],
"text/latex": [
"\\begin{tabular}{|l|l|}\\hline\n",
"{\\bf Software} & {\\bf Version} \\\\ \\hline\\hline\n",
"Python & 3.7.0 64bit [Clang 4.0.1 (tags/RELEASE\\_401/final)] \\\\ \\hline\n",
"IPython & 6.5.0 \\\\ \\hline\n",
"OS & Darwin 17.7.0 x86\\_64 i386 64bit \\\\ \\hline\n",
"pandas & 0.23.4 \\\\ \\hline\n",
"numpy & 1.15.1 \\\\ \\hline\n",
"matplotlib & 2.2.3 \\\\ \\hline\n",
"seaborn & 0.9.0 \\\\ \\hline\n",
"scipy & 1.1.0 \\\\ \\hline\n",
"patsy & 0.5.0 \\\\ \\hline\n",
"statsmodels & 0.9.0 \\\\ \\hline\n",
"\\hline \\multicolumn{2}{|l|}{Tue Sep 25 15:57:32 2018 BST} \\\\ \\hline\n",
"\\end{tabular}\n"
],
"text/plain": [
"Software versions\n",
"Python 3.7.0 64bit [Clang 4.0.1 (tags/RELEASE_401/final)]\n",
"IPython 6.5.0\n",
"OS Darwin 17.7.0 x86_64 i386 64bit\n",
"pandas 0.23.4\n",
"numpy 1.15.1\n",
"matplotlib 2.2.3\n",
"seaborn 0.9.0\n",
"scipy 1.1.0\n",
"patsy 0.5.0\n",
"statsmodels 0.9.0\n",
"Tue Sep 25 15:57:32 2018 BST"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%load_ext version_information\n",
"%version_information pandas, numpy, matplotlib, seaborn, scipy, patsy, statsmodels"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}