Commit 7da51ea7 authored by vincentvigon's avatar vincentvigon
Browse files

grec

parent fe49e909
No preview for this file type
No preview for this file type
......@@ -26,7 +26,7 @@
"***Mathématiquement 1:*** On imagine une fonction $L(x,y,w)$ et l'on décrète que la proba d'observer $y_i$ en présence de $x_i$ est: \n",
"\n",
"$$\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\mathrm{ pour un certain } w\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\text{ pour un certain } w\n",
"$$\n",
"\n",
"En supposant les observations indépendantes, la proba d'observer simultanément $y[0],y[1],y[2]...$ est donc de\n",
......@@ -62,7 +62,7 @@
"$$\n",
"et le paramètre qui rend le plus vraissemblable l'ensemble de nos données est toujours: \n",
"$$\n",
"\t\t\\hat w =\\hbox{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"\t\t\\hat w =\\mathrm{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"$$\n",
"La seule différence c'est que la fonction $L$ peut dépasser 1. \n",
"\n",
......@@ -76,15 +76,12 @@
"* le $y'$ qui maximise la vraissemblance $y\\to L(x',y,\\hat w)$ \n",
"* l'espérance de la v.a $Y$ dont la densité est $y\\to L(x',y,\\hat w)$ \n",
"\n",
"On choisi en général la seconde estimation (qui coincide parfois avec la première). \n",
"\n",
"\n",
"\n"
"On choisi en général la seconde estimation (qui coincide parfois avec la première). \n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -115,7 +112,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 5,
"metadata": {
"scrolled": false
},
......@@ -132,11 +129,11 @@
}
],
"source": [
"x0=np.loadtxt(\"data/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/data0_y.csv\")\n",
"x0=np.loadtxt(\"data/dataGLM/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/dataGLM/data0_y.csv\")\n",
"\n",
"x1=np.loadtxt(\"data/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/data1_y.csv\")\n",
"x1=np.loadtxt(\"data/dataGLM/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/dataGLM/data1_y.csv\")\n",
"\n",
"plt.figure(figsize=(12,6))\n",
"plt.subplot(1,2,1)\n",
......@@ -182,7 +179,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -212,7 +209,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 7,
"metadata": {},
"outputs": [
{
......@@ -250,7 +247,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 8,
"metadata": {
"scrolled": true
},
......@@ -267,7 +264,7 @@
"Link Function: identity Scale: 90.926\n",
"Method: IRLS Log-Likelihood: -3673.0\n",
"Date: Mon, 18 Jun 2018 Deviance: 90745.\n",
"Time: 15:47:54 Pearson chi2: 9.07e+04\n",
"Time: 16:01:57 Pearson chi2: 9.07e+04\n",
"No. Iterations: 3 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
......@@ -311,7 +308,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
......@@ -347,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"metadata": {},
"outputs": [
{
......@@ -401,7 +398,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"metadata": {},
"outputs": [
{
......@@ -416,8 +413,8 @@
}
],
"source": [
"x2=np.loadtxt(\"data/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/data2_y.csv\")\n",
"x2=np.loadtxt(\"data/dataGLM/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/dataGLM/data2_y.csv\")\n",
"plt.plot(x2,y2,'.');"
]
},
......@@ -444,7 +441,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 12,
"metadata": {
"scrolled": true
},
......@@ -504,7 +501,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -519,8 +516,8 @@
}
],
"source": [
"x=np.loadtxt(\"data/accident_x.csv\")\n",
"y=np.loadtxt(\"data/accident_y.csv\")\n",
"x=np.loadtxt(\"data/dataGLM/accident_x.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/accident_y.csv\")\n",
"plt.plot(x,y,'.');"
]
},
......@@ -539,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"outputs": [
{
......@@ -560,7 +557,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {
"scrolled": true
},
......
This source diff could not be displayed because it is too large. You can view the blob instead.
......@@ -26,7 +26,7 @@
"***Mathématiquement 1:*** On imagine une fonction $L(x,y,w)$ et l'on décrète que la proba d'observer $y_i$ en présence de $x_i$ est: \n",
"\n",
"$$\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\mathrm{ pour un certain } w\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\text{ pour un certain } w\n",
"$$\n",
"\n",
"En supposant les observations indépendantes, la proba d'observer simultanément $y[0],y[1],y[2]...$ est donc de\n",
......@@ -62,7 +62,7 @@
"$$\n",
"et le paramètre qui rend le plus vraissemblable l'ensemble de nos données est toujours: \n",
"$$\n",
"\t\t\\hat w =\\hbox{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"\t\t\\hat w =\\mathrm{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"$$\n",
"La seule différence c'est que la fonction $L$ peut dépasser 1. \n",
"\n",
......@@ -76,15 +76,12 @@
"* le $y'$ qui maximise la vraissemblance $y\\to L(x',y,\\hat w)$ \n",
"* l'espérance de la v.a $Y$ dont la densité est $y\\to L(x',y,\\hat w)$ \n",
"\n",
"On choisi en général la seconde estimation (qui coincide parfois avec la première). \n",
"\n",
"\n",
"\n"
"On choisi en général la seconde estimation (qui coincide parfois avec la première). \n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -115,7 +112,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 5,
"metadata": {
"scrolled": false
},
......@@ -132,11 +129,11 @@
}
],
"source": [
"x0=np.loadtxt(\"data/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/data0_y.csv\")\n",
"x0=np.loadtxt(\"data/dataGLM/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/dataGLM/data0_y.csv\")\n",
"\n",
"x1=np.loadtxt(\"data/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/data1_y.csv\")\n",
"x1=np.loadtxt(\"data/dataGLM/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/dataGLM/data1_y.csv\")\n",
"\n",
"plt.figure(figsize=(12,6))\n",
"plt.subplot(1,2,1)\n",
......@@ -182,7 +179,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -212,7 +209,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 7,
"metadata": {},
"outputs": [
{
......@@ -250,7 +247,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 8,
"metadata": {
"scrolled": true
},
......@@ -267,7 +264,7 @@
"Link Function: identity Scale: 90.926\n",
"Method: IRLS Log-Likelihood: -3673.0\n",
"Date: Mon, 18 Jun 2018 Deviance: 90745.\n",
"Time: 15:47:54 Pearson chi2: 9.07e+04\n",
"Time: 16:01:57 Pearson chi2: 9.07e+04\n",
"No. Iterations: 3 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
......@@ -311,7 +308,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
......@@ -347,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"metadata": {},
"outputs": [
{
......@@ -401,7 +398,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"metadata": {},
"outputs": [
{
......@@ -416,8 +413,8 @@
}
],
"source": [
"x2=np.loadtxt(\"data/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/data2_y.csv\")\n",
"x2=np.loadtxt(\"data/dataGLM/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/dataGLM/data2_y.csv\")\n",
"plt.plot(x2,y2,'.');"
]
},
......@@ -444,7 +441,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 12,
"metadata": {
"scrolled": true
},
......@@ -504,7 +501,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -519,8 +516,8 @@
}
],
"source": [
"x=np.loadtxt(\"data/accident_x.csv\")\n",
"y=np.loadtxt(\"data/accident_y.csv\")\n",
"x=np.loadtxt(\"data/dataGLM/accident_x.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/accident_y.csv\")\n",
"plt.plot(x,y,'.');"
]
},
......@@ -539,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"outputs": [
{
......@@ -560,7 +557,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {
"scrolled": true
},
......@@ -615,10 +612,10 @@
}
],
"source": [
"xt=np.loadtxt(\"data/accident_exposure_x.csv\")\n",
"xt=np.loadtxt(\"data/dataGLM/accident_exposure_x.csv\")\n",
"x=xt[:,0] #indice fangio\n",
"t=xt[:,1] #exposition\n",
"y=np.loadtxt(\"data/accident_exposure_y.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/accident_exposure_y.csv\")\n",
"plt.plot(t,y,'.');"
]
},
......@@ -733,7 +730,7 @@
},
{
"cell_type": "code",
"execution_count": 21,
"execution_count": 22,
"metadata": {},
"outputs": [
{
......@@ -759,8 +756,8 @@
}
],
"source": [
"x=np.loadtxt(\"data/bacteria_alone_x.csv\")\n",
"y=np.loadtxt(\"data/bacteria_alone_y.csv\")\n",
"x=np.loadtxt(\"data/dataGLM/bacteria_alone_x.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/bacteria_alone_y.csv\")\n",
"\n",
"print(\"matrice des inputs (transposée)\")\n",
"print(x[:10].T)\n",
......@@ -792,7 +789,7 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"outputs": [
{
......@@ -819,7 +816,7 @@
" <th>Date:</th> <td>Mon, 18 Jun 2018</td> <th> Deviance: </th> <td> 341.63</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>15:01:05</td> <th> Pearson chi2: </th> <td> 889.</td> \n",
" <th>Time:</th> <td>16:02:48</td> <th> Pearson chi2: </th> <td> 889.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Iterations:</th> <td>8</td> <th> Covariance Type: </th> <td>nonrobust</td>\n",
......@@ -851,7 +848,7 @@
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -170.81\n",
"Date: Mon, 18 Jun 2018 Deviance: 341.63\n",
"Time: 15:01:05 Pearson chi2: 889.\n",
"Time: 16:02:48 Pearson chi2: 889.\n",
"No. Iterations: 8 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
......@@ -863,7 +860,7 @@
"\"\"\""
]
},
"execution_count": 22,
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
......@@ -926,7 +923,7 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 25,
"metadata": {},
"outputs": [
{
......@@ -943,8 +940,8 @@
}
],
"source": [
"x=np.loadtxt(\"data/bacteria_grouped_x.csv\")\n",
"y=np.loadtxt(\"data/bacteria_grouped_y.csv\")\n",
"x=np.loadtxt(\"data/dataGLM/bacteria_grouped_x.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/bacteria_grouped_y.csv\")\n",
"\n",
"print(\"matrice des inputs (transposée)\")\n",
"print(x[:10].T)\n",
......@@ -965,7 +962,7 @@
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 26,
"metadata": {},
"outputs": [
{
......@@ -992,7 +989,7 @@
" <th>Date:</th> <td>Mon, 18 Jun 2018</td> <th> Deviance: </th> <td> 528.92</td> \n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>15:01:08</td> <th> Pearson chi2: </th> <td> 652.</td> \n",
" <th>Time:</th> <td>16:02:59</td> <th> Pearson chi2: </th> <td> 652.</td> \n",
"</tr>\n",
"<tr>\n",
" <th>No. Iterations:</th> <td>8</td> <th> Covariance Type: </th> <td>nonrobust</td>\n",
......@@ -1024,7 +1021,7 @@
"Link Function: logit Scale: 1.0000\n",
"Method: IRLS Log-Likelihood: -557.92\n",
"Date: Mon, 18 Jun 2018 Deviance: 528.92\n",
"Time: 15:01:08 Pearson chi2: 652.\n",
"Time: 16:02:59 Pearson chi2: 652.\n",
"No. Iterations: 8 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
......@@ -1036,7 +1033,7 @@
"\"\"\""
]
},
"execution_count": 24,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
......@@ -1077,7 +1074,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 27,
"metadata": {},
"outputs": [
{
......@@ -1116,7 +1113,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 28,
"metadata": {},
"outputs": [
{
......@@ -1190,7 +1187,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
......@@ -1250,7 +1247,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 31,
"metadata": {},
"outputs": [
{
......@@ -1446,20 +1443,20 @@
"9 10 1.51755 13.00 3.60 1.36 72.99 0.57 8.40 0.0 0.11 1"
]
},
"execution_count": 28,
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"glass_data_headers = [\"Id\", \"RI\", \"Na\", \"Mg\", \"Al\", \"Si\", \"K\", \"Ca\", \"Ba\", \"Fe\", \"glass-type\"]\n",
"glass_data = pd.read_csv(\"data/glass.csv\", names=glass_data_headers)\n",
"glass_data = pd.read_csv(\"data/dataGLM/glass.csv\", names=glass_data_headers)\n",
"glass_data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 32,
"metadata": {},
"outputs": [
{
......@@ -1499,23 +1496,15 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"model_1 Accuracy : 0.9846153846153847\n",
"model_2 Accuracy : 0.8615384615384616\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/site-packages/sklearn/model_selection/_split.py:2026: FutureWarning: From version 0.21, test_size will always complement train_size unless both are specified.\n",
" FutureWarning)\n"
"model_1 Accuracy : 1.0\n",
"model_2 Accuracy : 0.7846153846153846\n"
]
}
],
......@@ -1528,7 +1517,7 @@
"x=glass_data[glass_data_headers[:-1]]\n",
"y=glass_data[glass_data_headers[-1]]\n",
"\n",
"train_x, test_x, train_y, test_y = train_test_split(x,y, train_size=0.7)\n",
"train_x, test_x, train_y, test_y = train_test_split(x,y, test_size=0.3)\n",
"\n",
"\n",
"# modèle multinomial\n",
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
,vigon,MacBook-Pro-de-irma.local,17.06.2018 21:17,file:///Users/vigon/Library/Application%20Support/OpenOffice/4;
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment