### grec

parent fe49e909
No preview for this file type
No preview for this file type
 ... ... @@ -26,7 +26,7 @@ "***Mathématiquement 1:*** On imagine une fonction $L(x,y,w)$ et l'on décrète que la proba d'observer $y_i$ en présence de $x_i$ est: \n", "\n", "$$\n", "\t L( x_i , y_i , w ) \\qquad \\qquad \\mathrm{ pour un certain } w\n", "\t L( x_i , y_i , w ) \\qquad \\qquad \\text{ pour un certain } w\n", "$$\n", "\n", "En supposant les observations indépendantes, la proba d'observer simultanément $y,y,y...$ est donc de\n", ... ... @@ -62,7 +62,7 @@ "$$\n", "et le paramètre qui rend le plus vraissemblable l'ensemble de nos données est toujours: \n", "$$\n", "\t\t\\hat w =\\hbox{argmax}_w \\prod_i L( x_i , y_i , w ) \n", "\t\t\\hat w =\\mathrm{argmax}_w \\prod_i L( x_i , y_i , w ) \n", "\n", "La seule différence c'est que la fonction $L$ peut dépasser 1. \n", "\n", ... ... @@ -76,15 +76,12 @@ "* le $y'$ qui maximise la vraissemblance $y\\to L(x',y,\\hat w)$ \n", "* l'espérance de la v.a $Y$ dont la densité est $y\\to L(x',y,\\hat w)$ \n", "\n", "On choisi en général la seconde estimation (qui coincide parfois avec la première). \n", "\n", "\n", "\n" "On choisi en général la seconde estimation (qui coincide parfois avec la première). \n" ] }, { "cell_type": "code", "execution_count": 33, "execution_count": 4, "metadata": {}, "outputs": [], "source": [ ... ... @@ -115,7 +112,7 @@ }, { "cell_type": "code", "execution_count": 34, "execution_count": 5, "metadata": { "scrolled": false }, ... ... @@ -132,11 +129,11 @@ } ], "source": [ "x0=np.loadtxt(\"data/data0_x.csv\")\n", "y0=np.loadtxt(\"data/data0_y.csv\")\n", "x0=np.loadtxt(\"data/dataGLM/data0_x.csv\")\n", "y0=np.loadtxt(\"data/dataGLM/data0_y.csv\")\n", "\n", "x1=np.loadtxt(\"data/data1_x.csv\")\n", "y1=np.loadtxt(\"data/data1_y.csv\")\n", "x1=np.loadtxt(\"data/dataGLM/data1_x.csv\")\n", "y1=np.loadtxt(\"data/dataGLM/data1_y.csv\")\n", "\n", "plt.figure(figsize=(12,6))\n", "plt.subplot(1,2,1)\n", ... ... 
@@ -182,7 +179,7 @@ }, { "cell_type": "code", "execution_count": 35, "execution_count": 6, "metadata": {}, "outputs": [ { ... ... @@ -212,7 +209,7 @@ }, { "cell_type": "code", "execution_count": 36, "execution_count": 7, "metadata": {}, "outputs": [ { ... ... @@ -250,7 +247,7 @@ }, { "cell_type": "code", "execution_count": 37, "execution_count": 8, "metadata": { "scrolled": true }, ... ... @@ -267,7 +264,7 @@ "Link Function: identity Scale: 90.926\n", "Method: IRLS Log-Likelihood: -3673.0\n", "Date: Mon, 18 Jun 2018 Deviance: 90745.\n", "Time: 15:47:54 Pearson chi2: 9.07e+04\n", "Time: 16:01:57 Pearson chi2: 9.07e+04\n", "No. Iterations: 3 Covariance Type: nonrobust\n", "==============================================================================\n", " coef std err z P>|z| [0.025 0.975]\n", ... ... @@ -311,7 +308,7 @@ }, { "cell_type": "code", "execution_count": 10, "execution_count": 9, "metadata": {}, "outputs": [ { ... ... @@ -347,7 +344,7 @@ }, { "cell_type": "code", "execution_count": 12, "execution_count": 10, "metadata": {}, "outputs": [ { ... ... @@ -401,7 +398,7 @@ }, { "cell_type": "code", "execution_count": 13, "execution_count": 11, "metadata": {}, "outputs": [ { ... ... @@ -416,8 +413,8 @@ } ], "source": [ "x2=np.loadtxt(\"data/data2_x.csv\")\n", "y2=np.loadtxt(\"data/data2_y.csv\")\n", "x2=np.loadtxt(\"data/dataGLM/data2_x.csv\")\n", "y2=np.loadtxt(\"data/dataGLM/data2_y.csv\")\n", "plt.plot(x2,y2,'.');" ] }, ... ... @@ -444,7 +441,7 @@ }, { "cell_type": "code", "execution_count": 14, "execution_count": 12, "metadata": { "scrolled": true }, ... ... @@ -504,7 +501,7 @@ }, { "cell_type": "code", "execution_count": 15, "execution_count": 14, "metadata": {}, "outputs": [ { ... ... @@ -519,8 +516,8 @@ } ], "source": [ "x=np.loadtxt(\"data/accident_x.csv\")\n", "y=np.loadtxt(\"data/accident_y.csv\")\n", "x=np.loadtxt(\"data/dataGLM/accident_x.csv\")\n", "y=np.loadtxt(\"data/dataGLM/accident_y.csv\")\n", "plt.plot(x,y,'.');" ] }, ... ... 
@@ -539,7 +536,7 @@ }, { "cell_type": "code", "execution_count": 16, "execution_count": 15, "metadata": {}, "outputs": [ { ... ... @@ -560,7 +557,7 @@ }, { "cell_type": "code", "execution_count": 17, "execution_count": 16, "metadata": { "scrolled": true }, ... ...
This source diff could not be displayed because it is too large. You can view the blob instead.
This diff is collapsed.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
 ,vigon,MacBook-Pro-de-irma.local,17.06.2018 21:17,file:///Users/vigon/Library/Application%20Support/OpenOffice/4; \ No newline at end of file

46.3 KB

1.43 MB

No preview for this file type
No preview for this file type
 # Synthetic data generators: one function per response distribution.
"""Generate synthetic CSV data sets for generalized-linear-model examples.

Each generator draws explanatory variables at random, samples a response from
one distribution (Gaussian, gamma, Bernoulli, binomial or Poisson) and writes
both with ``np.savetxt`` to ``<out_dir>/<name>_x.csv`` and
``<out_dir>/<name>_y.csv``.  ``out_dir`` defaults to ``"data"``.

NOTE(review): the module calls ``create_data_poisson_exposure()`` at import
time (see the last line); this is kept from the original script.
"""
import os

import numpy as np
import scipy.stats  # as stats
import statsmodels.api as sm
from matplotlib import pyplot as plt
from scipy import stats
from scipy.special import expit

np.set_printoptions(linewidth=50000)

# Directory every generator writes to unless ``out_dir`` is given.
DEFAULT_OUT_DIR = "data"


def _save(out_dir, name, data, **savetxt_kwargs):
    """Write ``data`` to ``<out_dir>/<name>`` with ``np.savetxt``.

    The directory is created first so that a fresh checkout without the
    output folder does not fail with ``FileNotFoundError``.
    """
    os.makedirs(out_dir, exist_ok=True)
    np.savetxt(os.path.join(out_dir, name), data, **savetxt_kwargs)


def createData_gauss(nbData: int, *, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``data0_x.csv`` / ``data0_y.csv``: Gaussian noise, identity link.

    ``x`` is uniform on [0, 2) and ``y = 10 * x + 5 + noise`` where the noise
    is normal with mean 0 and standard deviation 10.

    Args:
        nbData: number of samples to draw.
        out_dir: directory receiving the two CSV files.
    """
    x = np.random.random(nbData) * 2
    w = 10
    b = 5
    y = w * x + b + np.random.normal(0, 10, size=[nbData])
    _save(out_dir, "data0_x.csv", x, fmt="%.2f")
    _save(out_dir, "data0_y.csv", y, fmt="%.2f")


def createData_gamma(nbData: int, *, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``data1_x.csv`` / ``data1_y.csv``: gamma response, identity link.

    ``x`` is uniform on [0, 2); the mean is ``mu = 10 * x + 5`` and ``y`` is
    gamma distributed with shape ``k = 1`` and scale ``mu / k``.

    Args:
        nbData: number of samples to draw.
        out_dir: directory receiving the two CSV files.
    """
    x = np.random.random(nbData) * 2
    w = 10
    b = 5
    # Linear predictor: mu[i] = w * x[i] + b.
    mu = w * x + b
    k = 1.
    # One vectorised draw; ``scale`` broadcasts element-wise over ``mu``.
    y = np.random.gamma(shape=k, scale=mu / k, size=nbData)
    _save(out_dir, "data1_x.csv", x, fmt="%.2f")
    _save(out_dir, "data1_y.csv", y, fmt="%.2f")


def createData_gamma_exp(nbData: int, *, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``data2_x.csv`` / ``data2_y.csv``: gamma response, log link.

    ``x`` is uniform on [0, 2); the mean is ``mu = exp(2 * x + 3)`` and ``y``
    is gamma distributed with shape ``k = 1`` and scale ``mu / k``.

    Args:
        nbData: number of samples to draw.
        out_dir: directory receiving the two CSV files.
    """
    x = np.random.random(nbData) * 2
    w = 2
    b = 3
    mu = np.exp(w * x + b)
    k = 1
    y = np.random.gamma(shape=k, scale=mu / k, size=nbData)
    _save(out_dir, "data2_x.csv", x, fmt="%.2f")
    _save(out_dir, "data2_y.csv", y, fmt="%.2f")


def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``.

    Uses ``scipy.special.expit`` because the naive ``exp(x) / (1 + exp(x))``
    overflows to ``inf / inf = nan`` for large positive ``x``.
    """
    return expit(x)


def create_data_bernoulli(*, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``bacteria_alone_{x,y}.csv``: data that follow a logistic model.

    Each row stands for one bacterium.  ``x`` has two columns, food and
    oxygen, both uniform on [0, 5).  ``y`` is 1 (alive) or 0 (dead), drawn
    with success probability ``sigmoid(-15 + 4 * food + 2 * oxygen)``.

    Args:
        out_dir: directory receiving the two CSV files.
    """
    nb_sample = 1000

    # Hidden parameters of the model.
    w0 = -15
    w1 = 4
    w2 = 2

    # Descriptors, i.e. explanatory variables.
    x1 = np.random.uniform(low=0.0, high=5.0, size=nb_sample)
    x2 = np.random.uniform(low=0.0, high=5.0, size=nb_sample)

    mu = sigmoid(w0 + w1 * x1 + w2 * x2)
    # One Bernoulli trial per row, drawn in a single vectorised call.
    y = np.random.binomial(n=1, p=mu).astype(np.int64)

    x = np.column_stack([x1, x2])
    _save(out_dir, "bacteria_alone_x.csv", x, fmt="%.2f", header="food, oxygen")
    _save(out_dir, "bacteria_alone_y.csv", y, fmt="%d", header="1-> alive, 0-> dead ")


def create_data_binomiale(*, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``bacteria_grouped_{x,y}.csv``: grouped (binomial) logistic data.

    Each row stands for a group of 5 to 10 bacteria sharing the same food and
    oxygen levels (both uniform on [0, 5)).  ``y`` has two columns: the number
    of successes drawn with probability
    ``sigmoid(-15 + 4 * food + 2 * oxygen)`` and the remainder of the group.

    Args:
        out_dir: directory receiving the two CSV files.
    """
    n_sample = 1000
    b0 = -15
    b1 = 4
    b2 = 2

    x1 = np.random.uniform(low=0.0, high=5.0, size=n_sample)
    x2 = np.random.uniform(low=0.0, high=5.0, size=n_sample)

    # Group sizes: integers from 5 to 10 inclusive (``high`` is exclusive).
    effectif = np.random.randint(low=5, high=11, size=n_sample)

    probs = sigmoid(b0 + b1 * x1 + b2 * x2)
    y1 = np.random.binomial(n=effectif, p=probs).astype(np.int64)
    y2 = effectif - y1

    x = np.column_stack([x1, x2])
    y = np.column_stack([y1, y2])

    _save(out_dir, "bacteria_grouped_x.csv", x, fmt="%.2f", header="food, oxygen")
    # The columns are counts (successes, failures), so the header names them
    # instead of reusing the 0/1 legend of the Bernoulli file.
    _save(out_dir, "bacteria_grouped_y.csv", y, fmt="%d", header="alive, dead")


def create_data_poisson(*, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``accident_{x,y}.csv``: Poisson counts with a log link.

    ``y`` is a number of accidents and ``x`` is the driver's "Fangio index",
    drawn from a Beta(0.5, 1.5) distribution.  The Poisson rate is
    ``exp(-4 + 5 * x)``.

    Args:
        out_dir: directory receiving the two CSV files.
    """
    nb_sample = 1000
    w0 = -4
    w1 = 5

    # Descriptor, i.e. explanatory variable.
    x = np.random.beta(a=0.5, b=1.5, size=nb_sample)

    mu = np.exp(w0 + w1 * x)
    y = np.random.poisson(lam=mu, size=nb_sample)

    _save(out_dir, "accident_x.csv", x, fmt="%.2f")
    _save(out_dir, "accident_y.csv", y, fmt="%d")


def create_data_poisson_exposure(*, out_dir: str = DEFAULT_OUT_DIR) -> None:
    """Write ``accident_exposure_{x,y}.csv``: Poisson counts with exposure.

    ``x`` has two columns: a Beta(0.5, 1.5) draw and an integer in
    [30, 1095) -- presumably an observation period in days, since the rate is
    scaled by ``x1 / 365``; confirm against the consumer of the file.  The
    Poisson rate is ``exp(-4 + 5 * x0) * x1 / 365``.

    Args:
        out_dir: directory receiving the two CSV files.
    """
    nb_sample = 1000
    w0 = -4
    w1 = 5

    # Descriptors, i.e. explanatory variables.
    x0 = np.random.beta(a=0.5, b=1.5, size=nb_sample)
    x1 = np.random.randint(30, 365 * 3, size=nb_sample)

    mu = np.exp(w0 + w1 * x0) * x1 / 365
    y = np.random.poisson(lam=mu, size=nb_sample)

    _save(out_dir, "accident_exposure_x.csv", np.column_stack([x0, x1]), fmt="%.2f %d")
    _save(out_dir, "accident_exposure_y.csv", y, fmt="%d")


# Kept from the original script: regenerates the exposure data set whenever
# the module is executed or imported.
create_data_poisson_exposure()
This diff is collapsed.

44.5 KB

13.7 KB

18.1 KB

19.8 KB

5.6 KB

7.99 KB

6.71 KB

9.79 KB

24.2 KB

40 KB

24 KB

File deleted
File deleted