Commit 7da51ea7 authored by vincentvigon's avatar vincentvigon

grec

parent fe49e909
No preview for this file type
No preview for this file type
......@@ -26,7 +26,7 @@
"***Mathématiquement 1:*** On imagine une fonction $L(x,y,w)$ et l'on décrète que la proba d'observer $y_i$ en présence de $x_i$ est: \n",
"\n",
"$$\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\mathrm{ pour un certain } w\n",
"\t L( x_i , y_i , w ) \\qquad \\qquad \\text{ pour un certain } w\n",
"$$\n",
"\n",
"En supposant les observations indépendantes, la proba d'observer simultanément $y[0],y[1],y[2]...$ est donc de\n",
......@@ -62,7 +62,7 @@
"$$\n",
"et le paramètre qui rend le plus vraisemblable l'ensemble de nos données est toujours: \n",
"$$\n",
"\t\t\\hat w =\\hbox{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"\t\t\\hat w =\\mathrm{argmax}_w \\prod_i L( x_i , y_i , w ) \n",
"$$\n",
"La seule différence c'est que la fonction $L$ peut dépasser 1. \n",
"\n",
......@@ -76,15 +76,12 @@
"* le $y'$ qui maximise la vraisemblance $y\\to L(x',y,\\hat w)$ \n",
"* l'espérance de la v.a $Y$ dont la densité est $y\\to L(x',y,\\hat w)$ \n",
"\n",
"On choisi en général la seconde estimation (qui coincide parfois avec la première). \n",
"\n",
"\n",
"\n"
"On choisit en général la seconde estimation (qui coïncide parfois avec la première). \n"
]
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
......@@ -115,7 +112,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 5,
"metadata": {
"scrolled": false
},
......@@ -132,11 +129,11 @@
}
],
"source": [
"x0=np.loadtxt(\"data/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/data0_y.csv\")\n",
"x0=np.loadtxt(\"data/dataGLM/data0_x.csv\")\n",
"y0=np.loadtxt(\"data/dataGLM/data0_y.csv\")\n",
"\n",
"x1=np.loadtxt(\"data/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/data1_y.csv\")\n",
"x1=np.loadtxt(\"data/dataGLM/data1_x.csv\")\n",
"y1=np.loadtxt(\"data/dataGLM/data1_y.csv\")\n",
"\n",
"plt.figure(figsize=(12,6))\n",
"plt.subplot(1,2,1)\n",
......@@ -182,7 +179,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 6,
"metadata": {},
"outputs": [
{
......@@ -212,7 +209,7 @@
},
{
"cell_type": "code",
"execution_count": 36,
"execution_count": 7,
"metadata": {},
"outputs": [
{
......@@ -250,7 +247,7 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 8,
"metadata": {
"scrolled": true
},
......@@ -267,7 +264,7 @@
"Link Function: identity Scale: 90.926\n",
"Method: IRLS Log-Likelihood: -3673.0\n",
"Date: Mon, 18 Jun 2018 Deviance: 90745.\n",
"Time: 15:47:54 Pearson chi2: 9.07e+04\n",
"Time: 16:01:57 Pearson chi2: 9.07e+04\n",
"No. Iterations: 3 Covariance Type: nonrobust\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
......@@ -311,7 +308,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [
{
......@@ -347,7 +344,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 10,
"metadata": {},
"outputs": [
{
......@@ -401,7 +398,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 11,
"metadata": {},
"outputs": [
{
......@@ -416,8 +413,8 @@
}
],
"source": [
"x2=np.loadtxt(\"data/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/data2_y.csv\")\n",
"x2=np.loadtxt(\"data/dataGLM/data2_x.csv\")\n",
"y2=np.loadtxt(\"data/dataGLM/data2_y.csv\")\n",
"plt.plot(x2,y2,'.');"
]
},
......@@ -444,7 +441,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 12,
"metadata": {
"scrolled": true
},
......@@ -504,7 +501,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 14,
"metadata": {},
"outputs": [
{
......@@ -519,8 +516,8 @@
}
],
"source": [
"x=np.loadtxt(\"data/accident_x.csv\")\n",
"y=np.loadtxt(\"data/accident_y.csv\")\n",
"x=np.loadtxt(\"data/dataGLM/accident_x.csv\")\n",
"y=np.loadtxt(\"data/dataGLM/accident_y.csv\")\n",
"plt.plot(x,y,'.');"
]
},
......@@ -539,7 +536,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 15,
"metadata": {},
"outputs": [
{
......@@ -560,7 +557,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 16,
"metadata": {
"scrolled": true
},
......
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
This source diff could not be displayed because it is too large. You can view the blob instead.
,vigon,MacBook-Pro-de-irma.local,17.06.2018 21:17,file:///Users/vigon/Library/Application%20Support/OpenOffice/4;
\ No newline at end of file
No preview for this file type
No preview for this file type
import numpy as np
import statsmodels.api as sm
from scipy import stats
from matplotlib import pyplot as plt
np.set_printoptions(linewidth=50000)
import scipy.stats #as stats
def createData_gauss(nbData: int) -> None:
    """Generate a linear-Gaussian dataset y = w*x + b + noise and save it to CSV.

    x is uniform on [0, 2); the hidden parameters are w=10, b=5 and the
    noise is N(0, 10).

    Parameters
    ----------
    nbData : int
        Number of observations to generate.
    """
    x = np.random.random(nbData) * 2
    w = 10  # hidden slope
    b = 5   # hidden intercept
    y = w * x + b + np.random.normal(0, 10, size=[nbData])
    # The companion notebook loads these from data/dataGLM/ — keep paths in sync.
    np.savetxt("data/dataGLM/data0_x.csv", x, fmt="%.2f")
    np.savetxt("data/dataGLM/data0_y.csv", y, fmt="%.2f")
def createData_gamma(nbData: int) -> None:
    """Generate a Gamma-GLM dataset with identity link and save it to CSV.

    E[y|x] = w*x + b with hidden parameters w=10, b=5; each response is
    drawn as y[i] ~ Gamma(shape=k, scale=mu[i]/k) so that its mean is mu[i].

    Parameters
    ----------
    nbData : int
        Number of observations to generate.
    """
    x = np.random.random(nbData) * 2
    w = 10
    b = 5
    mu = w * x + b  # mean of the Gamma response, one value per observation
    k = 1.0  # shape parameter; k=1 makes each Gamma draw an exponential law
    # Vectorized draw: `scale` may be an array, replacing the per-element loop.
    y = np.random.gamma(shape=k, scale=mu / k, size=nbData)
    # The companion notebook loads these from data/dataGLM/ — keep paths in sync.
    np.savetxt("data/dataGLM/data1_x.csv", x, fmt="%.2f")
    np.savetxt("data/dataGLM/data1_y.csv", y, fmt="%.2f")
def createData_gamma_exp(nbData: int) -> None:
    """Generate a Gamma-GLM dataset with log link (mu = exp(w*x + b)) and save it.

    Hidden parameters: w=2, b=3.  Each response is drawn as
    y[i] ~ Gamma(shape=k, scale=mu[i]/k) so that its mean is mu[i].

    Parameters
    ----------
    nbData : int
        Number of observations to generate.
    """
    x = np.random.random(nbData) * 2
    w = 2
    b = 3
    mu = np.exp(w * x + b)  # log link: E[y|x] = exp(w*x + b)
    k = 1  # shape parameter; k=1 makes each Gamma draw an exponential law
    # Vectorized draw: `scale` may be an array, replacing the per-element loop.
    y = np.random.gamma(shape=k, scale=mu / k, size=nbData)
    # The companion notebook loads these from data/dataGLM/ — keep paths in sync.
    np.savetxt("data/dataGLM/data2_x.csv", x, fmt="%.2f")
    np.savetxt("data/dataGLM/data2_y.csv", y, fmt="%.2f")
def sigmoid(x):
return np.exp(x) / (1 + np.exp(x))
"""
Voici un jeu de données fabriqué qui colle exactement au modèle logistique.
Chaque ligne représente une bactérie.
# y : 1-> alive, 0-> dead
# x1 : food
# x2 : oxygen
"""
def create_data_bernoulli():
    """Generate a logistic-regression (Bernoulli) dataset and save it to CSV.

    Each row is one bacterium: y = 1 (alive) / 0 (dead), explained by
    x1 = food and x2 = oxygen through a logistic model with hidden
    parameters w0=-15, w1=4, w2=2.
    """
    nb_sample = 1000
    # hidden parameters
    w0 = -15
    w1 = 4
    w2 = 2
    # descriptors = explanatory variables
    x1 = np.random.uniform(low=0.0, high=5.0, size=nb_sample)
    x2 = np.random.uniform(low=0.0, high=5.0, size=nb_sample)
    mu = sigmoid(w0 + w1 * x1 + w2 * x2)
    # Vectorized Bernoulli draw: `p` may be an array (one probability per
    # sample), replacing the per-element loop with size=1 assignments.
    y = np.random.binomial(n=1, p=mu).astype(np.int64)
    x = np.array([x1, x2]).T
    np.savetxt("data/bacteria_alone_x.csv", x, fmt="%.2f", header="food, oxygen")
    np.savetxt("data/bacteria_alone_y.csv", y, fmt="%d", header="1-> alive, 0-> dead ")
def create_data_binomiale():
    """Generate a grouped (binomial) bacteria dataset and save it to CSV.

    For each of the 1000 samples, `effectif` bacteria (5 to 10) share the
    same food/oxygen conditions; y1 counts survivors and y2 = effectif - y1
    counts deaths.  Hidden logistic parameters: b0=-15, b1=4, b2=2.
    """
    n_sample = 1000
    # hidden parameters
    b0 = -15
    b1 = 4
    b2 = 2
    # descriptors = explanatory variables (unused x0 intercept column removed)
    x1 = np.random.uniform(low=0.0, high=5.0, size=n_sample)
    x2 = np.random.uniform(low=0.0, high=5.0, size=n_sample)
    effectif = np.random.randint(low=5, high=11, size=n_sample)  # group sizes in [5, 10]
    probs = sigmoid(b0 + b1 * x1 + b2 * x2)
    # Vectorized binomial draw: both `n` and `p` may be arrays, replacing
    # the per-element loop with size=1 assignments.
    y1 = np.random.binomial(n=effectif, p=probs).astype(np.int64)
    y2 = effectif - y1  # deaths complete each group
    x = np.array([x1, x2]).T
    y = np.array([y1, y2]).T
    np.savetxt("data/bacteria_grouped_x.csv", x, fmt="%.2f", header="food, oxygen")
    np.savetxt("data/bacteria_grouped_y.csv", y, fmt="%d", header="1-> alive, 0-> dead ")
"""
# y : nombre d'accidents
# x : indice fangio du conducteur
"""
def create_data_poisson():
    """Generate a Poisson-regression dataset (accident counts) and save it.

    x is the driver's "fangio index", drawn from Beta(0.5, 1.5); the
    accident count is y ~ Poisson(exp(w0 + w1*x)) with hidden parameters
    w0=-4, w1=5.
    """
    nb_sample = 1000
    w0 = -4
    w1 = 5
    # descriptors = explanatory variables
    x = np.random.beta(a=0.5, b=1.5, size=nb_sample)
    mu = np.exp(w0 + w1 * x)  # log link: E[y|x] = exp(w0 + w1*x)
    y = np.random.poisson(lam=mu, size=nb_sample)
    # The companion notebook loads these from data/dataGLM/ — keep paths in sync.
    np.savetxt("data/dataGLM/accident_x.csv", x, fmt="%.2f")
    np.savetxt("data/dataGLM/accident_y.csv", y, fmt="%d")
def create_data_poisson_exposure():
    """Generate a Poisson dataset with an exposure column and save it.

    x0 is the driver's "fangio index" (Beta(0.5, 1.5)); x1 is an exposure
    in days, between 30 days and 3 years.  The mean count is scaled by the
    exposure expressed in years: mu = exp(w0 + w1*x0) * x1/365.
    """
    nb_sample = 1000
    w0 = -4
    w1 = 5
    # descriptors = explanatory variables
    x0 = np.random.beta(a=0.5, b=1.5, size=nb_sample)
    x1 = np.random.randint(30, 365 * 3, size=nb_sample)  # exposure in days
    mu = np.exp(w0 + w1 * x0) * x1 / 365  # rate scaled by exposure in years
    y = np.random.poisson(lam=mu, size=nb_sample)
    # NOTE(review): other generators write under data/dataGLM/ — confirm
    # whether these two files should move there as well.
    # fmt is a single multi-format string: float column then int column.
    np.savetxt("data/accident_exposure_x.csv", np.stack([x0, x1]).T, fmt="%.2f %d")
    np.savetxt("data/accident_exposure_y.csv", y, fmt="%d")
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    create_data_poisson_exposure()
This diff is collapsed.
import numpy as np
def readX():
    """Load the generated x-values from disk and print them."""
    # NOTE(review): the companion notebook now reads from data/dataGLM/ —
    # confirm whether this path should be updated too.
    print(np.loadtxt("data/data0_x.csv"))
from data_analysis.D12_GLM.pack.toto import readX
import scipy.stats as st
import numpy as np
# NOTE(review): the return value of np.random.normal() is discarded — this
# call only advances the global RNG state; confirm it is intentional.
np.random.normal()
# Print the contents of data/data0_x.csv via the imported helper.
readX()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment