The normal distribution (demos)

Normal Approximation to Binomial Distribution

N tosses of a coin with N=50,80,100; p=0.2,0.4,0.5

In [1]:
from pylab import *
from scipy.special import binom
matplotlib.rcParams['figure.figsize'] = [10,8]
dist1=array([binom(50,j)*0.2**j*0.8**(50-j)for j in range(51)])
dist2=array([binom(80,j)*0.4**j*0.6**(80-j)for j in range(81)])
dist3=array([binom(100,j)*0.5**j*0.5**(100-j)for j in range(101)])
stem(range(len(dist1)),dist1,linefmt='g-',markerfmt='go')
stem(range(len(dist2)),dist2,linefmt='b-',markerfmt='bo')
stem(range(len(dist3)),dist3,linefmt='r-',markerfmt='ro')
show()
<Figure size 1000x800 with 1 Axes>

Mean-adjusted (set all means to 0)

In [2]:
base1=arange(len(dist1))-10
base2=arange(len(dist2))-32
base3=arange(len(dist3))-50
stem(base1, dist1,linefmt='g-',markerfmt='go')
stem(base2, dist2,linefmt='b-',markerfmt='bo')
stem(base3, dist3,linefmt='r-',markerfmt='ro')
xlim(-20,20)
show()

Variance adjust (all variances = 1)

In [3]:
#divide the values of each binomial random variable by its standard deviation
base1=base1/sqrt(8)
base2=base2/sqrt(19.2)
base3=base3/sqrt(25)
stem(base1, dist1,linefmt='g-',markerfmt='go')
stem(base2, dist2,linefmt='b-',markerfmt='bo')
stem(base3, dist3,linefmt='r-',markerfmt='ro')
xlim(-4,4)
show()

Height adjust (make area under each plot equal to 1)

In [4]:
#multiply back by the standard deviation
#this sets the area under each curve back to 1
#so now they all look like densities again
stem(base1, sqrt(8)*dist1,linefmt='g-',markerfmt='go')
stem(base2, sqrt(19.2)*dist2,linefmt='b-',markerfmt='bo')
stem(base3, sqrt(25)*dist3,linefmt='r-',markerfmt='ro')
xlim(-4,4)
show()

Superimpose the graph of standard normal density ${1\over {\sqrt{2\pi}}}e^{-x^2/2}$

In [5]:
stem(base1, sqrt(8)*dist1,linefmt='g-',markerfmt='go')
stem(base2, sqrt(19.2)*dist2,linefmt='b-',markerfmt='bo')
stem(base3, sqrt(25)*dist3,linefmt='r-',markerfmt='ro')
x=arange(-4,4.01,0.2)
y=exp(-x**2/2)/sqrt(2*pi)
plot(x, y,color='k',linewidth=3)
xlim(-4,4)
show()

Sum of identically distributed random variables (Central Limit Theorem)

Convolution

In [6]:
#v and w are lists giving the distributions of independent random
#variables X and Y, so that v[i] is P_X(i)=P(X=i),
#and likewise for Y.  What is returned is the distribution
#of X+Y, called the convolution of P_X and P_y

from pylab import *
def convolution(v,w):
    conv=[]
    for j in range(len(v)+len(w)):
        s=0
        for m in range(min(len(v),j)):
            if j-m in range(len(w)):
                s+=v[m]*w[j-m]
        conv.append(s)
    return conv

Compute distribution of the sum of two dice.

In [7]:
onedie=[0]+[1/6]*6
print(convolution(onedie,onedie))
[0, 0.0, 0.027777777777777776, 0.05555555555555555, 0.08333333333333333, 0.1111111111111111, 0.1388888888888889, 0.16666666666666669, 0.1388888888888889, 0.1111111111111111, 0.08333333333333333, 0.05555555555555555, 0.027777777777777776, 0]

Plot sums of identically distributed independent random variables

In [8]:
def plotd(dist,n):
    X=[1]
    for j in range(n):
        X=convolution(X,dist)
    stem(range(len(X)),X,linefmt='C0-',markerfmt='C0.')
    title('Sum of '+str(n)+' trials')
    show()

Sums of n dice

In [9]:
plotd([0]+[1/6]*6,2)
plotd([0]+[1/6]*6,3)
plotd([0]+[1/6]*6,4)
plotd([0]+[1/6]*6,10)
plotd([0]+[1/6]*6,20)

Something very asymmetric

In [10]:
dist=array([4,19,0,0,0,17,2,0,0,0,6,0,0,0,23])
dist=dist/sum(dist)
plotd(dist,1)
plotd(dist,2)
plotd(dist,4)
plotd(dist,10)
plotd(dist,20)
plotd(dist,40)

A continuous example: Sums of Exponential Distributions

In [3]:
#This is based on the following formula for the density of the
#sum of n independent exponential
#densities with lambda = 1:
#x^(n-1)* exp(-x)/(n-1)!
def expdist(x,j):
    if x<0:
        return 0
    else:
        return x**(j-1)*exp(-x)/math.factorial(j-1)
In [4]:
matplotlib.rcParams['figure.figsize'] = [10,8]
x=arange(0,20.05,0.01)
for j in [1,2,3,4,5,15]:
    u=[expdist(y,j) for y in x]
    plot(x,u)
xlim(0,20)
title("""densities of sum of n independent exponential random variables\n
for n=1,2,3,4,15""")
show()

#mean-adjust

xx=x-10
for j in [1,2,3,4,5,15]:
    u=[expdist(y+j,j) for y in xx]
    plot(xx,u)
xlim(-10,10)
title('the same, with means adjusted to 0')
show()

for j in [1,2,3,4,5,15]:
    u=[expdist((y+j),j) for y in xx]
    v=[z*sqrt(j)for z in u]
    plot(xx/sqrt(j),v)
    
xlim(-5,5)
ylim(0,1)
title('the same, with variances adjusted to 1 and heights adjusted to make densities')
show()

for j in [1,2,3,4,5,15]:
    u=[expdist((y+j),j) for y in xx]
    v=[z*sqrt(j)for z in u]
    plot(xx/sqrt(j),v)
    
xlim(-5,5)
ylim(0,.6)
title('the same, with variances adjusted to 1 and heights adjusted to make densities')
plot(xx, exp(-xx**2/2)/sqrt(2*3.141562),'k--',linewidth=2)
xlim(-5,5)
ylim(0,0.6)
title('and with the standard normal distribution and the case n=100 superimposed')
show()