## *Elements of Machine Learning* 2024
## Alan Reyes-Figueroa

# Generación de datos aleatorios

In [None]:
import numpy as np             # cálculo numérico
import scipy.stats as st       # estadística

import matplotlib.pyplot as plt  # graficación
import seaborn as sns            # graficación

In [None]:
import warnings
# Silence every warning for the rest of the session. This keeps the notebook
# output clean, but it also hides any deprecation notices raised later on.
warnings.filterwarnings('ignore')

## Ejemplo Bernoulli

In [None]:
# Seed NumPy's global random generator so the samples below are reproducible.
np.random.seed(14927)

In [None]:
# Draw N = 1000 samples from a Bernoulli variable Ber(0.5).
# Outcome 0 has probability 1 - p and outcome 1 has probability p.

N = 1000
p = 0.5
outcome_probs = [1 - p, p]
sample = np.random.choice(2, size=N, p=outcome_probs)

In [None]:
# Shape of the sample array (one entry per draw).
sample.shape

In [None]:
# Peek at the first 36 draws.
sample[:36]

In [None]:
# Histogram of the Bernoulli sample, using matplotlib's default binning.
plt.figure()
plt.hist(sample)
plt.show()

In [None]:
# Empirical frequency of the outcome 0 (an estimate of 1 - p).
(sample==0).sum() / N

In [None]:
# Empirical frequency of the outcome 1 (an estimate of p).
(sample==1).sum() / N

In [None]:
# Alternative: draw the Bernoulli sample with scipy.stats

N = 1000
p = 0.5
sample2 = st.bernoulli.rvs(p, size=N)

In [None]:
# Histogram of the scipy-generated Bernoulli sample.
plt.figure()
plt.hist(sample2)
plt.show()

In [None]:
# Absolute counts of zeros and ones in the second sample.
print((sample2==0).sum())
print((sample2==1).sum())

## Ejemplo Uniforme

In [None]:
# Draw N = 5000 samples from a discrete uniform variable on the integers
# {a, a+1, ..., b}, i.e. U[a, b] with both endpoints included.

N = 5000
a = 8
b = 18
# randint excludes its upper bound, hence b + 1 so that b itself can be drawn.
sample = np.random.randint(a, b + 1, size=N)

In [None]:
# Shape of the sample array (one entry per draw).
sample.shape

In [None]:
# Smallest and largest values actually drawn.
print(sample.min())
print(sample.max())

In [None]:
# Number of bins = number of integers between the smallest and largest draw.
bns = sample.max() - sample.min() + 1
plt.figure()
plt.hist(sample, bins=bns)
plt.show()

In [None]:
# Integer bin edges min, min+1, ..., max+1: one unit-width bin per integer value.
bins = np.arange(sample.min(), sample.max()+2)
bins

In [None]:
# x receives the count in each bin, y the bin edges.
x, y = np.histogram(sample, bins=bins)

In [None]:
# The edge array y has one more entry than the count array x.
print(x.shape, y.shape)

In [None]:
# Empirical mass function (dashed) and empirical distribution function (solid).
# y[:-1] are the left bin edges, which here are the integer values themselves.
plt.figure()
plt.plot(y[:-1], x/N, '--', label='mass')
plt.plot(y[:-1], x.cumsum()/N, '-', label='distribution')
plt.legend()
plt.show()

In [None]:
# Print the empirical probability of every integer between the smallest
# and the largest value drawn.
lowest, highest = sample.min(), sample.max()
for value in range(lowest, highest + 1):
    hits = (sample == value).sum()
    print(value, hits / N)

## Ejemplo Binomial

In [None]:
# Draw N = 10000 samples from a Binomial variable Bin(k, p):
# k independent trials, each succeeding with probability p.

N = 10000
k = 25
p = 0.5
sample = np.random.binomial(n=k, p=p, size=N)

In [None]:
# Shape of the sample array (one entry per draw).
sample.shape

In [None]:
# Peek at the first 44 draws.
sample[:44]

In [None]:
# Number of bins = number of integers between the smallest and largest draw.
bns = sample.max() - sample.min() + 1
plt.figure()
plt.hist(sample, bins=bns)
# Show the full support 0..k on the x-axis, even where nothing was drawn.
plt.xlim([0,k])
plt.show()

In [None]:
bins = np.arange(sample.min(), sample.max()+1)
x, y = np.histogram(sample, bins=bns)

In [None]:
plt.figure()
plt.plot(y[1:], x/N, '--', label='mass')
plt.plot(y[1:], x.cumsum()/N, '-', label='distribution')
plt.legend()
plt.show()

In [None]:
# Print the empirical probability of every possible outcome 0, 1, ..., k.
for value in range(k + 1):
    hits = (sample == value).sum()
    print(value, hits / N)

In [None]:
sample.mean()    # empirical mean (average of the sample)

In [None]:
k*p              # theoretical mean of Bin(k, p)

In [None]:
# Relative error of the empirical mean with respect to the theoretical mean k*p,
# printed both as a fraction and as a percentage.
error = np.abs(sample.mean() - k*p) / (k*p)
print(error)
print('error = {}%'.format(error * 100))

## Ejemplo Geométrica

In [None]:
# Draw N = 10000 samples from a Geometric variable Geom(p)
# (success probability p for each trial)

N = 10000
p = 0.25
sample = np.random.geometric(p=p, size=N)

In [None]:
# Shape of the sample array (one entry per draw).
sample.shape

In [None]:
# Peek at the first 44 draws.
sample[:44]

In [None]:
# Number of bins = number of integers between the smallest and largest draw.
bns = sample.max() - sample.min() + 1
plt.figure()
plt.hist(sample, bins=bns)
plt.show()

In [None]:
bins = np.arange(sample.min(), sample.max()+1)
x, y = np.histogram(sample, bins=bns)

In [None]:
plt.figure()
plt.plot(y[1:], x/N, '--', label='mass')
plt.plot(y[1:], x.cumsum()/N, '-', label='distribution')
plt.legend()
plt.show()

In [None]:
# Print the empirical probability of each outcome from 1 up to the largest draw.
largest = sample.max()
for trials in range(1, largest + 1):
    hits = (sample == trials).sum()
    print(trials, hits / N)

In [None]:
sample.mean()   # empirical mean (average of the sample)

In [None]:
1/p             # theoretical mean of Geom(p)

In [None]:
# Relative error of the empirical mean with respect to 1/p; dividing by 1/p
# is written as a multiplication by p.
error = np.abs(sample.mean() - 1/p) * p
print(error)
print('error = {}%'.format(error * 100))

## Ejemplo Gamma

In [None]:
# Draw N = 10000 samples from a Gamma variable with shape a and scale b

N = 10000
a = 2      # shape parameter
b = 5      # scale parameter (second positional argument of np.random.gamma)
sample = np.random.gamma(a, b, size=N)

In [None]:
# Shape of the sample array (one entry per draw).
sample.shape

In [None]:
# Peek at the first 44 draws.
sample[:44]

In [None]:
# Number of histogram bins from a logarithmic rule of thumb in N.
# NOTE(review): Sturges' rule would be 1 + 3.322*log10(N); this expression uses
# the natural logarithm and a different arrangement. Confirm it is intended.
bns = int(3.3*(1 + np.log(N)))
print(bns)
plt.figure()
plt.hist(sample, bins=bns)
plt.show()

In [None]:
# bns equal-width bins over the sample range: x are the counts, y the bin edges.
x, y = np.histogram(sample, bins=bns)

In [None]:
# Per-bin relative frequency (dashed) and cumulative relative frequency (solid),
# both plotted at the right edge of each bin (y[1:]). The cumulative curve at a
# right edge is the fraction of the sample at or below that edge.
# NOTE(review): 'mass' is a probability per bin, not a density (it is not
# divided by the bin width).
plt.figure()
plt.plot(y[1:], x/N, '--', label='mass')
plt.plot(y[1:], x.cumsum()/N, '-', label='distribution')
plt.legend()
plt.show()

In [None]:
# Approximate density: a density-normalized histogram with a kernel density
# estimate drawn on top.
# sns.distplot is deprecated, so sns.histplot is used instead. stat='density'
# and cut=3 reproduce distplot's normalization and KDE extent.
plt.figure()
sns.histplot(sample, kde=True, stat='density', kde_kws=dict(cut=3))
plt.show()