## *Elements of Machine Learning* 2024
### <font size=3 color='gray'>Alan Reyes-Figueroa</font>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB, MultinomialNB

from sklearn.datasets import load_iris

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

## Load data

In [None]:
# Load the Iris dataset and pull out the feature matrix and the label vector.
iris = load_iris()
X, y = iris.data, iris.target

In [None]:
# Map every class label to a plotting color, one entry per sample in y.
colors = {0: 'red', 1: 'blue', 2: 'green'}
c = [colors[label] for label in y]

In [None]:
# Scatter plot of the first two feature columns, colored by class.
feat0, feat1 = X[:, 0], X[:, 1]
plt.figure()
plt.scatter(feat0, feat1, c=c)
plt.show()

In [None]:
# Manual train/test split that keeps the classes balanced.

# Random shuffle of the samples. The permutation is unseeded, so the
# resulting split changes from run to run.
n = X.shape[0]
idx = np.random.permutation(n)
X = X[idx]
y = y[idx]

# Row indices of each class (0, 1, 2) as flat 1-D arrays, so that X[y0]
# stays 2-D and no reshape with a hard-coded feature count is needed.
y0 = np.flatnonzero(y == 0)
y1 = np.flatnonzero(y == 1)
y2 = np.flatnonzero(y == 2)

X0 = X[y0]
X1 = X[y1]
X2 = X[y2]

# Build train / test: the first q rows of every class go to train and the
# rest to test. With 150 samples, 50 per class, and p = 0.8 this gives
# 40 train and 10 test samples per class.
p = 0.8
q = int(n*p/3)
Xtrain = np.vstack([X0[:q], X1[:q], X2[:q]])
Xtest = np.vstack([X0[q:], X1[q:], X2[q:]])

# Label vectors are sized from the actual slices instead of assuming that
# every class has exactly 50 samples, so they always match Xtrain / Xtest.
Ytrain = np.repeat([0, 1, 2], [len(X0[:q]), len(X1[:q]), len(X2[:q])])
Ytest = np.repeat([0, 1, 2], [len(X0[q:]), len(X1[q:]), len(X2[q:])])

In [None]:
# Automatic split with scikit-learn. Passing stratify=y keeps the class
# proportions of y in both train and test, and random_state fixes the shuffle.
# Note: this rebinds Xtrain / Xtest / Ytrain / Ytest.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=1,
)

In [None]:
# Show the shapes of the four arrays produced by the split.
print(*(arr.shape for arr in (Xtrain, Xtest, Ytrain, Ytest)))

## Naive Bayes

In [None]:
# Train a Gaussian Naive Bayes classifier on the training split and predict
# the labels of the test split.
gnb = GaussianNB()
gnb.fit(Xtrain, Ytrain)
model = gnb  # fit() returns the estimator itself, so both names alias it

y_pred = model.predict(Xtest)

In [None]:
# Confusion matrix of the test-split predictions (first argument: true labels).
cfmatrix = confusion_matrix(y_true=Ytest, y_pred=y_pred)
print(cfmatrix)

In [None]:
# Draw the current confusion matrix as an annotated heatmap.
plt.figure()
sns.heatmap(data=cfmatrix, annot=True)
plt.show()

In [None]:
# Text report of the classification metrics for the test-split predictions.
report = classification_report(y_true=Ytest, y_pred=y_pred)
print(report)

## Regiones de clasificación

In [None]:
# Fit a fresh Gaussian Naive Bayes model on the first two feature columns
# only, so that its decision regions can be drawn in 2-D.
# The new estimator `nb` is the one that is fitted. Previously it was created
# but left unused while an estimator from another cell was refitted instead.
nb = GaussianNB()
model = nb.fit(X[:, [0, 1]], y)

In [None]:
# Build the evaluation grid for the decision regions: the range of the first
# two feature columns, padded by 1 on each side and sampled every 0.1.
step = 0.1
lo = X[:, :2].min(axis=0) - 1
hi = X[:, :2].max(axis=0) + 1
x_min, y_min = lo
x_max, y_max = hi
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, step),
    np.arange(y_min, y_max, step),
)

In [None]:
# Classify every grid point and reshape the predictions back to the grid shape.
grid_points = np.c_[xx.ravel(), yy.ravel()]
Z = model.predict(grid_points).reshape(xx.shape)

# Filled contours show the predicted class regions; the samples are drawn on
# top, colored by their labels in y.
plt.figure(figsize=(8, 8))
plt.contourf(xx, yy, Z, alpha=0.4)
plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
plt.title('Clasificador Bayesiano')
plt.show()

In [None]:
# Predict the label of every sample from its first two feature columns.
ypred = model.predict(X[:, [0, 1]])

In [None]:
# Confusion matrix of the predictions in ypred against the labels in y.
cfmatrix = confusion_matrix(y_true=y, y_pred=ypred)
print(cfmatrix)

In [None]:
# Draw the current confusion matrix as an annotated heatmap.
plt.figure()
sns.heatmap(data=cfmatrix, annot=True)
plt.show()

In [None]:
# Text report of the classification metrics for ypred against y.
report = classification_report(y_true=y, y_pred=ypred)
print(report)

## Comparación regiones

In [None]:
# Pairs of feature indices to compare: (0,1), (0,2), (0,3), (1,2), (1,3), (2,3).
cis = [0, 0, 0, 1, 1, 2]
cjs = [1, 2, 3, 2, 3, 3]

plt.figure(figsize=(15, 10))

for i, (ci, cj) in enumerate(zip(cis, cjs)):
    plt.subplot(2, 3, 1 + i)

    # Fit a fresh Gaussian Naive Bayes model on this pair of features.
    # The new estimator `nb` is the one that is fitted. Previously it was
    # created but unused while an estimator from another cell was refitted.
    nb = GaussianNB()
    model = nb.fit(X[:, [ci, cj]], y)

    # Evaluation grid: feature range padded by 1, sampled every 0.1.
    x_min, x_max = X[:, ci].min() - 1, X[:, ci].max() + 1
    y_min, y_max = X[:, cj].min() - 1, X[:, cj].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.1),
                         np.arange(y_min, y_max, 0.1))

    # Predicted class regions as filled contours, with the samples on top.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, ci], X[:, cj], c=y, s=20, edgecolor='k')
    plt.title('Variables {} y {}'.format(ci, cj))
plt.show()

