# **Elements of Machine Learning 2024** <font size=4 color='gray'>Alan Reyes-Figueroa</font>
#### <font color='gray'>Técnicas de Visualización</font>

In [None]:
import numpy as np
import pandas as pd      # trabajar con Dataframes

import matplotlib.pyplot as plt
import seaborn as sns
#import plotly
#import bokeh

# en R: library(ggplot2)

import warnings
warnings.filterwarnings('ignore')

## Load Dataset

Vamos a trabajar con el dataset *Iris*.

In [None]:
# Alternative dataset, kept commented out for quick switching:
#data = sns.load_dataset('tips')
# Load the 'iris' example dataset through seaborn; the following cells
# inspect and plot it under the name `data`.
data = sns.load_dataset('iris')

In [None]:
# Show the last 15 rows of the table.
data.tail(15)

In [None]:
# Dimensions of the table as (rows, columns).
data.shape

In [None]:
# Data type of each column.
data.dtypes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
data.describe()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Same count through isnull(), which pandas documents as an alias of isna().
data.isnull().sum()

# Pandas *built-in* Graphics

In [None]:
# Box plot of the DataFrame's columns with pandas' default settings.
data.boxplot()

In [None]:
# Same box plot on an explicitly sized figure, with a title and a y-axis label.
# `fig` is kept so the figure can be saved afterwards.
fig = plt.figure(figsize=(6,6))
data.boxplot()
plt.title('Iris Dataset')
plt.ylabel('cm')
plt.show()

In [None]:
# Save a matplotlib figure to disk. `fig` is whichever figure was most
# recently assigned to that name. The commented lines are alternative
# settings (higher dpi / library defaults).
#fig.savefig('boxplot.pdf', bbox_inches='tight', dpi=1200)
fig.savefig('boxplot.pdf', bbox_inches='tight', dpi=100)
#fig.savefig('boxplot.pdf')

In [None]:
# Histograms of the DataFrame's columns, 15 bins each.
# DataFrame.hist builds its own figure, so the size is passed through
# figsize= instead of a separate plt.figure() call, which would only leave
# an empty extra figure behind and have no effect on the histograms.
data.hist(bins=15, figsize=(6,6))
fig = plt.gcf()     # keep `fig` pointing at the figure that holds the histograms
plt.show()

Valores admitidos por el parámetro `kind` de `DataFrame.plot`:

- `'line'` : line plot (default)
- `'bar'` : vertical bar plot
- `'barh'` : horizontal bar plot
- `'hist'` : histogram
- `'box'` : boxplot
- `'kde'` : Kernel Density Estimation plot
- `'density'` : same as `'kde'`
- `'area'` : area plot
- `'pie'` : pie plot
- `'scatter'` : scatter plot
- `'hexbin'` : hexbin plot

In [None]:
# Histograms of the columns drawn on one set of axes (not stacked), 25 bins.
data.plot.hist(stacked=False, bins=25)

In [None]:
# Scatter plot of sepal length against petal length.
# DataFrame.plot creates its own figure unless an ax= is supplied, so the
# size is requested through figsize=; a preceding plt.figure() would stay
# empty and its size would not apply to the scatter plot.
data.plot.scatter(x='petal_length', y='sepal_length', figsize=(6,6))
fig = plt.gcf()     # keep `fig` pointing at the figure that holds the scatter plot
plt.show()

In [None]:
# Color each point by species: the one-hot encoding of `species` is used
# directly as one RGB row per point. dtype=float keeps the matrix numeric;
# pandas >= 2.0 returns booleans by default, which are not reliable color values.
data.plot.scatter(x='petal_length', y='sepal_length',
                  color=pd.get_dummies(data['species'], dtype=float).values)

In [None]:
# Same scatter plot, with marker area scaled by petal width (25 * petal_width)
# and one RGB row per point taken from the one-hot encoding of `species`.
# dtype=float keeps the matrix numeric; pandas >= 2.0 returns booleans by
# default, which are not reliable color values.
data.plot.scatter(x='petal_length', y='sepal_length', s=25*data['petal_width'],
                  color=pd.get_dummies(data['species'], dtype=float).values)

In [None]:
# Hexagonal-bin plot of sepal length against petal length, gridsize 25.
data.plot.hexbin(x='petal_length', y='sepal_length', gridsize=25, color='gray')

In [None]:
from pandas.plotting import scatter_matrix

# Matrix of pairwise scatter plots with KDE curves on the diagonal.
# scatter_matrix builds its own figure from figsize=, so no plt.figure()
# call is made beforehand (it would only leave an empty extra figure).
# Points are colored with the one-hot encoding of `species` used as RGB
# rows; dtype=float keeps those rows numeric, since pandas >= 2.0 returns
# booleans by default and matplotlib does not accept them as colors.
scatter_matrix(data, diagonal='kde',
               color=pd.get_dummies(data['species'], dtype=float).values,
               figsize=(8,8))
plt.show()

In [None]:
from pandas.plotting import parallel_coordinates

# Parallel-coordinates plot: one line per row, grouped by the `species` column.
# The figure is created first so the plot is drawn at 6x6 inches.
plt.figure(figsize=(6,6))
parallel_coordinates(data, class_column='species')
plt.show()

In [None]:
from pandas.plotting import andrews_curves

# Andrews curves: one curve per row, grouped by the `species` column.
# The figure is created first so the plot is drawn at 6x6 inches.
plt.figure(figsize=(6,6))
andrews_curves(data, class_column='species')
plt.show()

In [None]:
# Does not work (left disabled for reference):
#data.plot.pie(subplots=True, figsize=(6,6))

# Seaborn

In [None]:
# Load the 'tips' example dataset through seaborn; used by the seaborn examples.
tips = sns.load_dataset('tips')

In [None]:
# Show the first rows of the table.
tips.head()

In [None]:
# Dimensions of the table as (rows, columns).
tips.shape

In [None]:
# Data type of each column.
tips.dtypes

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
tips.describe()

In [None]:
# Number of missing values in each column.
tips.isna().sum()

In [None]:
# Same count through isnull(), which pandas documents as an alias of isna().
tips.isnull().sum()

In [None]:
# Seaborn plots

In [None]:
# Horizontal box plot of the total bill, drawn on a 6x3-inch figure
# (seaborn draws on the current matplotlib figure created here).
fig = plt.figure(figsize=(6,3))
sns.boxplot(x=tips['total_bill'])
plt.show()

In [None]:
# Box plot of the tip amounts; the result is assigned so the cell prints nothing.
ax = sns.boxplot(x=tips['tip'])

In [None]:
# Box plot of the `size` column.
ax = sns.boxplot(x=tips['size'])

In [None]:
# Total bill grouped by day: one box per value of `day`.
ax = sns.boxplot(data=tips, x='day', y='total_bill')

In [None]:
# Total bill by day, further split (and colored) by the `smoker` column.
ax = sns.boxplot(data=tips, x='day', y='total_bill', hue='smoker')

In [None]:
# Total bill by day, further split (and colored) by the `time` column.
ax = sns.boxplot(data=tips, x='day', y='total_bill', hue='time')

In [None]:
# Create a visualization: tip against total bill, one panel per `time`
# value, colored by `smoker`, marker size taken from `size`.
# NOTE: relplot is a figure-level function and returns a FacetGrid, not a
# matplotlib Axes, despite the variable name `ax`.
ax = sns.relplot(data=tips, x="total_bill", y="tip", col="time", hue="smoker", size="size")
# Variant that additionally varies the marker style by `smoker`:
#ax = sns.relplot(data=tips, x="total_bill", y="tip", col="time", hue="smoker", style="smoker", size="size")

In [None]:
# Load the 'dots' example dataset through seaborn.
dots = sns.load_dataset("dots")

# Line plots of firing_rate over time, one panel per `align` value;
# color and line style follow `choice`, line width follows `coherence`.
# sharex=False lets each panel keep its own x-axis range.
ax = sns.relplot(data=dots, kind="line", x="time", y="firing_rate", col="align",
            hue="choice", size="coherence", style="choice", facet_kws=dict(sharex=False))

In [None]:
# Show the first rows of the table.
dots.head()

In [None]:
# Load the 'fmri' example dataset through seaborn.
fmri = sns.load_dataset("fmri")

# Line plots of signal over timepoint, one panel per `region`;
# color and line style follow `event`.
ax = sns.relplot(data=fmri, kind="line", x="timepoint", y="signal", col="region", hue="event", style="event")

In [None]:
# Scatter plots with fitted regression lines of tip against total bill,
# one panel per `time` value and one fit per `smoker` group.
ax = sns.lmplot(data=tips, x="total_bill", y="tip", col="time", hue="smoker")

In [None]:
# Density-scaled histogram of the total bill with a KDE curve on top.
# histplot replaces the deprecated sns.distplot; stat="density" and
# kde_kws=dict(cut=3) follow seaborn's migration guidance for reproducing
# distplot's histogram + KDE output.
ax = sns.histplot(x=tips["total_bill"], kde=True, stat="density", kde_kws=dict(cut=3))

In [None]:
# Same plot for log10(1 + total_bill), to look at the distribution on a
# logarithmic scale. histplot replaces the deprecated sns.distplot;
# stat="density" and kde_kws=dict(cut=3) follow seaborn's migration guidance.
ax = sns.histplot(x=np.log10(1.+tips["total_bill"]), kde=True, stat="density",
                  kde_kws=dict(cut=3))

In [None]:
# Histogram + KDE of the total bill with a rug (one tick per observation)
# along the x axis. The deprecated sns.distplot(..., rug=True) is replaced
# by histplot for the histogram/KDE and rugplot for the ticks, both drawn
# on the same axes.
ax = sns.histplot(x=tips["total_bill"], kde=True, stat="density", kde_kws=dict(cut=3))
ax = sns.rugplot(x=tips["total_bill"], ax=ax)

In [None]:
# Small 1-D toy sample of ten values.
# NOTE: this rebinds `data`, so the Iris DataFrame loaded at the top of the
# notebook is no longer reachable under that name after this cell runs.
data = np.array([0.2, 0.7, 1., 1.2, 3., 3.5, 4.2, 5., 5.4, 5.5])

In [None]:
def gaussian(x0, x, sigma=1.):
    """Evaluate an unnormalized Gaussian bump centered at ``x0``.

    Returns ``exp(-(x - x0)**2 / (2 * sigma**2))``, which equals 1 where
    ``x == x0``. No ``1 / (sigma * sqrt(2*pi))`` factor is applied.

    Parameters
    ----------
    x0 : center of the bump.
    x : point(s) at which to evaluate; NumPy broadcasting applies.
    sigma : width of the bump (default 1.).
    """
    offset = x - x0
    variance = sigma**2
    return np.exp(-offset**2/(2.*variance))

In [None]:
def kde(data, x, sigma=1.):
    """Average Gaussian bumps centered on the samples, evaluated at ``x``.

    Computes ``(1/n) * sum_i gaussian(data[i], x, sigma)`` with a single
    broadcast call instead of a Python loop over the samples. The scaling
    is exactly that of ``gaussian``: no extra normalization is applied here.

    Parameters
    ----------
    data : array-like
        Samples; the first axis indexes the individual observations.
        Lists are accepted as well as arrays.
    x : scalar or array-like
        Point(s) at which the estimate is evaluated.
    sigma : float, default 1.
        Kernel width, forwarded unchanged to ``gaussian``.

    Returns
    -------
    The averaged kernel values, with the broadcast shape of one sample
    and ``x``.

    Raises
    ------
    ValueError
        If ``data`` contains no samples (the average is undefined).

    Notes
    -----
    A temporary array of ``n * x.size`` values is allocated.
    """
    samples = np.asarray(data)
    n = samples.shape[0]
    if n == 0:
        raise ValueError("kde needs at least one data point")
    grid = np.asarray(x)
    # Insert singleton axes right after the sample axis so that every
    # sample broadcasts against the whole of x.
    pad = max(grid.ndim - (samples.ndim - 1), 0)
    centers = samples.reshape((n,) + (1,) * pad + samples.shape[1:])
    return gaussian(centers, grid, sigma).mean(axis=0)

In [None]:
# Sample to smooth: the total-bill column as a NumPy array.
xx = tips["total_bill"].values
# Alternative input: the small toy sample stored in `data`.
#xx = data.copy()
print(xx) 

In [None]:
# Evaluation grid: 501 evenly spaced points on [-10, 60].
x = np.linspace(-10,60,501)
# Alternative grid on [-10, 10] with 201 points:
#x = np.linspace(-10,10,201)
# Evaluate the hand-written KDE on the grid with width sigma = 3.
y = kde(xx, sigma=3., x=x)

In [None]:
# Plot the estimated curve and mark each observation as a black dot at y = 0.
plt.figure()
plt.plot(x, y)
plt.plot(xx, np.zeros(xx.shape), 'k.')
plt.show()

In [None]:
# Load the 'penguins' example dataset through seaborn.
penguins = sns.load_dataset("penguins")

# Joint plot of bill length against flipper length.
# NOTE: jointplot returns a JointGrid rather than a matplotlib Axes,
# despite the variable name `ax`.
ax = sns.jointplot(data=penguins, x="flipper_length_mm", y="bill_length_mm")

In [None]:
# Grid of pairwise relationships between the numeric columns.
# NOTE: pairplot returns a PairGrid, despite the variable name `ax`.
ax = sns.pairplot(data=penguins)

In [None]:
# Same grid of pairwise plots, colored by the `species` column.
ax = sns.pairplot(data=penguins, hue="species")

In [None]:
# Custom pair grid (lower triangle only), colored by species.
g = sns.PairGrid(penguins, hue="species", corner=True)
# Off-diagonal panels: density contours of all points together (hue=None),
# then the individual observations as "+" markers colored by species.
g.map_lower(sns.kdeplot, hue=None, levels=5, color=".2")
g.map_lower(sns.scatterplot, marker="+")
# Diagonal panels: density-scaled histogram with a KDE curve per species.
# histplot replaces the deprecated sns.distplot; common_norm=False
# normalizes each species separately, as the per-group distplot calls did.
g.map_diag(sns.histplot, kde=True, stat="density", common_norm=False)
g.add_legend(frameon=True)
# Optional manual legend placement:
#g.legend.set_bbox_to_anchor((.61, .6))
plt.show()

#### FacetGrid

In [None]:
# One panel per value of `sex`, with points colored by `smoker`.
g = sns.FacetGrid(tips, col="sex", hue="smoker")
# Draw tip against total bill in every panel, slightly transparent.
g.map(sns.scatterplot, "total_bill", "tip", alpha=.7)
g.add_legend()