{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Ciencia de datos 2022\n", "\n", "## Alan Reyes-Figueroa" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Factoración no negativa de matrices (NNMF)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "En este experimento ilustramos cómo funciona un sistema de recomendación (*toy example*)." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import pandas as pd\n", "\n", "from sklearn.decomposition import NMF\n", "\n", "import warnings\n", "# Silence every warning for the rest of the notebook.\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Data" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Toy data: X1 has 5 rows and X2 has 9 rows; both have 6 columns.\n", "X1 = np.array([[0,1,0,1,2,2],\n", " [2,3,1,1,2,2],\n", " [1,1,1,0,1,1],\n", " [0,2,3,4,1,1],\n", " [0,0,0,0,1,0]])\n", "\n", "X2 = np.array([[0,1,0,1,2,2],\n", " [2,3,0,1,2,2],\n", " [1,1,0,0,1,1],\n", " [0,2,3,4,1,1],\n", " [0,0,0,0,1,0],\n", " [0,0,2,0,2,2],\n", " [2,1,0,2,0,1],\n", " [0,3,1,1,1,0],\n", " [0,3,2,2,1,0]])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experimento 1" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Label X1: one row per product category, one column per person.\n", "names = ['Alice', 'Bob', 'Cadú', 'Didier', 'Ely', 'Fabi']\n", "D1 = pd.DataFrame(\n", "    X1,\n", "    index=['Vegetales', 'Frutas', 'Dulces', 'Pan', 'Café'],\n", "    columns=names,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AliceBobCadúDidierElyFabi
Vegetales010122
Frutas231122
Dulces111011
Pan023411
Café000010
\n", "
" ], "text/plain": [ " Alice Bob Cadú Didier Ely Fabi\n", "Vegetales 0 1 0 1 2 2\n", "Frutas 2 3 1 1 2 2\n", "Dulces 1 1 1 0 1 1\n", "Pan 0 2 3 4 1 1\n", "Café 0 0 0 0 1 0" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D1" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NMF(n_components=3)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k = 3 # número de clústers\n", "\n", "nmf = NMF(k) # NNMF\n", "nmf.fit(D1) # training u optimización" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(5, 3) (3, 6)\n" ] } ], "source": [ "H = nmf.components_\n", "W = nmf.transform(D1)\n", "print(W. shape, H.shape)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "W = pd.DataFrame(np.round(W, 4), columns=np.arange(0, k))\n", "W.index = D1.index" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
012
Vegetales0.00300.03722.7447
Frutas1.92720.15250.4676
Dulces0.97360.00030.0000
Pan0.00032.65691.1803
Café0.00000.00000.5900
\n", "
" ], "text/plain": [ " 0 1 2\n", "Vegetales 0.0030 0.0372 2.7447\n", "Frutas 1.9272 0.1525 0.4676\n", "Dulces 0.9736 0.0003 0.0000\n", "Pan 0.0003 2.6569 1.1803\n", "Café 0.0000 0.0000 0.5900" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "W" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0 1 2
Vegetales0.0030000.0372002.744700
Frutas1.9272000.1525000.467600
Dulces0.9736000.0003000.000000
Pan0.0003002.6569001.180300
Café0.0000000.0000000.590000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# mostramos W con mapa de colores\n", "\n", "WW = W.style.background_gradient(cmap=plt.cm.Blues)\n", "display(WW)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Vegetales Frutas Dulces Pan Café
Vegetales1.000000-0.359096-0.509614-0.0534740.999941
Frutas-0.3590961.0000000.986015-0.912763-0.348930
Dulces-0.5096140.9860151.000000-0.831921-0.500231
Pan-0.053474-0.912763-0.8319211.000000-0.064326
Café0.999941-0.348930-0.500231-0.0643261.000000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(W.T.corr().style.background_gradient(cmap=plt.cm.seismic))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "H = pd.DataFrame(np.round(nmf.components_, 3), columns=D1.columns)\n", "#H.index = ['Veggies', 'Meat lovers', 'Bread eaters', 'Yogurth pickers']\n", "H.index = ['Fruits pickers', 'Bread eaters', 'Veggies']" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Alice Bob Cadú Didier Ely Fabi
Fruits pickers1.0360001.3440000.5510000.2630000.8850000.898000
Bread eaters0.0000000.6020001.1240001.3590000.0340000.068000
Veggies0.0000000.3470000.0000000.3370000.7670000.691000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "HH = H.style.background_gradient(cmap=plt.cm.Blues)\n", "display(HH)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Experimento 2" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "D2 = pd.DataFrame(X2, columns=names)\n", "D2.index = ['Vegetales', 'Frutas', 'Dulces', 'Pan', 'Café', 'Carnes', 'Lácteos', 'Pastas', 'Salsa']" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AliceBobCadúDidierElyFabi
Vegetales010122
Frutas230122
Dulces110011
Pan023411
Café000010
Carnes002022
Lácteos210201
Pastas031110
Salsa032210
\n", "
" ], "text/plain": [ " Alice Bob Cadú Didier Ely Fabi\n", "Vegetales 0 1 0 1 2 2\n", "Frutas 2 3 0 1 2 2\n", "Dulces 1 1 0 0 1 1\n", "Pan 0 2 3 4 1 1\n", "Café 0 0 0 0 1 0\n", "Carnes 0 0 2 0 2 2\n", "Lácteos 2 1 0 2 0 1\n", "Pastas 0 3 1 1 1 0\n", "Salsa 0 3 2 2 1 0" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "D2" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NMF(n_components=7)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k = 7\n", "nmf = NMF(k)\n", "nmf.fit(D2)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(9, 7) (7, 6)\n" ] } ], "source": [ "H = nmf.components_\n", "W = nmf.transform(D2)\n", "print(W. shape, H.shape)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "W = pd.DataFrame(np.round(W, 4), columns=np.arange(0, k))\n", "W.index = D2.index" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
Vegetales1.00390.00000.00012.02910.00000.00000.8849
Frutas4.17580.00001.16581.05560.00000.42430.8808
Dulces1.56750.00000.58290.53990.00000.22300.0000
Pan0.39191.96770.00040.98470.00010.00001.4617
Café0.00000.00000.00000.00000.00000.57010.0000
Carnes0.00000.00190.00000.00671.58390.00390.0000
Lácteos0.00000.00001.16640.00180.00000.00001.7724
Pastas4.43930.65600.00000.00000.00000.41420.1860
Salsa3.64871.31200.00000.00000.00000.43390.3788
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "Vegetales 1.0039 0.0000 0.0001 2.0291 0.0000 0.0000 0.8849\n", "Frutas 4.1758 0.0000 1.1658 1.0556 0.0000 0.4243 0.8808\n", "Dulces 1.5675 0.0000 0.5829 0.5399 0.0000 0.2230 0.0000\n", "Pan 0.3919 1.9677 0.0004 0.9847 0.0001 0.0000 1.4617\n", "Café 0.0000 0.0000 0.0000 0.0000 0.0000 0.5701 0.0000\n", "Carnes 0.0000 0.0019 0.0000 0.0067 1.5839 0.0039 0.0000\n", "Lácteos 0.0000 0.0000 1.1664 0.0018 0.0000 0.0000 1.7724\n", "Pastas 4.4393 0.6560 0.0000 0.0000 0.0000 0.4142 0.1860\n", "Salsa 3.6487 1.3120 0.0000 0.0000 0.0000 0.4339 0.3788" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "W" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0 1 2 3 4 5 6
Vegetales1.0039000.0000000.0001002.0291000.0000000.0000000.884900
Frutas4.1758000.0000001.1658001.0556000.0000000.4243000.880800
Dulces1.5675000.0000000.5829000.5399000.0000000.2230000.000000
Pan0.3919001.9677000.0004000.9847000.0001000.0000001.461700
Café0.0000000.0000000.0000000.0000000.0000000.5701000.000000
Carnes0.0000000.0019000.0000000.0067001.5839000.0039000.000000
Lácteos0.0000000.0000001.1664000.0018000.0000000.0000001.772400
Pastas4.4393000.6560000.0000000.0000000.0000000.4142000.186000
Salsa3.6487001.3120000.0000000.0000000.0000000.4339000.378800
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "WW = W.style.background_gradient(cmap=plt.cm.Blues)\n", "display(WW)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Vegetales Frutas Dulces Pan Café Carnes Lácteos Pastas Salsa
Vegetales1.0000000.4130370.3882990.263646-0.313655-0.311727-0.0211650.1876200.122146
Frutas0.4130371.0000000.958917-0.192280-0.207433-0.339042-0.0491660.9038130.820137
Dulces0.3882990.9589171.000000-0.314498-0.150372-0.324727-0.2164950.8510100.755354
Pan0.263646-0.192280-0.3144981.000000-0.378907-0.3787360.162200-0.0784110.094285
Café-0.313655-0.207433-0.150372-0.3789071.000000-0.165325-0.251052-0.108854-0.129648
Carnes-0.311727-0.339042-0.324727-0.378736-0.1653251.000000-0.253360-0.223278-0.275210
Lácteos-0.021165-0.049166-0.2164950.162200-0.251052-0.2533601.000000-0.287974-0.298005
Pastas0.1876200.9038130.851010-0.078411-0.108854-0.223278-0.2879741.0000000.977065
Salsa0.1221460.8201370.7553540.094285-0.129648-0.275210-0.2980050.9770651.000000
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "display(W.T.corr().style.background_gradient(cmap=plt.cm.seismic))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "H = pd.DataFrame(np.round(H, 4), columns=D2.columns)\n", "#H.index = ['Veggies', 'Meat lovers', 'Bread eaters', 'Yogurth pickers']\n", "#H.index = ['Fruits pickers', 'Bread eaters', 'Veggies']\n", "#H.index = ['Veggies', 'Meat lovers', 'Bread eaters', 'Yogurth pickers']\n", "#H.index = ['Veggies', 'Meat lovers', 'Bread eaters', 'Yogurth pickers']\n", "H.index = ['Italians', 'Fruit pickers', 'Meat lovers', 'Yogurth pickers', 'Bread eaters', 'Veggies', '?']" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Alice Bob Cadú Didier Ely Fabi
Italians0.0000000.5742000.0000000.0015000.0589000.000000
Fruit pickers0.0000000.5577001.5245001.1943000.0184000.000000
Meat lovers1.7154000.1627000.0000000.0000000.0000000.813600
Yogurth pickers0.0000000.0093000.0000000.0000000.9567000.973700
Bread eaters0.0000000.0000001.2599000.0000001.2544001.259700
Veggies0.0004000.0000000.0000000.0000001.7542000.000000
?0.0000000.4571000.0000001.1284000.0000000.026800
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "HH = H.style.background_gradient(cmap=plt.cm.Blues)\n", "display(HH)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# otra forma de visualizar los pesos de cada persona en cada categoría\n", "plt.figure()\n", "#plt.imshow(H, cmap=plt.cm.jet)\n", "H.T.plot()\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "approX = W.values @ H.values\n", "reconstructed = pd.DataFrame(np.round(approX, 3), columns=D2.columns)\n", "reconstructed.index = D2.index" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
AliceBobCadúDidierElyFabi
Vegetales0.0001.0000.0001.0002.0002.000
Frutas2.0003.0000.0001.0002.0002.000
Dulces1.0001.0000.0000.0021.0001.000
Pan0.0012.0003.0004.0001.0010.998
Café0.0000.0000.0000.0001.0000.000
Carnes0.0000.0011.9980.0022.0002.002
Lácteos2.0011.0000.0002.0000.0020.998
Pastas0.0003.0001.0001.0001.0000.005
Salsa0.0003.0002.0002.0001.0000.010
\n", "
" ], "text/plain": [ " Alice Bob Cadú Didier Ely Fabi\n", "Vegetales 0.000 1.000 0.000 1.000 2.000 2.000\n", "Frutas 2.000 3.000 0.000 1.000 2.000 2.000\n", "Dulces 1.000 1.000 0.000 0.002 1.000 1.000\n", "Pan 0.001 2.000 3.000 4.000 1.001 0.998\n", "Café 0.000 0.000 0.000 0.000 1.000 0.000\n", "Carnes 0.000 0.001 1.998 0.002 2.000 2.002\n", "Lácteos 2.001 1.000 0.000 2.000 0.002 0.998\n", "Pastas 0.000 3.000 1.000 1.000 1.000 0.005\n", "Salsa 0.000 3.000 2.000 2.000 1.000 0.010" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Sistema de recomendación\n", "\n", "display(reconstructed)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Error en función de k" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "Hs = []\n", "Ws = []\n", "Xs = []\n", "errs = []\n", "\n", "for k in range(1, 1+D2.shape[1]):\n", " nmf = NMF(k)\n", " nmf.fit(D2)\n", " H = nmf.components_\n", " W = nmf.transform(D2)\n", " X = W @ H\n", " err = ((D2.values - X)**2).sum()\n", " \n", " Hs.append(H)\n", " Ws.append(W)\n", " Xs.append(X)\n", " errs.append(err)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure()\n", "plt.plot(np.arange(1,1+D2.shape[1]), errs /np.prod(D2.shape))\n", "plt.xlabel(k)\n", "plt.ylabel('MSE')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "NMF(n_components=6)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "k = 6\n", "nmf = NMF(k)\n", "nmf.fit(D2)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 4 }