{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "8c14ea22",
   "metadata": {},
   "source": [
    "# Computing PCA\n",
    "\n",
    "Here I'll be taking data from [GeeksforGeeks](https://www.geeksforgeeks.org/machine-learning/mathematical-approach-to-pca/)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0b32eb5c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[1.8 1.87777778]\n",
      "[[ 0.7 0.52222222]\n",
      " [-1.3 -1.17777778]\n",
      " [ 0.4 1.02222222]\n",
      " [ 1.3 1.12222222]\n",
      " [ 0.5 0.82222222]\n",
      " [ 0.2 -0.27777778]\n",
      " [-0.8 -0.77777778]\n",
      " [-0.3 -0.27777778]\n",
      " [-0.7 -0.97777778]]\n",
      "[[0.6925 0.68875 ]\n",
      " [0.68875 0.79444444]]\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "# Raw dataset: one datapoint per row, one feature per column\n",
    "X_raw: np.ndarray = np.array([\n",
    "    [2.5, 2.4],\n",
    "    [0.5, 0.7],\n",
    "    [2.2, 2.9],\n",
    "    [3.1, 3.0],\n",
    "    [2.3, 2.7],\n",
    "    [2.0, 1.6],\n",
    "    [1.0, 1.1],\n",
    "    [1.5, 1.6],\n",
    "    [1.1, 0.9]\n",
    "])\n",
    "\n",
    "# Per-feature mean (axis 0 runs over the datapoints)\n",
    "mu_X = np.mean(X_raw, axis=0)\n",
    "\n",
    "print(mu_X)\n",
    "\n",
    "# Center the features: subtract the mean, no rescaling is applied.\n",
    "# The raw data stays available in X_raw; X is the centered matrix\n",
    "# used by the following cells.\n",
    "X = X_raw - mu_X\n",
    "print(X)\n",
    "\n",
    "# Compute covariance matrix applying\n",
    "# Bessel's correction (n-1) instead of n\n",
    "Cov = (X.T @ X) / (X.shape[0] - 1)\n",
    "\n",
    "# Sanity check against NumPy's estimator, which also divides by n-1\n",
    "# by default (rowvar=False: columns are the variables)\n",
    "np.testing.assert_allclose(Cov, np.cov(X_raw, rowvar=False))\n",
    "\n",
    "print(Cov)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "78e9429f",
   "metadata": {},
   "source": [
    "Notice that we computed $X^T \\times X$ instead of $X \\times X^T$. This is because our \n",
    "dataset stores datapoints as rows and features as columns, so $X^T \\times X$ is the feature-by-feature matrix we need."
] },
  {
   "cell_type": "code",
   "execution_count": 84,
   "id": "f93b7a92",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0.05283865 1.43410579]\n",
      "[[-0.73273632 -0.68051267]\n",
      " [ 0.68051267 -0.73273632]]\n"
     ]
    }
   ],
   "source": [
    "# Computing eigenvalues and eigenvectors of the covariance matrix.\n",
    "# NOTE: np.linalg.eig does not return the eigenvalues in any guaranteed\n",
    "# order, so the principal component must be picked by eigenvalue,\n",
    "# not by position.\n",
    "eigen = np.linalg.eig(Cov)\n",
    "\n",
    "# Tuple unpacking works whether eig returns a plain tuple (older NumPy)\n",
    "# or a named tuple; eigen_vectors[:, i] pairs with eigen_values[i]\n",
    "eigen_values, eigen_vectors = eigen\n",
    "\n",
    "print(eigen_values)\n",
    "print(eigen_vectors)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "bfbdd9c3",
   "metadata": {},
   "source": [
    "Now we'll generate the new X matrix by only using the principal eigenvector, i.e. the one with the largest eigenvalue"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 85,
   "id": "7ce6c540",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(9, 1)\n",
      "Compressed\n",
      "[[-0.85901005]\n",
      " [ 1.74766702]\n",
      " [-1.02122441]\n",
      " [-1.70695945]\n",
      " [-0.94272842]\n",
      " [ 0.06743533]\n",
      " [ 1.11431616]\n",
      " [ 0.40769167]\n",
      " [ 1.19281215]]\n",
      "Reconstruction\n",
      "[[ 0.58456722 0.62942786]\n",
      " [-1.18930955 -1.28057909]\n",
      " [ 0.69495615 0.74828821]\n",
      " [ 1.16160753 1.25075117]\n",
      " [ 0.64153863 0.69077135]\n",
      " [-0.0458906 -0.04941232]\n",
      " [-0.75830626 -0.81649992]\n",
      " [-0.27743934 -0.29873049]\n",
      " [-0.81172378 -0.87401678]]\n",
      "Difference\n",
      "[[0.11543278 0.10720564]\n",
      " [0.11069045 0.10280131]\n",
      " [0.29495615 0.27393401]\n",
      " [0.13839247 0.12852895]\n",
      " [0.14153863 0.13145088]\n",
      " [0.2458906 0.22836546]\n",
      " [0.04169374 0.03872214]\n",
      " [0.02256066 0.02095271]\n",
      " [0.11172378 0.10376099]]\n"
     ]
    }
   ],
   "source": [
    "# Select the principal component: the eigenvector (column) paired with the\n",
    "# largest eigenvalue. Using argmax instead of a hard-coded column index\n",
    "# keeps this correct whatever order np.linalg.eig returns the pairs in.\n",
    "top = np.argmax(eigen_values)\n",
    "\n",
    "# Indexing with a list keeps the column dimension, so no reshape is needed\n",
    "eigen_v = eigen_vectors[:, [top]]\n",
    "\n",
    "# Computing X projected onto the principal eigenvector only\n",
    "Z_pca = X @ eigen_v\n",
    "\n",
    "print(Z_pca.shape)\n",
    "\n",
    "\n",
    "# X reconstructed from the single retained component\n",
    "# (X here is the mean-centered data, so X_rec is centered as well)\n",
    "X_rec = Z_pca @ eigen_v.T\n",
    "\n",
    "print(\"Compressed\")\n",
    "print(Z_pca)\n",
    "\n",
    "print(\"Reconstruction\")\n",
    "print(X_rec)\n",
    "\n",
    "print(\"Difference\")\n",
    "print(abs(X - X_rec))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "deep_learning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}