200 lines
4.7 KiB
Plaintext
200 lines
4.7 KiB
Plaintext
|
|
{
|
||
|
|
"cells": [
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "8c14ea22",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"# Computing PCA\n",
|
||
|
|
"\n",
|
||
|
|
"Here I'll be taking data from [GeeksforGeeks](https://www.geeksforgeeks.org/machine-learning/mathematical-approach-to-pca/)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": null,
|
||
|
|
"id": "0b32eb5c",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"[1.8 1.87777778]\n",
|
||
|
|
"[[ 0.7 0.52222222]\n",
|
||
|
|
" [-1.3 -1.17777778]\n",
|
||
|
|
" [ 0.4 1.02222222]\n",
|
||
|
|
" [ 1.3 1.12222222]\n",
|
||
|
|
" [ 0.5 0.82222222]\n",
|
||
|
|
" [ 0.2 -0.27777778]\n",
|
||
|
|
" [-0.8 -0.77777778]\n",
|
||
|
|
" [-0.3 -0.27777778]\n",
|
||
|
|
" [-0.7 -0.97777778]]\n",
|
||
|
|
"[[0.6925 0.68875 ]\n",
|
||
|
|
" [0.68875 0.79444444]]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
"import numpy as np\n",
|
||
|
|
"\n",
|
||
|
|
"X : np.ndarray = np.array([\n",
|
||
|
|
" [2.5, 2.4],\n",
|
||
|
|
" [0.5, 0.7],\n",
|
||
|
|
" [2.2, 2.9],\n",
|
||
|
|
" [3.1, 3.0],\n",
|
||
|
|
" [2.3, 2.7],\n",
|
||
|
|
" [2.0, 1.6],\n",
|
||
|
|
" [1.0, 1.1],\n",
|
||
|
|
" [1.5, 1.6],\n",
|
||
|
|
" [1.1, 0.9]\n",
|
||
|
|
"])\n",
|
||
|
|
"\n",
|
||
|
|
"# Compute mean values for features\n",
|
||
|
|
"mu_X = np.mean(X, 0)\n",
|
||
|
|
"\n",
|
||
|
|
"print(mu_X)\n",
|
||
|
|
"# \"Normalize\" Features\n",
|
||
|
|
"X = X - mu_X\n",
|
||
|
|
"print(X)\n",
|
||
|
|
"\n",
|
||
|
|
"# Compute covariance matrix applying\n",
|
||
|
|
"# Bessel's correction (n-1) instead of n\n",
|
||
|
|
"Cov = (X.T @ X) / (X.shape[0] - 1)\n",
|
||
|
|
"\n",
|
||
|
|
"print(Cov)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "78e9429f",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"Note that we computed $X^T X$ rather than $X X^T$. Our data matrix stores one sample per\n",
"row and one feature per column, so $X^T X$ yields the feature-by-feature covariance matrix."
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 84,
|
||
|
|
"id": "f93b7a92",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"[0.05283865 1.43410579]\n",
|
||
|
|
"[[-0.73273632 -0.68051267]\n",
|
||
|
|
" [ 0.68051267 -0.73273632]]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
# Eigendecomposition of the covariance matrix.
# np.linalg.eig returns an (eigenvalues, eigenvectors) pair; the i-th
# column of `eigen_vectors` pairs with `eigen_values[i]`.
eigen_values, eigen_vectors = np.linalg.eig(Cov)

print(eigen_values)
print(eigen_vectors)
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "markdown",
|
||
|
|
"id": "bfbdd9c3",
|
||
|
|
"metadata": {},
|
||
|
|
"source": [
|
||
|
|
"Now we'll generate the new $X$ matrix using a single eigenvector: the one with the largest eigenvalue (the first principal component)"
|
||
|
|
]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"cell_type": "code",
|
||
|
|
"execution_count": 85,
|
||
|
|
"id": "7ce6c540",
|
||
|
|
"metadata": {},
|
||
|
|
"outputs": [
|
||
|
|
{
|
||
|
|
"name": "stdout",
|
||
|
|
"output_type": "stream",
|
||
|
|
"text": [
|
||
|
|
"(9, 1)\n",
|
||
|
|
"Compressed\n",
|
||
|
|
"[[-0.85901005]\n",
|
||
|
|
" [ 1.74766702]\n",
|
||
|
|
" [-1.02122441]\n",
|
||
|
|
" [-1.70695945]\n",
|
||
|
|
" [-0.94272842]\n",
|
||
|
|
" [ 0.06743533]\n",
|
||
|
|
" [ 1.11431616]\n",
|
||
|
|
" [ 0.40769167]\n",
|
||
|
|
" [ 1.19281215]]\n",
|
||
|
|
"Reconstruction\n",
|
||
|
|
"[[ 0.58456722 0.62942786]\n",
|
||
|
|
" [-1.18930955 -1.28057909]\n",
|
||
|
|
" [ 0.69495615 0.74828821]\n",
|
||
|
|
" [ 1.16160753 1.25075117]\n",
|
||
|
|
" [ 0.64153863 0.69077135]\n",
|
||
|
|
" [-0.0458906 -0.04941232]\n",
|
||
|
|
" [-0.75830626 -0.81649992]\n",
|
||
|
|
" [-0.27743934 -0.29873049]\n",
|
||
|
|
" [-0.81172378 -0.87401678]]\n",
|
||
|
|
"Difference\n",
|
||
|
|
"[[0.11543278 0.10720564]\n",
|
||
|
|
" [0.11069045 0.10280131]\n",
|
||
|
|
" [0.29495615 0.27393401]\n",
|
||
|
|
" [0.13839247 0.12852895]\n",
|
||
|
|
" [0.14153863 0.13145088]\n",
|
||
|
|
" [0.2458906 0.22836546]\n",
|
||
|
|
" [0.04169374 0.03872214]\n",
|
||
|
|
" [0.02256066 0.02095271]\n",
|
||
|
|
" [0.11172378 0.10376099]]\n"
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"source": [
|
||
|
|
# Project X onto the principal eigenvector — the one with the LARGEST
# eigenvalue.  np.linalg.eig gives no ordering guarantee, so select the
# column by eigenvalue instead of hard-coding index 1 (which only happens
# to be the dominant one for this dataset).
top = int(np.argmax(eigen_values))
eigen_v = eigen_vectors[:, top].reshape(-1, 1)  # column vector, shape (n_features, 1)

# Compressed representation: one coordinate per sample, shape (n_samples, 1)
Z_pca = X @ eigen_v

print(Z_pca.shape)


# Reconstruct the (centered) data from the single retained component
X_rec = Z_pca @ eigen_v.T

print("Compressed")
print(Z_pca)

print("Reconstruction")
print(X_rec)

# Per-element absolute reconstruction error
print("Difference")
print(abs(X - X_rec))
|
||
|
|
]
|
||
|
|
}
|
||
|
|
],
|
||
|
|
"metadata": {
|
||
|
|
"kernelspec": {
|
||
|
|
"display_name": "deep_learning",
|
||
|
|
"language": "python",
|
||
|
|
"name": "python3"
|
||
|
|
},
|
||
|
|
"language_info": {
|
||
|
|
"codemirror_mode": {
|
||
|
|
"name": "ipython",
|
||
|
|
"version": 3
|
||
|
|
},
|
||
|
|
"file_extension": ".py",
|
||
|
|
"mimetype": "text/x-python",
|
||
|
|
"name": "python",
|
||
|
|
"nbconvert_exporter": "python",
|
||
|
|
"pygments_lexer": "ipython3",
|
||
|
|
"version": "3.13.7"
|
||
|
|
}
|
||
|
|
},
|
||
|
|
"nbformat": 4,
|
||
|
|
"nbformat_minor": 5
|
||
|
|
}
|