Revised Optimization Notes
This commit is contained in:
199
Chapters/15-Appendix-A/python-experiments/pca.ipynb
Normal file
199
Chapters/15-Appendix-A/python-experiments/pca.ipynb
Normal file
@@ -0,0 +1,199 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c14ea22",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Computing PCA\n",
|
||||
"\n",
|
||||
"The data used here comes from the [GeeksforGeeks](https://www.geeksforgeeks.org/machine-learning/mathematical-approach-to-pca/) article on the mathematical approach to PCA."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "0b32eb5c",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[1.8 1.87777778]\n",
|
||||
"[[ 0.7 0.52222222]\n",
|
||||
" [-1.3 -1.17777778]\n",
|
||||
" [ 0.4 1.02222222]\n",
|
||||
" [ 1.3 1.12222222]\n",
|
||||
" [ 0.5 0.82222222]\n",
|
||||
" [ 0.2 -0.27777778]\n",
|
||||
" [-0.8 -0.77777778]\n",
|
||||
" [-0.3 -0.27777778]\n",
|
||||
" [-0.7 -0.97777778]]\n",
|
||||
"[[0.6925 0.68875 ]\n",
|
||||
" [0.68875 0.79444444]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"\n",
|
||||
"X : np.ndarray = np.array([\n",
|
||||
" [2.5, 2.4],\n",
|
||||
" [0.5, 0.7],\n",
|
||||
" [2.2, 2.9],\n",
|
||||
" [3.1, 3.0],\n",
|
||||
" [2.3, 2.7],\n",
|
||||
" [2.0, 1.6],\n",
|
||||
" [1.0, 1.1],\n",
|
||||
" [1.5, 1.6],\n",
|
||||
" [1.1, 0.9]\n",
|
||||
"])\n",
|
||||
"\n",
|
||||
"# Compute mean values for features\n",
|
||||
"mu_X = np.mean(X, 0)\n",
|
||||
"\n",
|
||||
"print(mu_X)\n",
|
||||
"# Center the features (subtract the per-feature mean; no variance scaling)\n",
|
||||
"X = X - mu_X\n",
|
||||
"print(X)\n",
|
||||
"\n",
|
||||
"# Compute covariance matrix applying\n",
|
||||
"# Bessel's correction (n-1) instead of n\n",
|
||||
"Cov = (X.T @ X) / (X.shape[0] - 1)\n",
|
||||
"\n",
|
||||
"print(Cov)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "78e9429f",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
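"Note that we computed $X^T \\times X$ instead of $X \\times X^T$. This is because our \n",
|
||||
"dataset stores one datapoint per row and one feature per column, so $X^T \\times X$ (divided by $n-1$) is the $2 \\times 2$ covariance matrix of the features."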
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 84,
|
||||
"id": "f93b7a92",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"[0.05283865 1.43410579]\n",
|
||||
"[[-0.73273632 -0.68051267]\n",
|
||||
" [ 0.68051267 -0.73273632]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Computing eigenvalues and eigenvectors of the covariance matrix\n",
|
||||
"eigen = np.linalg.eig(Cov)\n",
|
||||
"eigen_values = eigen.eigenvalues\n",
|
||||
"eigen_vectors = eigen.eigenvectors\n",
|
||||
"\n",
|
||||
"print(eigen_values)\n",
|
||||
"print(eigen_vectors)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bfbdd9c3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
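"Now we'll project X onto the principal eigenvector only, i.e. the one with the largest eigenvalue (here the second column of `eigen_vectors`, whose eigenvalue is about 1.434), and then reconstruct X from that projection."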
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 85,
|
||||
"id": "7ce6c540",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"(9, 1)\n",
|
||||
"Compressed\n",
|
||||
"[[-0.85901005]\n",
|
||||
" [ 1.74766702]\n",
|
||||
" [-1.02122441]\n",
|
||||
" [-1.70695945]\n",
|
||||
" [-0.94272842]\n",
|
||||
" [ 0.06743533]\n",
|
||||
" [ 1.11431616]\n",
|
||||
" [ 0.40769167]\n",
|
||||
" [ 1.19281215]]\n",
|
||||
"Reconstruction\n",
|
||||
"[[ 0.58456722 0.62942786]\n",
|
||||
" [-1.18930955 -1.28057909]\n",
|
||||
" [ 0.69495615 0.74828821]\n",
|
||||
" [ 1.16160753 1.25075117]\n",
|
||||
" [ 0.64153863 0.69077135]\n",
|
||||
" [-0.0458906 -0.04941232]\n",
|
||||
" [-0.75830626 -0.81649992]\n",
|
||||
" [-0.27743934 -0.29873049]\n",
|
||||
" [-0.81172378 -0.87401678]]\n",
|
||||
"Difference\n",
|
||||
"[[0.11543278 0.10720564]\n",
|
||||
" [0.11069045 0.10280131]\n",
|
||||
" [0.29495615 0.27393401]\n",
|
||||
" [0.13839247 0.12852895]\n",
|
||||
" [0.14153863 0.13145088]\n",
|
||||
" [0.2458906 0.22836546]\n",
|
||||
" [0.04169374 0.03872214]\n",
|
||||
" [0.02256066 0.02095271]\n",
|
||||
" [0.11172378 0.10376099]]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Project X onto the eigenvector with the largest eigenvalue (column index 1 here)\n",
|
||||
"Z_pca = X @ eigen_vectors[:,1]\n",
|
||||
"Z_pca = Z_pca.reshape([Z_pca.shape[0], 1])\n",
|
||||
"\n",
|
||||
"print(Z_pca.shape)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Reconstruct X (still mean-centered) from the 1-D projection\n",
|
||||
"eigen_v = (eigen_vectors[:, 1].reshape([eigen_vectors[:, 1].shape[0], 1]))\n",
|
||||
"X_rec = Z_pca @ eigen_v.T\n",
|
||||
"\n",
|
||||
"print(\"Compressed\")\n",
|
||||
"print(Z_pca)\n",
|
||||
"\n",
|
||||
"print(\"Reconstruction\")\n",
|
||||
"print(X_rec)\n",
|
||||
"\n",
|
||||
"print(\"Difference\")\n",
|
||||
"print(abs(X - X_rec))"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "deep_learning",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.13.7"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
||||
Reference in New Issue
Block a user