# Computing PCA

The data used here comes from the GeeksforGeeks article [Mathematical Approach to PCA](https://www.geeksforgeeks.org/machine-learning/mathematical-approach-to-pca/).

In [None]:
import numpy as np

# Toy dataset: one observation per row, one feature per column.
X: np.ndarray = np.array([
    [2.5, 2.4],
    [0.5, 0.7],
    [2.2, 2.9],
    [3.1, 3.0],
    [2.3, 2.7],
    [2.0, 1.6],
    [1.0, 1.1],
    [1.5, 1.6],
    [1.1, 0.9],
])

# Per-feature (column-wise) mean.
mu_X = X.mean(axis=0)
print(mu_X)

# Center the features by subtracting their mean (no rescaling is applied).
X = X - mu_X
print(X)

# Sample covariance matrix of the centered data; dividing by n - 1
# instead of n is Bessel's correction.
n_samples = X.shape[0]
Cov = (X.T @ X) / (n_samples - 1)
print(Cov)

[1.8 1.87777778]
[[ 0.7 0.52222222]
 [-1.3 -1.17777778]
 [ 0.4 1.02222222]
 [ 1.3 1.12222222]
 [ 0.5 0.82222222]
 [ 0.2 -0.27777778]
 [-0.8 -0.77777778]
 [-0.3 -0.27777778]
 [-0.7 -0.97777778]]
[[0.6925 0.68875 ]
 [0.68875 0.79444444]]


Note that we computed $X^T X$ rather than $X X^T$. This is because our dataset stores
one datapoint per row and one feature per column, so $X^T X$ is the product that yields the (features × features) covariance matrix.

In [84]:
# Eigendecomposition of the covariance matrix.
# The result unpacks as (eigenvalues, eigenvectors); column i of
# eigen_vectors is the eigenvector paired with eigen_values[i].
eigen = np.linalg.eig(Cov)
eigen_values, eigen_vectors = eigen

print(eigen_values)
print(eigen_vectors)

[0.05283865 1.43410579]
[[-0.73273632 -0.68051267]
 [ 0.68051267 -0.73273632]]


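Now we'll project X onto the principal eigenvector only — the one with the largest eigenvalue (the second column of `eigen_vectors` above) — to obtain the compressed data, and then reconstruct X from it.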

In [85]:
# Project the centered data onto the principal axis, then reconstruct it.
# np.linalg.eig does not return the eigenvalues in any particular order, so
# the principal component is located through the largest eigenvalue rather
# than through a hard-coded column index.
principal_idx = int(np.argmax(eigen_values))

# Principal axis kept as an (n_features, 1) column so that both matrix
# products below stay 2-D without any extra reshape.
eigen_v = eigen_vectors[:, [principal_idx]]

# Compressed representation: one coordinate per sample, shape (n_samples, 1).
Z_pca = X @ eigen_v

print(Z_pca.shape)


# X reconstructed: map the 1-D coordinates back into the centered feature space.
X_rec = Z_pca @ eigen_v.T

print("Compressed")
print(Z_pca)

print("Reconstruction")
print(X_rec)

# Element-wise absolute reconstruction error with respect to the centered data.
print("Difference")
print(np.abs(X - X_rec))

(9, 1)
Compressed
[[-0.85901005]
 [ 1.74766702]
 [-1.02122441]
 [-1.70695945]
 [-0.94272842]
 [ 0.06743533]
 [ 1.11431616]
 [ 0.40769167]
 [ 1.19281215]]
Reconstruction
[[ 0.58456722 0.62942786]
 [-1.18930955 -1.28057909]
 [ 0.69495615 0.74828821]
 [ 1.16160753 1.25075117]
 [ 0.64153863 0.69077135]
 [-0.0458906 -0.04941232]
 [-0.75830626 -0.81649992]
 [-0.27743934 -0.29873049]
 [-0.81172378 -0.87401678]]
Difference
[[0.11543278 0.10720564]
 [0.11069045 0.10280131]
 [0.29495615 0.27393401]
 [0.13839247 0.12852895]
 [0.14153863 0.13145088]
 [0.2458906 0.22836546]
 [0.04169374 0.03872214]
 [0.02256066 0.02095271]
 [0.11172378 0.10376099]]
