# Gaussian Mixture Models: in-class demonstration

In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.mixture import GaussianMixture

## Generate toy data
We are going to generate a toy dataset to work with using sklearn's `make_blobs` function. We then add some variance to the data by stretching it with a linear transformation.

In [None]:
# Draw 1500 samples around 3 blob centres; the seed is fixed so the
# same dataset is produced on every run.
X, y = make_blobs(centers=3, n_samples=1500, random_state=163)

# "Stretch" the blobs by multiplying every sample with a 2x2 matrix.
transformation = [[0, 0.5], [0.4, 0.85]]
X = X.dot(transformation)

# Scatter the transformed samples: first column on x, second on y.
plt.scatter(x=X[:, 0], y=X[:, 1])

## Attempt k-means clustering

In [None]:
# Build a 3-cluster k-means estimator with a fixed seed, then fit it to X
# and keep the cluster index assigned to each sample.
kmeans_model = KMeans(n_clusters=3, random_state=160)
y_pred = kmeans_model.fit_predict(X)

In [None]:
# Scatter the samples, coloured by their k-means cluster assignment.
plt.subplot()
plt.scatter(
    x=X[:, 0],
    y=X[:, 1],
    c=y_pred,
)

## Cluster using a GMM

In [None]:
# Fit a 3-component Gaussian mixture to X, then label every sample with the
# component the fitted model predicts for it.
# random_state is pinned (like the other stochastic calls in this notebook)
# so the fit, and therefore the label/colour assignment, is the same on
# every run instead of depending on a random initialisation.
gmm = GaussianMixture(n_components=3, random_state=0).fit(X)
labels = gmm.predict(X)

In [None]:
# Scatter the samples, coloured by their GMM component label.
# The trailing semicolon suppresses the notebook's repr of the return value.
plt.scatter(
    x=X[:, 0],
    y=X[:, 1],
    c=labels,
    s=40,
    cmap='viridis',
);

## Plot AIC and BIC
Let's plot the AIC and BIC to determine whether we chose the right number of clusters.

In [None]:
# Candidate component counts: 1 through 9.
n_components = np.arange(1, 10)

# Fit one full-covariance mixture per candidate count, each with the same seed.
models = [
    GaussianMixture(k, covariance_type='full', random_state=0).fit(X)
    for k in n_components
]

# Score every fitted model on the training data with both criteria.
bic_scores = [fitted.bic(X) for fitted in models]
aic_scores = [fitted.aic(X) for fitted in models]

# Plot both criteria against the component count.
plt.plot(n_components, bic_scores, label='BIC')
plt.plot(n_components, aic_scores, label='AIC')
plt.legend(loc='best')
plt.xlabel('n_components');