import math
import numpy as np
import pandas as pd
from scipy.stats import entropy
Information Measures
A list of information measure formulas [1], illustrated on randomly generated survey-style data.
# generate random samples
rng = np.random.default_rng()
x = rng.choice(['strongly agree', 'agree', 'neither agree nor disagree', 'disagree', 'strongly disagree'],
               size=(1000, 1),
               p=[0.1, 0.3, 0.2, 0.3, 0.1])
y = rng.choice(['female', 'male'], size=(1000, 1))
data = np.concatenate((x,y), axis=1)
df = pd.DataFrame(data, columns=['X','Y'])
df.head()
|   | X | Y |
|---|---|---|
| 0 | neither agree nor disagree | male |
| 1 | strongly disagree | female |
| 2 | disagree | female |
| 3 | agree | female |
| 4 | neither agree nor disagree | female |
# probabilities of X
P_X = df.X.value_counts(normalize=True)
print(P_X)
X
agree                         0.313
disagree                      0.286
neither agree nor disagree    0.198
strongly disagree             0.102
strongly agree                0.101
Name: proportion, dtype: float64
# probabilities of Y
P_Y = df.Y.value_counts(normalize=True)
print(P_Y)
Y
male      0.509
female    0.491
Name: proportion, dtype: float64
# joint probabilities of X, Y
P_XY = df.value_counts(['X','Y'], normalize=True, sort=False)
print(P_XY)
X                           Y
agree                       female    0.149
                            male      0.164
disagree                    female    0.144
                            male      0.142
neither agree nor disagree  female    0.095
                            male      0.103
strongly agree              female    0.050
                            male      0.051
strongly disagree           female    0.053
                            male      0.049
Name: proportion, dtype: float64
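As a quick sanity check, summing the joint probabilities over \(Y\) should recover the marginal distribution of \(X\). A minimal sketch (the name P_X_marg is illustrative):
# marginalizing the joint distribution over Y recovers P_X (up to floating-point error)
P_X_marg = P_XY.groupby(level='X').sum()
print(np.allclose(P_X_marg.sort_index(), P_X.sort_index()))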
Entropy
Given a discrete random variable \(X\), the entropy of \(X\) is:
\[ \begin{equation} \begin{split} H(X) & = - \sum_{x \in \mathcal{X}} p(x) \log p(x) \end{split} \end{equation} \]
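With base-2 logarithms, as used in the code below, entropy is measured in bits. For example, a fair coin with two equally likely outcomes carries exactly one bit of entropy:
\[ \begin{equation} \begin{split} H & = -\tfrac{1}{2}\log_2\tfrac{1}{2} - \tfrac{1}{2}\log_2\tfrac{1}{2} = 1 \text{ bit} \end{split} \end{equation} \]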
# calculation using the above formula
H_X = -sum(P_X * np.log2(P_X))
H_Y = -sum(P_Y * np.log2(P_Y))
print(H_X, H_Y)
2.1736058278387715 0.9997662707810439
# verify by scipy.stats.entropy
print(math.isclose(H_X, entropy(P_X, base=2)))
print(math.isclose(H_Y, entropy(P_Y, base=2)))
True
True
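Entropy is maximized by the uniform distribution, so \(H(X) \le \log_2 5\) for the five-category \(X\) and \(H(Y) \le \log_2 2 = 1\) for the binary \(Y\). A minimal check using the values computed above:
# entropy is bounded above by log2 of the number of categories
print(H_X <= np.log2(len(P_X)), H_Y <= np.log2(len(P_Y)))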
Joint and Conditional Entropy
Given two discrete random variables \(X, Y\), the joint entropy of \(X\) and \(Y\) is:
\[ \begin{equation} \begin{split} H(X,Y) & = -\sum_{x\in \mathcal{X}, y\in \mathcal{Y}} p(x,y)\log p(x,y) \end{split} \end{equation} \]
# calculation using the above formula
H_XY = -sum(P_XY * np.log2(P_XY))
print(H_XY)
3.172723449704413
# verify by scipy.stats.entropy method
math.isclose(H_XY, entropy(P_XY, base=2))
True
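Since \(X\) and \(Y\) were sampled independently, the joint entropy should come out close to \(H(X) + H(Y)\); in general \(H(X,Y) \le H(X) + H(Y)\), with equality only under independence. A quick comparison:
# joint entropy is subadditive; the gap is small here because X and Y are independent
print(H_XY, H_X + H_Y)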
The conditional entropy of Y given X is:
\[ \begin{equation} \begin{split} H(Y|X) & =-\sum_{x\in \mathcal{X}, y\in \mathcal{Y}} p(x,y) \log p(y|x) \end{split} \end{equation} \]
# compute using the formula; replace p(y|x) with p(x,y)/p(x)
H_Y_X = -sum(P_XY[x,y] * np.log2(P_XY[x,y]/P_X[x])
for x in df.X.unique() for y in df.Y.unique())
print(H_Y_X)
0.999117621865641
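Equivalently, \(H(Y|X) = \sum_{x \in \mathcal{X}} p(x) H(Y|X=x)\), the expected entropy of the conditional distributions of \(Y\). A minimal sketch of this alternative computation (H_Y_X_alt is an illustrative name):
# H(Y|X) as the expected entropy of the conditional distributions p(y|x)
H_Y_X_alt = sum(P_X[x] * entropy(df.loc[df.X == x, 'Y'].value_counts(normalize=True), base=2)
                for x in df.X.unique())
print(math.isclose(H_Y_X, H_Y_X_alt))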
The chain rule of entropy,
\[ \begin{equation} \begin{split} H(X,Y) & = H(X|Y) + H(Y) \\ & = H(Y|X) + H(X) \\ \end{split} \end{equation} \]
# verify H(X,Y) = H(Y|X) + H(X)
math.isclose(H_XY, H_Y_X + H_X)
True
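The other form of the chain rule can be checked by computing \(H(X|Y)\) from its definition in the same way (H_X_Y is an illustrative name):
# compute H(X|Y); replace p(x|y) with p(x,y)/p(y)
H_X_Y = -sum(P_XY[x,y] * np.log2(P_XY[x,y]/P_Y[y])
             for x in df.X.unique() for y in df.Y.unique())
# verify H(X,Y) = H(X|Y) + H(Y)
print(math.isclose(H_XY, H_X_Y + H_Y))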
Mutual Information
\[ \begin{equation} \begin{split} I(X,Y) & = \sum_{x \in \mathcal{X}, y \in \mathcal{Y}} p(x,y) \log \frac{p(x,y)}{p(x)p(y)} \\ & = H(X) + H(Y) - H(X,Y) \\ & = H(X) - H(X|Y) \\ & = H(Y) - H(Y|X) \\ & = H(X,Y) - H(X|Y) - H(Y|X) \end{split} \end{equation} \]
# using the probability formula
I_XY = sum(P_XY[x,y] * np.log2(P_XY[x,y] / (P_X[x] * P_Y[y]))
for x in df.X.unique() for y in df.Y.unique())
print(I_XY)
0.0006486489154028196
math.isclose(I_XY, H_X + H_Y - H_XY)
True
math.isclose(I_XY, H_Y - H_Y_X)
True
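The remaining identities can be checked with the \(H(X|Y)\) value from the chain-rule sketch above; note that \(I(X,Y)\) is close to zero here because \(X\) and \(Y\) were generated independently.
# verify I(X,Y) = H(X) - H(X|Y) and I(X,Y) = H(X,Y) - H(X|Y) - H(Y|X), using H_X_Y from above
print(math.isclose(I_XY, H_X - H_X_Y))
print(math.isclose(I_XY, H_XY - H_X_Y - H_Y_X))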