```python
import math
import numpy as np
import pandas as pd
from scipy.stats import entropy
```
# Information Measures
A list of information measure formulas [1], each illustrated numerically on randomly generated data.
```python
# generate random samples
rng = np.random.default_rng()
x = rng.choice(['strongly agree', 'agree', 'neither agree nor disagree', 'disagree', 'strongly disagree'],
               size=(1000, 1),
               p=[0.1, 0.3, 0.2, 0.3, 0.1])
y = rng.choice(['female', 'male'], size=(1000, 1))
data = np.concatenate((x, y), axis=1)
df = pd.DataFrame(data, columns=['X', 'Y'])
df.head()
```
|   | X | Y |
|---|---|---|
| 0 | neither agree nor disagree | male |
| 1 | strongly disagree | female |
| 2 | disagree | female |
| 3 | agree | female |
| 4 | neither agree nor disagree | female |
```python
# probabilities of X
P_X = df.X.value_counts(normalize=True)
print(P_X)
```
```
X
agree                         0.313
disagree                      0.286
neither agree nor disagree    0.198
strongly disagree             0.102
strongly agree                0.101
Name: proportion, dtype: float64
```
```python
# probabilities of Y
P_Y = df.Y.value_counts(normalize=True)
print(P_Y)
```
```
Y
male      0.509
female    0.491
Name: proportion, dtype: float64
```
```python
# joint probabilities of X, Y
P_XY = df.value_counts(['X', 'Y'], normalize=True, sort=False)
print(P_XY)
```
```
X                           Y
agree                       female    0.149
                            male      0.164
disagree                    female    0.144
                            male      0.142
neither agree nor disagree  female    0.095
                            male      0.103
strongly agree              female    0.050
                            male      0.051
strongly disagree           female    0.053
                            male      0.049
Name: proportion, dtype: float64
```
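
As a quick sanity check (an addition, not part of the original write-up), summing the joint probabilities over either variable should recover the corresponding marginal distribution; the sketch assumes `P_XY` keeps the index level names `X` and `Y` produced by `value_counts`.

```python
# added sanity check: marginalizing P_XY should recover P_X and P_Y
print(np.allclose(P_XY.groupby(level='X').sum().sort_index(), P_X.sort_index()))
print(np.allclose(P_XY.groupby(level='Y').sum().sort_index(), P_Y.sort_index()))
```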
## Entropy
Given a discrete random variable \(X\), the entropy of \(X\) is:
\[ \begin{equation} \begin{split} H(X) & = - \sum_{x \in \mathcal{X}} p(x) \log p(x) \end{split} \end{equation} \]
```python
# calculation using the above formula
H_X = -sum(P_X * np.log2(P_X))
H_Y = -sum(P_Y * np.log2(P_Y))
print(H_X, H_Y)
```
```
2.1736058278387715 0.9997662707810439
```
```python
# verify with scipy.stats.entropy
print(math.isclose(H_X, entropy(P_X, base=2)))
print(math.isclose(H_Y, entropy(P_Y, base=2)))
```
```
True
True
```
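
As an added hedged check, entropy is maximized by the uniform distribution, so \(H(X)\) cannot exceed \(\log_2 |\mathcal{X}|\); with five answer categories the bound is \(\log_2 5 \approx 2.32\) bits.

```python
# added check: H(X) is bounded above by log2 of the number of categories
H_max = np.log2(len(P_X))
print(H_max, H_X <= H_max)
```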
## Joint and Conditional Entropy
Given two discrete random variables \(X, Y\), the joint entropy of \(X\) and \(Y\) is:
\[ \begin{equation} \begin{split} H(X,Y) & = -\sum_{x\in \mathcal{X}, y\in \mathcal{Y}} p(x,y)\log p(x,y) \end{split} \end{equation} \]
```python
# calculation using the above formula
H_XY = -sum(P_XY * np.log2(P_XY))
print(H_XY)
```
```
3.172723449704413
```
```python
# verify with scipy.stats.entropy
math.isclose(H_XY, entropy(P_XY, base=2))
```
```
True
```
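
Two standard bounds can also be checked (an addition, not in the original): the joint entropy is at least each marginal entropy and at most their sum, \(\max(H(X), H(Y)) \le H(X,Y) \le H(X) + H(Y)\), with the upper bound attained only when \(X\) and \(Y\) are independent.

```python
# added check: max(H(X), H(Y)) <= H(X,Y) <= H(X) + H(Y)
print(max(H_X, H_Y) <= H_XY <= H_X + H_Y)
```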
The conditional entropy of \(Y\) given \(X\) is:
\[ \begin{equation} \begin{split} H(Y|X) & =-\sum_{x\in \mathcal{X}, y\in \mathcal{Y}} p(x,y) \log p(y|x) \end{split} \end{equation} \]
```python
# compute using the formula; replace p(y|x) with p(x,y)/p(x)
H_Y_X = -sum(P_XY[x, y] * np.log2(P_XY[x, y] / P_X[x])
             for x in df.X.unique() for y in df.Y.unique())
print(H_Y_X)
```
```
0.999117621865641
```
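
The same construction gives \(H(X|Y)\) by swapping the roles of the two variables; this block is an addition used in the checks below.

```python
# same pattern for H(X|Y); replace p(x|y) with p(x,y)/p(y)
H_X_Y = -sum(P_XY[x, y] * np.log2(P_XY[x, y] / P_Y[y])
             for x in df.X.unique() for y in df.Y.unique())
print(H_X_Y)
```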
The chain rule of entropy:
\[ \begin{equation} \begin{split} H(X,Y) & = H(X|Y) + H(Y) \\ & = H(Y|X) + H(X) \\ \end{split} \end{equation} \]
```python
# verify H(X,Y) = H(Y|X) + H(X)
math.isclose(H_XY, H_Y_X + H_X)
```
```
True
```
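
Using \(H(X|Y)\) computed above, the other form of the chain rule can be verified the same way (an added check).

```python
# verify H(X,Y) = H(X|Y) + H(Y)
math.isclose(H_XY, H_X_Y + H_Y)
```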
## Mutual Information
\[ \begin{equation} \begin{split} I(X,Y) & = \sum_{x \in \mathcal{X}, y \in \mathcal{Y}} p(x,y) \log \frac{p(x,y)}{p(x)p(y)} \\ & = H(X) + H(Y) - H(X,Y) \\ & = H(X) - H(X|Y) \\ & = H(Y) - H(Y|X) \\ & = H(X,Y) - H(X|Y) - H(Y|X) \end{split} \end{equation} \]
```python
# using the probability formula
I_XY = sum(P_XY[x, y] * np.log2(P_XY[x, y] / (P_X[x] * P_Y[y]))
           for x in df.X.unique() for y in df.Y.unique())
print(I_XY)
```
```
0.0006486489154028196
```
```python
math.isclose(I_XY, H_X + H_Y - H_XY)
```
```
True
```
```python
math.isclose(I_XY, H_Y - H_Y_X)
```
```
True
```
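
The remaining identities in the formula above can be verified with the quantities already computed, including \(H(X|Y)\) from the added block in the previous section; both checks should hold up to floating-point error.

```python
# verify I(X,Y) = H(X) - H(X|Y) and I(X,Y) = H(X,Y) - H(X|Y) - H(Y|X)
print(math.isclose(I_XY, H_X - H_X_Y))
print(math.isclose(I_XY, H_XY - H_X_Y - H_Y_X))
```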