import scipy
from scipy.stats import chisquare, chi2_contingency
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline


# Inclusive (lower, upper) class limits of every group, in ascending order.
ranges = [
    (40, 49), (50, 59), (60, 69), (70, 79),
    (80, 89), (90, 99), (100, 109), (110, 119),
]

# Observed number of samples per group, aligned position-by-position with `ranges`.
f_obs = [57, 330, 2132, 4584, 4604, 2119, 659, 251]

# One row per group: its class limits and its observed count.
row_labels = list(range(len(f_obs)))
df = pd.DataFrame(
    {'Group': ranges, 'Observed Frequency': f_obs},
    index=row_labels,
)
df


# Represent each group by the midpoint (arithmetic mean) of its two class limits.
df['Group Average'] = df['Group'].map(lambda limits: sum(limits) / len(limits))
df.head()


# Quick visual sanity check: bar chart of the observed counts against the
# group midpoints, to eyeball whether the shape is roughly bell-like.
df.plot(kind='bar', x='Group Average', y='Observed Frequency')

<AxesSubplot:xlabel='Group Average'>


## estimate the sample mean and standard deviation from the grouped data
# Repeat every group midpoint once per observation counted in that group, so
# the grouped table is expanded into one value per sample.
data = df['Group Average'].repeat(df['Observed Frequency'])
u = data.mean()
s = data.std(ddof=0)  # divide by N (not N - 1), matching np.std's default
print(u, s)

80.51248642779588 12.125199950491224


# frozen normal distribution whose location and scale are the mean and
# standard deviation estimated from the observed data
norm = scipy.stats.norm(loc=u, scale=s)

# total number of samples across all groups
n = df['Observed Frequency'].sum()


# draw random variates from the fitted normal model and show their normalised
# histogram, as a visual impression of the model's density
r = norm.rvs(size=10_000)
plt.hist(x=r, bins=50, density=True);


# calculate expected frequency and chi-squared test statistic
#
# The groups are given by their class limits, e.g. (40, 49) and (50, 59),
# which leaves a gap between neighbouring classes. Under the continuous normal
# model the classes have to tile the whole real line, otherwise probability
# mass is lost and the expected counts no longer add up to n. Therefore:
#   * the boundary between two neighbouring classes is placed halfway between
#     the upper limit of one and the lower limit of the next (49.5 here;
#     this assumes the limits describe values rounded to that precision), and
#   * the first and last class are extended to -inf and +inf so that both
#     tails of the model are accounted for.
# The class probabilities then sum to 1, so the expected frequencies sum to n,
# matching the total of the observed frequencies as scipy.stats.chisquare
# expects. Rows of df are assumed to be ordered by ascending group.
lower_limits = np.array([group[0] for group in df['Group']], dtype=float)
upper_limits = np.array([group[1] for group in df['Group']], dtype=float)
class_edges = np.concatenate((
    [-np.inf],
    (upper_limits[:-1] + lower_limits[1:]) / 2,
    [np.inf],
))
df['Expected Frequency'] = np.diff(norm.cdf(class_edges)) * n


# calculate degrees of freedom
n_estimated_params = 2  # mean and std. dev. were both estimated from the sample
dof = len(df) - n_estimated_params - 1  # g - k - 1, where g = cell count, k = parameters

# calculate chi-squared test statistic and associated p-value
# NOTE: chisquare's `ddof` is the *reduction* applied to its default g - 1
# degrees of freedom (the p-value uses g - 1 - ddof), so it must receive the
# number of estimated parameters, not the final degrees of freedom `dof`.
chisq, pvalue = chisquare(f_obs=df['Observed Frequency'],
                          f_exp=df['Expected Frequency'],
                          ddof=n_estimated_params)

print(chisq, pvalue)

564.6737684529562 2.413510222813826e-123

	Group	Observed Frequency
0	(40, 49)	57
1	(50, 59)	330
2	(60, 69)	2132
3	(70, 79)	4584
4	(80, 89)	4604
5	(90, 99)	2119
6	(100, 109)	659
7	(110, 119)	251

	Group	Observed Frequency	Group Average
0	(40, 49)	57	44.5
1	(50, 59)	330	54.5
2	(60, 69)	2132	64.5
3	(70, 79)	4584	74.5
4	(80, 89)	4604	84.5

Test for goodness of fit of the normal-probability model