Module stikpetP.effect_sizes.eff_size_post_hoc_gof
Source code
from math import asin, log
import pandas as pd
def es_post_hoc_gof(post_hoc_results, es = "auto", bergsma=False):
'''
Effect Sizes for a Goodness-of-Fit Post-Hoc Analysis
----------------------------------------------------
    Determines an effect size for each test (row) from the results of ph_pairwise_bin(), ph_pairwise_gof(), ph_residual_gof_bin(), or ph_residual_gof_gof().
The function is shown in this [YouTube video](https://youtu.be/u4rO00xdj7Q) and described at [PeterStatistics.com](https://peterstatistics.com/Terms/Tests/PostHocAfterGoF.html).
Parameters
----------
post_hoc_results : dataframe
        the result of either ph_pairwise_bin(), ph_pairwise_gof(), ph_residual_gof_bin(), or ph_residual_gof_gof()
es : {'auto', 'coheng', 'cohenh', 'ar', 'cramerv', 'cohenw', 'jbme', 'fei', 'rosenthal'}, optional
the effect size to determine
bergsma : boolean, optional
use of Bergsma correction, only for Cramér V
Returns
-------
pandas.DataFrame
A dataframe with the following columns:
* for residual post-hoc
* *category*, the label of the category
* *name effect size*, the effect size value
* for pairwise post-hoc
* *category 1*, the label of the first category
* *category 2*, the label of the second category
* *name effect size*, the effect size value
Notes
-----
    'auto' will use Cohen h for exact tests, the Rosenthal correlation for z-tests, and Cramér's V otherwise.
    Cohen g ('coheng'), Cohen h ('cohenh'), and the Alternative Ratio ('ar') can all be used for any test.
    Cramér V ('cramerv'), Cohen w ('cohenw'), Johnston-Berry-Mielke E ('jbme'), and Fei ('fei') can be used with chi-square tests (or likelihood ratio tests).
The Rosenthal Correlation ('rosenthal') can be used with a z-test (proportion/Wald/score/residual).
See the separate functions for each of these for details on the calculations.
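    As an indication, the formulas as implemented in the source code below, with p_obs and p_exp the observed and expected proportion, n the sample size for the row (n1 + n2 for a pairwise test, the total sample size for a residual test), chi2 the chi-square or likelihood-ratio statistic, z the z-statistic, and minExp the smallest expected count:
    * Cohen g = p_obs - 0.5
    * Cohen h = 2*asin(sqrt(p_obs)) - 2*asin(sqrt(p_exp))
    * Alternative Ratio = p_obs / p_exp
    * Rosenthal correlation = z / sqrt(n)
    * Cramér V and Cohen w = sqrt(chi2 / n), with an optional Bergsma correction for Cramér V
    * Johnston-Berry-Mielke E = chi2*minExp / (n*(n - minExp)) for a chi-square statistic, or -G / (2*n*ln(minExp / n)) for a likelihood-ratio statistic G
    * Fei = sqrt(Johnston-Berry-Mielke E)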
Before, After and Alternatives
------------------------------
    Before using this function, one of these post-hoc tests needs to be performed:
    * [ph_pairwise_bin](../other/poho_pairwise_bin.html#ph_pairwise_bin) for Pairwise Binary Tests
    * [ph_pairwise_gof](../other/poho_pairwise_gof.html#ph_pairwise_gof) for Pairwise Goodness-of-Fit Tests
    * [ph_residual_gof_bin](../other/poho_residual_gof_bin.html#ph_residual_gof_bin) for Residual Tests using Binary Tests
    * [ph_residual_gof_gof](../other/poho_residual_gof_gof.html#ph_residual_gof_gof) for Residual Tests using Goodness-of-Fit Tests
After this you might want to use a rule-of-thumb for the interpretation:
* [th_post_hoc_gof](../other/thumb_post_hoc_gof.html#th_post_hoc_gof) for various rules-of-thumb
Effect size in this function:
* [es_cohen_g](../effect_sizes/eff_size_cohen_g.html#es_cohen_g) for Cohen g
* [es_cohen_h_os](../effect_sizes/eff_size_cohen_h_os.html#es_cohen_h_os) for Cohen h'
* [es_alt_ratio](../effect_sizes/eff_size_alt_ratio.html#es_alt_ratio) for Alternative Ratio
* [r_rosenthal](../correlations/cor_rosenthal.html#r_rosenthal) for Rosenthal Correlation if a z-value is available
    * [es_cramer_v_gof](../effect_sizes/eff_size_cramer_v_gof.html#es_cramer_v_gof) for Cramér's V for Goodness-of-Fit
* [es_cohen_w](../effect_sizes/eff_size_cohen_w.html#es_cohen_w) for Cohen's w
* [es_jbm_e](../effect_sizes/eff_size_jbm_e.html#es_jbm_e) for Johnston-Berry-Mielke E
* [es_fei](../effect_sizes/eff_size_fei.html#es_fei) for Fei
    Note: the effect size functions listed above are not called by this function, but the same formulas are used.
Author
------
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Example
--------
    Import pandas, load the data file, and select a nominal field
>>> import pandas as pd
>>> gss_df = pd.read_csv('https://peterstatistics.com/Packages/ExampleData/GSS2012a.csv', sep=',', low_memory=False, storage_options={'User-Agent': 'Mozilla/5.0'});
>>> nominal_field = gss_df['mar1'];
Obtain the post-hoc test results
>>> from ..other.poho_pairwise_bin import ph_pairwise_bin
>>> post_hoc_test = ph_pairwise_bin(nominal_field, test='binomial');
Obtain the effect size:
>>> es_post_hoc_gof(post_hoc_test, es='cohenh')
category 1 category 2 Cohen h
0 MARRIED NEVER MARRIED 0.435752
1 MARRIED DIVORCED 0.537120
2 MARRIED WIDOWED 0.756027
3 MARRIED SEPARATED 1.015353
4 NEVER MARRIED DIVORCED 0.114495
5 NEVER MARRIED WIDOWED 0.380654
6 NEVER MARRIED SEPARATED 0.729728
7 DIVORCED WIDOWED 0.272030
8 DIVORCED SEPARATED 0.640959
9 WIDOWED SEPARATED 0.403139
'''
#rename the post-hoc results
df = post_hoc_results
#determine the number of tests in the post-hoc results
n_tests = len(df)
#get the description of the test used
if 'test' in df.columns:
test_used = df['test'][0]
else:
test_used = df['test used'][0]
    #determine the type of test from keywords in its description
if any(keyword in test_used for keyword in ['binomial', 'multinomial']):
ph_test = 'exact'
elif any(keyword in test_used for keyword in ['Wald', 'score', 'adjusted', 'standardized']):
ph_test = 'z-test'
elif any(keyword in test_used for keyword in ['G test', 'likelihood']):
ph_test = 'likelihood-test'
else:
ph_test = 'chi2-test'
#find the name of the test-statistic column
if ph_test!='exact':
if 'z-statistic' in df.columns:
stat_col = 'z-statistic'
else:
stat_col = 'statistic'
#label for effect size
es_labels = {'coheng':'Cohen g',
'cohenh':'Cohen h',
'ar':'alternative ratio',
'cramerv':'Cramér V',
'cohenw':'Cohen w',
'jbme':'Johnston-Berry-Mielke E',
'fei':"Fei",
'rosenthal':'Rosenthal correlation'
}
#set the effect size measure if es='auto'
if es=='auto':
if ph_test=='exact':
es = 'cohenh'
elif ph_test=='z-test':
es = 'rosenthal'
else:
es= 'cramerv'
#determine if it was a pairwise or residual test
if 'category 1' in df.columns:
test_type = 'pairwise'
col_names = ['category 1', 'category 2', es_labels[es]]
res_col=2
else:
test_type = 'residual'
n = sum(df['obs. count'])
col_names = ['category', es_labels[es]]
res_col=1
#loop over each row (test)
results = pd.DataFrame()
for i in range(0, n_tests):
        # find the observed and expected proportions and the row sample size
if test_type == 'pairwise':
p_obs = df['obs. prop. 1'][i]
p_exp = df['exp. prop. 1'][i]
n_row = df['n1'][i] + df['n2'][i]
#add the two category names to the dataframe
results.at[i, 0] = df.iloc[i, 0]
results.at[i, 1] = df.iloc[i, 1]
else:
n_row = n
p_obs = df['obs. count'][i]/n_row
p_exp = df['exp. count'][i]/n_row
#add the category name to the dataframe
results.at[i, 0] = df.iloc[i, 0]
#for non-exact tests find the test-statistic value
if ph_test!='exact':
test_statistic = df[stat_col][i]
#determine the effect size
        #effect sizes that do not require a test statistic
if es=="coheng":
es_value = p_obs - 0.5
elif es=="cohenh":
es_value = 2*asin(p_obs**0.5) - 2*asin(p_exp**0.5)
elif es=="ar":
es_value = p_obs/p_exp
#effect sizes with a z-statistic
elif es=="rosenthal":
if ph_test == 'z-test':
es_value = df[stat_col][i]/(n_row**0.5)
else:
es_value = 'not possible with this post-hoc test'
#effect sizes with a chi-square statistic
elif es=='cramerv' or es=="cohenw":
if ph_test == 'chi2-test' or ph_test == 'likelihood-test':
es_value = (test_statistic/n_row)**0.5
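                #optionally apply the Bergsma correction for Cramér V, using a bias-corrected phi-squared and an adjusted denominator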
if es=='cramerv' and bergsma:
phi2= test_statistic/n_row
phi2_tilde = max(0, phi2 - 1/(n_row-1))
es_value = (phi2_tilde/(2 - 1/(n_row-1)))**0.5
else:
es_value = 'not possible with this post-hoc test'
elif es=="jbme" or es=="fei":
if ph_test == 'chi2-test' or ph_test == 'likelihood-test':
if ph_test == 'chi2-test':
es_value = test_statistic*df['minExp'][i]/(n_row*(n_row - df['minExp'][i]))
else:
es_value = -1/log(df['minExp'][i]/n_row)*test_statistic/(2*n_row)
if es=="fei":
es_value= es_value**0.5
else:
es_value = 'not possible with this post-hoc test'
#add the effect size value to the dataframe
results.at[i, res_col] = es_value
#add the column names
results.columns = col_names
return results
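Two further calls, continuing from the example in the docstring. The Alternative Ratio works with any of the supported post-hoc tests, so it can reuse the binomial results directly; the second call is only a sketch, since the call signature of ph_pairwise_gof() is assumed here to mirror that of ph_pairwise_bin().
>>> es_post_hoc_gof(post_hoc_test, es='ar')
>>> from ..other.poho_pairwise_gof import ph_pairwise_gof
>>> pairwise_gof = ph_pairwise_gof(nominal_field)  # assumed signature, mirroring ph_pairwise_bin()
>>> es_post_hoc_gof(pairwise_gof, es='cramerv', bergsma=True)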