Module stikpetP.effect_sizes.eff_size_pairwise_bin_ord
Expand source code
import pandas as pd
from ..effect_sizes.eff_size_common_language_is import es_common_language_is
from ..correlations.cor_rosenthal import r_rosenthal
from ..correlations.cor_rank_biserial_is import r_rank_biserial_is
from ..other.poho_dunn import ph_dunn
def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es="cle"):
    '''
    Pairwise Binary-Ordinal Effect Sizes
    ------------------------------------
    This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).

    Parameters
    ----------
    catField : list or pandas series
        data with categories
    ordField : list or pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    es : {"cle", "rb", "rosenthal"}, optional
        the effect size to determine. Any value other than "cle" or "rb" is handled as "rosenthal".

    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:

        * *cat. 1*, one of the two categories being compared
        * *cat. 2*, second of the two categories being compared
        * the effect size, in a column named 'Common Language', 'rank biserial correlation' or 'Rosenthal Correlation', depending on *es*

    Notes
    -----
    The function simply goes over each possible pair of categories from the *catField* (adjusted with *categories* if used). It then runs for only the scores of those two categories the Common Language Effect Size (Vargha-Delaney A) or (Glass) Rank Biserial (Cliff delta). If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic.

    For "cle" and "rb" the pairs are listed in order of first appearance of each category in the (filtered) data, so the output is the same on every run. With fewer than two categories the returned dataframe has no rows.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    cat1 = []
    cat2 = []
    p_res = []

    if es in ["cle", "rb"]:
        # convert to pandas series if needed
        if isinstance(catField, list):
            catField = pd.Series(catField)
        if isinstance(ordField, list):
            ordField = pd.Series(ordField)

        # combine as one dataframe and drop incomplete cases
        df = pd.concat([catField, ordField], axis=1)
        df = df.dropna()

        # only use given categories
        if categories is not None:
            df = df[df.iloc[:, 0].isin(categories)]

        # unique categories in order of first appearance; a plain set would
        # make the pair order (and so which category ends up as cat. 1)
        # vary between runs
        cats = list(dict.fromkeys(df.iloc[:, 0]))

        # number of categories
        k = len(cats)

        if es == "cle":
            col_name = 'Common Language'
        else:
            col_name = 'rank biserial correlation'

        # every unordered pair of categories exactly once
        for i in range(0, k - 1):
            for j in range(i + 1, k):
                cat1.append(cats[i])
                cat2.append(cats[j])
                sel_cats = [cats[i], cats[j]]

                if es == "cle":
                    es_res = es_common_language_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels, method="vda")
                    # the first two cells of the first result row are stored together
                    p_res.append([es_res.iloc[0, 0], es_res.iloc[0, 1]])
                else:
                    es_res = r_rank_biserial_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels)
                    p_res.append(es_res)

    else:
        col_name = 'Rosenthal Correlation'
        # the Dunn post-hoc test supplies the z-statistic and sizes per pair
        # NOTE(review): relies on ph_dunn returning the columns 'n1', 'n2',
        # 'statistic', 'cat. 1' and 'cat. 2' - confirm against ph_dunn
        ph_dunn_res = ph_dunn(catField, ordField, categories=categories, levels=levels)

        # combined sample size of each pair; this has to be read from the
        # result frame, not stored on the ph_dunn function itself
        n_pair = ph_dunn_res['n1'] + ph_dunn_res['n2']

        p_res = [r_rosenthal(z, n) for z, n in zip(ph_dunn_res['statistic'], n_pair)]
        cat1 = ph_dunn_res['cat. 1']
        cat2 = ph_dunn_res['cat. 2']

    results = pd.DataFrame({'cat. 1': cat1, 'cat. 2': cat2, col_name: p_res})

    return results
Functions
def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es='cle')
-
Pairwise Binary-Ordinal Effect Sizes
This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).
Parameters
catField
:pandas series
- data with categories
ordField
:pandas series
- data with the scores
categories
:list or dictionary, optional
- the categories to use from catField
levels
:list or dictionary, optional
- the levels or order used in ordField.
es
:{"cle", "rb", "rosenthal"}, optional
- the effect size to determine.
Returns
pandas.DataFrame
-
A dataframe with the following columns:
- cat. 1, one of the two categories being compared
- cat. 2, second of the two categories being compared
- effect size, the value of the effect size
Notes
The function simply goes over each possible pair of categories from the catField (adjusted with categories if used). It then runs for only the scores of those two categories the Common Language Effect Size (Vargha-Delaney A) or (Glass) Rank Biserial (Cliff delta). If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic.
Author
Made by P. Stikker
Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076
Expand source code
def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es="cle"):
    '''
    Pairwise Binary-Ordinal Effect Sizes
    ------------------------------------
    This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).

    Parameters
    ----------
    catField : list or pandas series
        data with categories
    ordField : list or pandas series
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    es : {"cle", "rb", "rosenthal"}, optional
        the effect size to determine. Any value other than "cle" or "rb" is handled as "rosenthal".

    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:

        * *cat. 1*, one of the two categories being compared
        * *cat. 2*, second of the two categories being compared
        * the effect size, in a column named 'Common Language', 'rank biserial correlation' or 'Rosenthal Correlation', depending on *es*

    Notes
    -----
    The function simply goes over each possible pair of categories from the *catField* (adjusted with *categories* if used). It then runs for only the scores of those two categories the Common Language Effect Size (Vargha-Delaney A) or (Glass) Rank Biserial (Cliff delta). If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic.

    For "cle" and "rb" the pairs are listed in order of first appearance of each category in the (filtered) data, so the output is the same on every run. With fewer than two categories the returned dataframe has no rows.

    Author
    ------
    Made by P. Stikker

    Companion website: https://PeterStatistics.com
    YouTube channel: https://www.youtube.com/stikpet
    Donations: https://www.patreon.com/bePatron?u=19398076
    '''
    cat1 = []
    cat2 = []
    p_res = []

    if es in ["cle", "rb"]:
        # convert to pandas series if needed
        if isinstance(catField, list):
            catField = pd.Series(catField)
        if isinstance(ordField, list):
            ordField = pd.Series(ordField)

        # combine as one dataframe and drop incomplete cases
        df = pd.concat([catField, ordField], axis=1)
        df = df.dropna()

        # only use given categories
        if categories is not None:
            df = df[df.iloc[:, 0].isin(categories)]

        # unique categories in order of first appearance; a plain set would
        # make the pair order (and so which category ends up as cat. 1)
        # vary between runs
        cats = list(dict.fromkeys(df.iloc[:, 0]))

        # number of categories
        k = len(cats)

        if es == "cle":
            col_name = 'Common Language'
        else:
            col_name = 'rank biserial correlation'

        # every unordered pair of categories exactly once
        for i in range(0, k - 1):
            for j in range(i + 1, k):
                cat1.append(cats[i])
                cat2.append(cats[j])
                sel_cats = [cats[i], cats[j]]

                if es == "cle":
                    es_res = es_common_language_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels, method="vda")
                    # the first two cells of the first result row are stored together
                    p_res.append([es_res.iloc[0, 0], es_res.iloc[0, 1]])
                else:
                    es_res = r_rank_biserial_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels)
                    p_res.append(es_res)

    else:
        col_name = 'Rosenthal Correlation'
        # the Dunn post-hoc test supplies the z-statistic and sizes per pair
        # NOTE(review): relies on ph_dunn returning the columns 'n1', 'n2',
        # 'statistic', 'cat. 1' and 'cat. 2' - confirm against ph_dunn
        ph_dunn_res = ph_dunn(catField, ordField, categories=categories, levels=levels)

        # combined sample size of each pair; this has to be read from the
        # result frame, not stored on the ph_dunn function itself
        n_pair = ph_dunn_res['n1'] + ph_dunn_res['n2']

        p_res = [r_rosenthal(z, n) for z, n in zip(ph_dunn_res['statistic'], n_pair)]
        cat1 = ph_dunn_res['cat. 1']
        cat2 = ph_dunn_res['cat. 2']

    results = pd.DataFrame({'cat. 1': cat1, 'cat. 2': cat2, col_name: p_res})

    return results