Module stikpetP.effect_sizes.eff_size_pairwise_bin_ord

Expand source code
import pandas as pd
from ..effect_sizes.eff_size_common_language_is import es_common_language_is
from ..correlations.cor_rosenthal import r_rosenthal
from ..correlations.cor_rank_biserial_is import r_rank_biserial_is
from ..other.poho_dunn import ph_dunn

def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es="cle"):
    '''
    Pairwise Binary-Ordinal Effect Sizes
    ---------------------
    This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).
    
    Parameters
    ----------
    catField : pandas series or list
        data with categories
    ordField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    es : {"cle", "rb", "rosenthal"}, optional
        the effect size to determine. Any value other than "cle" or "rb" is treated as "rosenthal".
    
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        * *cat. 1*, one of the two categories being compared
        * *cat. 2*, second of the two categories being compared
        * the effect size, in a column named *Common Language*, *rank biserial correlation* or *Rosenthal Correlation*, depending on *es*
    
    Notes
    -----
    For "cle" and "rb" the function goes over each possible pair of categories from the *catField* (adjusted with *categories* if used), in order of first appearance in the data. For each pair it determines, using only the scores of those two categories, the Common Language Effect Size (Vargha-Delaney A) or the (Glass) Rank Biserial (Cliff delta).
    
    For "cle" each cell holds a list with the first two values from the first row of the *es_common_language_is* result.
    
    If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic, and use the sum of the two sample sizes of each pair as the sample size.
        
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # local import so the block does not rely on a file-level import
    from itertools import combinations

    cat1 = []
    cat2 = []
    p_res = []

    if es in ["cle", "rb"]:
        # convert to pandas series if needed
        if isinstance(catField, list):
            catField = pd.Series(catField)

        if isinstance(ordField, list):
            ordField = pd.Series(ordField)

        # combine as one dataframe and drop incomplete rows
        df = pd.concat([catField, ordField], axis=1)
        df = df.dropna()

        # only use given categories
        if categories is not None:
            df = df[df.iloc[:, 0].isin(categories)]

        # unique categories in order of first appearance; a plain set() would
        # make the pair order (and so which category is 'cat. 1') vary per run
        cats = list(df.iloc[:, 0].unique())

        if es == "cle":
            col_name = 'Common Language'
        else:
            col_name = 'rank biserial correlation'

        # every unordered pair of categories, first category always listed first
        for first, second in combinations(cats, 2):
            cat1.append(first)
            cat2.append(second)

            sel_cats = [first, second]

            if es == "cle":
                es_res = es_common_language_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels, method="vda")
                p_res.append([es_res.iloc[0, 0], es_res.iloc[0, 1]])
            else:
                es_res = r_rank_biserial_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels)
                p_res.append(es_res)

    else:
        col_name = 'Rosenthal Correlation'

        # NOTE(review): inputs are passed on unchanged; presumably ph_dunn does
        # its own list conversion and missing-value handling - confirm.
        ph_dunn_res = ph_dunn(catField, ordField, categories=categories, levels=levels)

        # total sample size per comparison, kept local rather than written
        # into the Dunn result (or, as before, into the ph_dunn function itself)
        n_total = ph_dunn_res['n1'] + ph_dunn_res['n2']

        # pair the values positionally, so no particular index labels are assumed
        p_res = [r_rosenthal(z, n) for z, n in zip(ph_dunn_res['statistic'], n_total)]
        cat1 = list(ph_dunn_res['cat. 1'])
        cat2 = list(ph_dunn_res['cat. 2'])

    results = pd.DataFrame({'cat. 1': cat1, 'cat. 2': cat2, col_name: p_res})

    return results

Functions

def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es='cle')

Pairwise Binary-Ordinal Effect Sizes

This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).

Parameters

catField : pandas series
data with categories
ordField : pandas series
data with the scores
categories : list or dictionary, optional
the categories to use from catField
levels : list or dictionary, optional
the levels or order used in ordField.
es : {"cle", "rb", "rosenthal"}, optional
the effect size to determine.

Returns

pandas.DataFrame

A dataframe with the following columns:

  • cat. 1, one of the two categories being compared
  • cat. 2, second of the two categories being compared
  • effect size, the value of the effect size

Notes

The function simply goes over each possible pair of categories from the catField (adjusted with categories if used). For each pair it determines, using only the scores of those two categories, the Common Language Effect Size (Vargha-Delaney A) or the (Glass) Rank Biserial (Cliff delta). If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic.

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code
def es_pairwise_bin_ord(catField, ordField, categories=None, levels=None, es="cle"):
    '''
    Pairwise Binary-Ordinal Effect Sizes
    ---------------------
    This function determines the effect size for each comparison in a post-hoc analysis of a nominal vs. ordinal variable (e.g. a Kruskal-Wallis test).
    
    Parameters
    ----------
    catField : pandas series or list
        data with categories
    ordField : pandas series or list
        data with the scores
    categories : list or dictionary, optional
        the categories to use from catField
    levels : list or dictionary, optional
        the levels or order used in ordField.
    es : {"cle", "rb", "rosenthal"}, optional
        the effect size to determine. Any value other than "cle" or "rb" is treated as "rosenthal".
    
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        * *cat. 1*, one of the two categories being compared
        * *cat. 2*, second of the two categories being compared
        * the effect size, in a column named *Common Language*, *rank biserial correlation* or *Rosenthal Correlation*, depending on *es*
    
    Notes
    -----
    For "cle" and "rb" the function goes over each possible pair of categories from the *catField* (adjusted with *categories* if used), in order of first appearance in the data. For each pair it determines, using only the scores of those two categories, the Common Language Effect Size (Vargha-Delaney A) or the (Glass) Rank Biserial (Cliff delta).
    
    For "cle" each cell holds a list with the first two values from the first row of the *es_common_language_is* result.
    
    If the Rosenthal correlation is requested, it will perform the post-hoc Dunn test to obtain the z-statistic, and use the sum of the two sample sizes of each pair as the sample size.
        
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # local import so the block does not rely on a file-level import
    from itertools import combinations

    cat1 = []
    cat2 = []
    p_res = []

    if es in ["cle", "rb"]:
        # convert to pandas series if needed
        if isinstance(catField, list):
            catField = pd.Series(catField)

        if isinstance(ordField, list):
            ordField = pd.Series(ordField)

        # combine as one dataframe and drop incomplete rows
        df = pd.concat([catField, ordField], axis=1)
        df = df.dropna()

        # only use given categories
        if categories is not None:
            df = df[df.iloc[:, 0].isin(categories)]

        # unique categories in order of first appearance; a plain set() would
        # make the pair order (and so which category is 'cat. 1') vary per run
        cats = list(df.iloc[:, 0].unique())

        if es == "cle":
            col_name = 'Common Language'
        else:
            col_name = 'rank biserial correlation'

        # every unordered pair of categories, first category always listed first
        for first, second in combinations(cats, 2):
            cat1.append(first)
            cat2.append(second)

            sel_cats = [first, second]

            if es == "cle":
                es_res = es_common_language_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels, method="vda")
                p_res.append([es_res.iloc[0, 0], es_res.iloc[0, 1]])
            else:
                es_res = r_rank_biserial_is(df.iloc[:, 0], df.iloc[:, 1], categories=sel_cats, levels=levels)
                p_res.append(es_res)

    else:
        col_name = 'Rosenthal Correlation'

        # NOTE(review): inputs are passed on unchanged; presumably ph_dunn does
        # its own list conversion and missing-value handling - confirm.
        ph_dunn_res = ph_dunn(catField, ordField, categories=categories, levels=levels)

        # total sample size per comparison, kept local rather than written
        # into the Dunn result (or, as before, into the ph_dunn function itself)
        n_total = ph_dunn_res['n1'] + ph_dunn_res['n2']

        # pair the values positionally, so no particular index labels are assumed
        p_res = [r_rosenthal(z, n) for z, n in zip(ph_dunn_res['statistic'], n_total)]
        cat1 = list(ph_dunn_res['cat. 1'])
        cat2 = list(ph_dunn_res['cat. 2'])

    results = pd.DataFrame({'cat. 1': cat1, 'cat. 2': cat2, col_name: p_res})

    return results