Module `stikpetP.effect_sizes.eff_size_pairwise_bin`

Expand source code

from math import asin
import pandas as pd

def _expected_pair_counts(expCounts, cat1, cat2):
    '''
    Look up the expected counts for the first and second category of each pair.

    Parameters
    ----------
    expCounts : pandas dataframe
        first column holds the category labels, second column the expected counts
    cat1 : list
        label of the first category of each pair
    cat2 : list
        label of the second category of each pair

    Returns
    -------
    tuple of two lists
        the expected counts matching *cat1* and the expected counts matching *cat2*

    Raises
    ------
    IndexError
        if a category of a pair does not occur in the first column of *expCounts*
    '''
    # Build the lookup once instead of filtering the dataframe for every pair.
    # setdefault keeps the first row when a label is listed more than once.
    expected = {}
    for label, count in zip(expCounts.iloc[:, 0], expCounts.iloc[:, 1]):
        expected.setdefault(label, count)

    missing = [c for c in dict.fromkeys(cat1 + cat2) if c not in expected]
    if missing:
        raise IndexError("no expected count found in expCounts for: " + str(missing))

    return [expected[c] for c in cat1], [expected[c] for c in cat2]


def es_pairwise_bin(data, expCounts = None, es="coheng"):
    '''
    Binary Effect Size for Pairwise Test
    ------------------------------------
    When using a pairwise post-hoc test for a single nominal variable, the pair has become binary. This function then can determine the effect size for each pair.
    
    Options are to use Cohen g, Cohen h', or the Alternative Ratio.
    
    Parameters
    ----------
    data : list or pandas data series
        the data
    expCounts : pandas dataframe, optional 
        the categories (first column) and expected counts (second column)
    es : {"coheng", "cohenh", "ar"}, optional
        effect size to use.
        
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        * *cat1*, label of first category in pair
        * *cat2*, label of second category in pair
        * *n1*, number of cases in first category
        * *n2*, number of cases in second category
        * followed by the effect size value ("Cohen g", "Cohen h'", or "AR 1" and "AR 2")
    
    Raises
    ------
    ValueError
        if *es* is not one of the supported options
    IndexError
        if *expCounts* is used and does not list a category that occurs in the data
    
    Notes
    -----
    Values whose string representation is 'nan' are removed before counting.
    
    Categories are ordered by their first appearance in the data, so the order of the pairs (and which category is *cat1*) is reproducible between runs.
    
    If expected counts are provided, for Cohen h' and the Alternative Ratio these expected counts are converted to expected proportions. Without expected counts an expected proportion of 0.5 is used.
    
    See the separate functions of each effect size for more details.
    
     * `stikpetP.effect_sizes.eff_size_cohen_g.es_cohen_g` for Cohen g
     * `stikpetP.effect_sizes.eff_size_cohen_h_os.es_cohen_h_os` for Cohen h'
     * `stikpetP.effect_sizes.eff_size_alt_ratio.es_alt_ratio` for the Alternative Ratio
    
    See Also
    --------
    stikpetP.other.poho_binomial.ph_binomial : performs a pairwise binomial test
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    
    # Fail early on a misspelled option instead of silently returning only the counts.
    if es not in ("coheng", "cohenh", "ar"):
        raise ValueError("es must be 'coheng', 'cohenh' or 'ar', got " + repr(es))
    
    # Single pass frequency count; a dict keeps the categories in first-appearance order.
    counts = {}
    for x in data:
        if str(x) != 'nan':
            counts[x] = counts.get(x, 0) + 1
    categ = list(counts)
    k = len(categ)
    
    # All unordered pairs (i < j) of categories.
    pairs = [(categ[i], categ[j]) for i in range(k) for j in range(i + 1, k)]
    cat1 = [a for a, _ in pairs]
    cat2 = [b for _, b in pairs]
    
    res = pd.DataFrame({
        'cat1': cat1,
        'cat2': cat2,
        'n1': [counts[c] for c in cat1],
        'n2': [counts[c] for c in cat2],
    })
    
    # Observed proportion of the first category within each pair.
    nPair = res['n1'] + res['n2']
    pi1 = res['n1'] / nPair
    
    if es=="coheng":
        res['Cohen g'] = pi1 - 0.5
    
    elif es=="cohenh":
        phi1 = [2*asin(x**0.5) for x in pi1]
        
        if expCounts is None:
            phic = [2*asin(0.5**0.5)] * len(phi1)
        else:
            n1E, n2E = _expected_pair_counts(expCounts, cat1, cat2)
            phic = [2*asin((e1 / (e1 + e2))**0.5) for e1, e2 in zip(n1E, n2E)]
        
        res['Cohen h\''] = [obs - exp for obs, exp in zip(phi1, phic)]
    
    else:
        pi2 = res['n2'] / nPair
        if expCounts is None:
            pic1 = [0.5] * len(pairs)
            pic2 = [0.5] * len(pairs)
        else:
            n1E, n2E = _expected_pair_counts(expCounts, cat1, cat2)
            pic1 = [e1 / (e1 + e2) for e1, e2 in zip(n1E, n2E)]
            pic2 = [e2 / (e1 + e2) for e1, e2 in zip(n1E, n2E)]
        
        # Iterate NumPy scalars (not Python floats) so that an expected proportion
        # of zero gives inf/nan with a warning, as it did before, instead of raising.
        res['AR 1'] = [obs / exp for obs, exp in zip(pi1.to_numpy(), pic1)]
        res['AR 2'] = [obs / exp for obs, exp in zip(pi2.to_numpy(), pic2)]
        
    return (res)

Functions

def es_pairwise_bin(data, expCounts=None, es='coheng')

Binary Effect Size For Pairwise Test

When using a pairwise post-hoc test for a single nominal variable, the pair has become binary. This function then can determine the effect size for each pair.

Options are to use Cohen g, Cohen h', or the Alternative Ratio.

Parameters

data : list or pandas data series: the data
expCounts : pandas dataframe, optional: the categories and expected counts
es : {"coheng", "cohenh", "ar"}, optional: effect size to use.

Returns

pandas.DataFrame

A dataframe with the following columns:

cat1, label of first category in pair
cat2, label of second category in pair
n1, number of cases in first category
n2, number of cases in second category
followed by the effect size value

Notes

If expected counts are provided, for Cohen h' and the Alternative Ratio these expected counts are converted to expected proportions.

See the separate functions of each effect size for more details.

es_cohen_g() for Cohen g
es_cohen_h_os() for Cohen h'
es_alt_ratio() for the Alternative Ratio

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def _expected_pair_counts(expCounts, cat1, cat2):
    '''
    Look up the expected counts for the first and second category of each pair.

    Parameters
    ----------
    expCounts : pandas dataframe
        first column holds the category labels, second column the expected counts
    cat1 : list
        label of the first category of each pair
    cat2 : list
        label of the second category of each pair

    Returns
    -------
    tuple of two lists
        the expected counts matching *cat1* and the expected counts matching *cat2*

    Raises
    ------
    IndexError
        if a category of a pair does not occur in the first column of *expCounts*
    '''
    # Build the lookup once instead of filtering the dataframe for every pair.
    # setdefault keeps the first row when a label is listed more than once.
    expected = {}
    for label, count in zip(expCounts.iloc[:, 0], expCounts.iloc[:, 1]):
        expected.setdefault(label, count)

    missing = [c for c in dict.fromkeys(cat1 + cat2) if c not in expected]
    if missing:
        raise IndexError("no expected count found in expCounts for: " + str(missing))

    return [expected[c] for c in cat1], [expected[c] for c in cat2]


def es_pairwise_bin(data, expCounts = None, es="coheng"):
    '''
    Binary Effect Size for Pairwise Test
    ------------------------------------
    When using a pairwise post-hoc test for a single nominal variable, the pair has become binary. This function then can determine the effect size for each pair.
    
    Options are to use Cohen g, Cohen h', or the Alternative Ratio.
    
    Parameters
    ----------
    data : list or pandas data series
        the data
    expCounts : pandas dataframe, optional 
        the categories (first column) and expected counts (second column)
    es : {"coheng", "cohenh", "ar"}, optional
        effect size to use.
        
    Returns
    -------
    pandas.DataFrame
        A dataframe with the following columns:
    
        * *cat1*, label of first category in pair
        * *cat2*, label of second category in pair
        * *n1*, number of cases in first category
        * *n2*, number of cases in second category
        * followed by the effect size value ("Cohen g", "Cohen h'", or "AR 1" and "AR 2")
    
    Raises
    ------
    ValueError
        if *es* is not one of the supported options
    IndexError
        if *expCounts* is used and does not list a category that occurs in the data
    
    Notes
    -----
    Values whose string representation is 'nan' are removed before counting.
    
    Categories are ordered by their first appearance in the data, so the order of the pairs (and which category is *cat1*) is reproducible between runs.
    
    If expected counts are provided, for Cohen h' and the Alternative Ratio these expected counts are converted to expected proportions. Without expected counts an expected proportion of 0.5 is used.
    
    See the separate functions of each effect size for more details.
    
     * `stikpetP.effect_sizes.eff_size_cohen_g.es_cohen_g` for Cohen g
     * `stikpetP.effect_sizes.eff_size_cohen_h_os.es_cohen_h_os` for Cohen h'
     * `stikpetP.effect_sizes.eff_size_alt_ratio.es_alt_ratio` for the Alternative Ratio
    
    See Also
    --------
    stikpetP.other.poho_binomial.ph_binomial : performs a pairwise binomial test
    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
        
    '''
    
    # Fail early on a misspelled option instead of silently returning only the counts.
    if es not in ("coheng", "cohenh", "ar"):
        raise ValueError("es must be 'coheng', 'cohenh' or 'ar', got " + repr(es))
    
    # Single pass frequency count; a dict keeps the categories in first-appearance order.
    counts = {}
    for x in data:
        if str(x) != 'nan':
            counts[x] = counts.get(x, 0) + 1
    categ = list(counts)
    k = len(categ)
    
    # All unordered pairs (i < j) of categories.
    pairs = [(categ[i], categ[j]) for i in range(k) for j in range(i + 1, k)]
    cat1 = [a for a, _ in pairs]
    cat2 = [b for _, b in pairs]
    
    res = pd.DataFrame({
        'cat1': cat1,
        'cat2': cat2,
        'n1': [counts[c] for c in cat1],
        'n2': [counts[c] for c in cat2],
    })
    
    # Observed proportion of the first category within each pair.
    nPair = res['n1'] + res['n2']
    pi1 = res['n1'] / nPair
    
    if es=="coheng":
        res['Cohen g'] = pi1 - 0.5
    
    elif es=="cohenh":
        phi1 = [2*asin(x**0.5) for x in pi1]
        
        if expCounts is None:
            phic = [2*asin(0.5**0.5)] * len(phi1)
        else:
            n1E, n2E = _expected_pair_counts(expCounts, cat1, cat2)
            phic = [2*asin((e1 / (e1 + e2))**0.5) for e1, e2 in zip(n1E, n2E)]
        
        res['Cohen h\''] = [obs - exp for obs, exp in zip(phi1, phic)]
    
    else:
        pi2 = res['n2'] / nPair
        if expCounts is None:
            pic1 = [0.5] * len(pairs)
            pic2 = [0.5] * len(pairs)
        else:
            n1E, n2E = _expected_pair_counts(expCounts, cat1, cat2)
            pic1 = [e1 / (e1 + e2) for e1, e2 in zip(n1E, n2E)]
            pic2 = [e2 / (e1 + e2) for e1, e2 in zip(n1E, n2E)]
        
        # Iterate NumPy scalars (not Python floats) so that an expected proportion
        # of zero gives inf/nan with a warning, as it did before, instead of raising.
        res['AR 1'] = [obs / exp for obs, exp in zip(pi1.to_numpy(), pic1)]
        res['AR 2'] = [obs / exp for obs, exp in zip(pi2.to_numpy(), pic2)]
        
    return (res)