Module `stikpetP.visualisations.vis_histogram_split`

Expand source code

import pandas as pd
import matplotlib.pyplot as plt

def vi_histogram_split(catField, scaleField, categories=None, **kwargs):
    '''
    Split Histogram
    ---------------
    Based on a categorical field, the scores for each category are plotted in a separate histogram, and the histograms are stacked vertically, one beneath the other. All histograms use the same horizontal axis range (the minimum and maximum of all scores), so they can be compared directly.
    
    See **vi_histogram()** for more details on histograms.
    
    Parameters
    ----------
    catField : list, tuple, pandas Series or dataframe
        the categories
    scaleField : list, tuple, pandas Series or dataframe
        the scores
    categories : list or other iterable, optional
        categories to use, in the order the histograms should appear. If not given, the unique values found in catField are used.
    kwargs : other parameters for use in pyplot hist function
    
    Returns
    -------
    None. The split histogram is drawn and shown with pyplot.

    Notes
    -----
    The two fields are placed side by side in one dataframe (pandas pairs them by index), and every row with a missing category or a missing score is removed before plotting.

    Alternatives
    ------------
    In case of a binary-scale situation: [overlaid histogram](../visualisations/vis_histogram_overlay.html), [back-to-back histogram](../visualisations/vis_histogram_b2b.html), [back-to-back stem-and-leaf display](../visualisations/vis_stem_and_leaf_b2b.html), [butterfly chart/pyramid chart](../visualisations/vis_butterfly_bin.html)

    In case of a nominal-scale situation: [split box-plot](../visualisations/vis_boxplot_split.html)

    Next
    ----
    After visualizing the data, you might want to run a test. In case of a binary-scale situation: [Student t](../tests/test_student_t_is.html), [Welch t](../tests/test_welch_t_is.html), [Trimmed means](../tests/test_trimmed_mean_is.html), [Yuen-Welch](../tests/test_trimmed_mean_is.html), [Z test](../tests/test_z_is.html)

    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # plain sequences are wrapped so they can be combined with pd.concat
    if isinstance(catField, (list, tuple)):
        catField = pd.Series(catField)
    
    if isinstance(scaleField, (list, tuple)):
        scaleField = pd.Series(scaleField)
    
    #combine as one dataframe and drop incomplete rows
    df = pd.concat([catField, scaleField], axis=1).dropna()
    
    myClusters = df.iloc[:, 0]
    myScale = df.iloc[:, 1]
    
    # 'is not None' (instead of '!= None') so array-like categories do not
    # trigger an element-wise comparison; list() so any iterable can be used
    if categories is not None:
        myCats = list(categories)
    else:
        myCats = list(myClusters.unique())
    
    # scores per category (missing values were already removed above)
    myList = [myScale[myClusters == cat] for cat in myCats]
    k = len(myCats)
    
    # shared x-axis limits, determined once; skipped if no scores remain
    hasScores = len(myScale) > 0
    if hasScores:
        xMin, xMax = myScale.min(), myScale.max()
    
    plt.figure(1, figsize=(8, 8))
    plt.subplots_adjust(hspace=0.5)
    
    # one row per category, the first category at the top
    for i, (cat, catScores) in enumerate(zip(myCats, myList)):
        plt.subplot(k, 1, i + 1)
        plt.hist(catScores, **kwargs)
        if hasScores:
            plt.xlim(xMin, xMax)
        plt.xlabel(cat)
        plt.ylabel('Frequency')
        
    plt.show()

Functions

def vi_histogram_split(catField, scaleField, categories=None, **kwargs)

Split Histogram

Based on a categorical field, the scores for each category are plotted in a separate histogram, and the histograms are stacked vertically, one beneath the other.

See vi_histogram() for more details on histograms.

Parameters

catField : list or dataframe: the categories
scaleField : list or dataframe: the scores
categories : list, optional: categories to use
kwargs : other parameters for use in pyplot hist function

Returns

The split histogram

Alternatives

In case of a binary-scale situation: overlaid histogram, back-to-back histogram, back-to-back stem-and-leaf display, butterfly chart/pyramid chart

In case of a nominal-scale situation: split box-plot

Next

After visualizing the data, you might want to run a test. In case of a binary-scale situation: Student t, Welch t, Trimmed means, Yuen-Welch, Z test

Author

Made by P. Stikker

Companion website: https://PeterStatistics.com
YouTube channel: https://www.youtube.com/stikpet
Donations: https://www.patreon.com/bePatron?u=19398076

Expand source code

def vi_histogram_split(catField, scaleField, categories=None, **kwargs):
    '''
    Split Histogram
    ---------------
    Based on a categorical field, the scores for each category are plotted in a separate histogram, and the histograms are stacked vertically, one beneath the other. All histograms use the same horizontal axis range (the minimum and maximum of all scores), so they can be compared directly.
    
    See **vi_histogram()** for more details on histograms.
    
    Parameters
    ----------
    catField : list, tuple, pandas Series or dataframe
        the categories
    scaleField : list, tuple, pandas Series or dataframe
        the scores
    categories : list or other iterable, optional
        categories to use, in the order the histograms should appear. If not given, the unique values found in catField are used.
    kwargs : other parameters for use in pyplot hist function
    
    Returns
    -------
    None. The split histogram is drawn and shown with pyplot.

    Notes
    -----
    The two fields are placed side by side in one dataframe (pandas pairs them by index), and every row with a missing category or a missing score is removed before plotting.

    Alternatives
    ------------
    In case of a binary-scale situation: [overlaid histogram](../visualisations/vis_histogram_overlay.html), [back-to-back histogram](../visualisations/vis_histogram_b2b.html), [back-to-back stem-and-leaf display](../visualisations/vis_stem_and_leaf_b2b.html), [butterfly chart/pyramid chart](../visualisations/vis_butterfly_bin.html)

    In case of a nominal-scale situation: [split box-plot](../visualisations/vis_boxplot_split.html)

    Next
    ----
    After visualizing the data, you might want to run a test. In case of a binary-scale situation: [Student t](../tests/test_student_t_is.html), [Welch t](../tests/test_welch_t_is.html), [Trimmed means](../tests/test_trimmed_mean_is.html), [Yuen-Welch](../tests/test_trimmed_mean_is.html), [Z test](../tests/test_z_is.html)

    
    Author
    ------
    Made by P. Stikker
    
    Companion website: https://PeterStatistics.com  
    YouTube channel: https://www.youtube.com/stikpet  
    Donations: https://www.patreon.com/bePatron?u=19398076
    
    '''
    # plain sequences are wrapped so they can be combined with pd.concat
    if isinstance(catField, (list, tuple)):
        catField = pd.Series(catField)
    
    if isinstance(scaleField, (list, tuple)):
        scaleField = pd.Series(scaleField)
    
    #combine as one dataframe and drop incomplete rows
    df = pd.concat([catField, scaleField], axis=1).dropna()
    
    myClusters = df.iloc[:, 0]
    myScale = df.iloc[:, 1]
    
    # 'is not None' (instead of '!= None') so array-like categories do not
    # trigger an element-wise comparison; list() so any iterable can be used
    if categories is not None:
        myCats = list(categories)
    else:
        myCats = list(myClusters.unique())
    
    # scores per category (missing values were already removed above)
    myList = [myScale[myClusters == cat] for cat in myCats]
    k = len(myCats)
    
    # shared x-axis limits, determined once; skipped if no scores remain
    hasScores = len(myScale) > 0
    if hasScores:
        xMin, xMax = myScale.min(), myScale.max()
    
    plt.figure(1, figsize=(8, 8))
    plt.subplots_adjust(hspace=0.5)
    
    # one row per category, the first category at the top
    for i, (cat, catScores) in enumerate(zip(myCats, myList)):
        plt.subplot(k, 1, i + 1)
        plt.hist(catScores, **kwargs)
        if hasScores:
            plt.xlim(xMin, xMax)
        plt.xlabel(cat)
        plt.ylabel('Frequency')
        
    plt.show()