Download notebook (.ipynb)

# Violin Plot

import pandas as pd

from lets_plot import *
# Initialise lets-plot's HTML output mode for this notebook session.
LetsPlot.setup_html()

# Quantile levels (the three quartiles) reused by the examples that draw
# quantile lines inside the violins.
QUANTILES = [0.25, 0.5, 0.75]

# Load the mpg dataset from the lets-plot-docs repository and preview it.
mpg_df = pd.read_csv(
    "https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv"
)
mpg_df.head()
Unnamed: 0 manufacturer model displ year cyl trans drv cty hwy fl class
0 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compact
1 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compact
2 3 audi a4 2.0 2008 4 manual(m6) f 20 31 p compact
3 4 audi a4 2.0 2008 4 auto(av) f 21 30 p compact
4 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compact

## Minimalistic example

# Smallest possible violin: only the y aesthetic is mapped.
(
    ggplot(mpg_df, aes(y='hwy'))
    + geom_violin()
    + ggtitle("Simplest example")
)

## Comparison of geoms

# Density curves of 'hwy', one facet per 'drv' value, with flipped coordinates
# so they can be compared with the violins side by side.
p_d = (
    ggplot(mpg_df)
    + geom_density(aes(x='hwy', fill='drv'), color='black', alpha=0.5)
    + facet_grid(x='drv')
    + coord_flip()
    + ggtitle("geom_density()")
)

# The same variables drawn with geom_violin(); 'drv' is treated as discrete
# with an explicit ordering.
p_v = (
    ggplot(mpg_df, aes(x=as_discrete('drv', order=1), y='hwy'))
    + geom_violin(aes(fill='drv'), alpha=0.5)
    + ggtitle("geom_violin()")
)

gggrid([p_d, p_v])

## Original parameters

### quantiles

# One test case per value of the `quantiles` parameter to demonstrate.
tests = [
    {'quantiles': levels}
    for levels in (None, [], [.05, .5, .95], [.25], [0, .5, 1])
]

# Draw one violin plot per case, titled with the value used, two per row.
gggrid(
    [
        (
            ggplot(mpg_df, aes('drv', 'hwy'))
            + geom_violin(quantiles=test['quantiles'], quantile_lines=True)
            + ggtitle("quantiles={0}".format(test['quantiles']))
        )
        for test in tests
    ],
    ncol=2,
)

### scale

# One test case per value of the `scale` parameter to demonstrate.
tests = [{'scale': mode} for mode in (None, 'area', 'count', 'width')]

# Draw one violin plot per case, titled with the value used, two per row.
gggrid(
    [
        (
            ggplot(mpg_df, aes('drv', 'hwy'))
            + geom_violin(
                scale=test['scale'],
                quantiles=QUANTILES,
                quantile_lines=True,
            )
            + ggtitle("scale={0}".format(test['scale']))
        )
        for test in tests
    ],
    ncol=2,
)

### trim and tails_cutoff

# (trim, tails_cutoff) combinations to demonstrate.
tests = [
    {'trim': trim, 'tails_cutoff': cutoff}
    for trim, cutoff in ((True, None), (False, 1), (False, 2), (False, 3))
]

# Draw one violin plot per combination, titled with both values, two per row.
gggrid(
    [
        (
            ggplot(mpg_df, aes('drv', 'hwy'))
            + geom_violin(
                trim=test['trim'],
                tails_cutoff=test['tails_cutoff'],
            )
            + ggtitle(
                "trim={0}, tails_cutoff={1}".format(
                    test['trim'], test['tails_cutoff']
                )
            )
        )
        for test in tests
    ],
    ncol=2,
)

### show_half

# Two half-violins per 'drv' value: 'hwy' drawn with show_half=-1 and 'cty'
# with show_half=1, each in its own fill colour.
(
    ggplot(mpg_df)
    + geom_violin(aes(x='drv', y='hwy'), show_half=-1, fill="#66c2a5")
    + geom_violin(aes(x='drv', y='cty'), show_half=1, fill="#fc8d62")
    + ylab('hwy/cty')
    + ggtitle("hwy is green and cty is orange")
)

## Custom density parameters

# Base plot shared by the four variants below.
p = ggplot(mpg_df, aes('drv', 'hwy'))

# Reference plot with no density parameters set.
p_default = (
    p
    + geom_violin(quantiles=QUANTILES, quantile_lines=True)
    + ggtitle("Default")
)

# Variant with an explicit kernel.
p_kernel = (
    p
    + geom_violin(quantiles=QUANTILES, kernel='epanechikov', quantile_lines=True)
    + ggtitle("kernel='epanechikov'")
)

# Variant with an explicit bandwidth.
p_bw = (
    p
    + geom_violin(quantiles=QUANTILES, bw=0.1, quantile_lines=True)
    + ggtitle("bw=0.1")
)

# Variant with the `adjust` parameter set.
p_adjust = (
    p
    + geom_violin(quantiles=QUANTILES, adjust=2, quantile_lines=True)
    + ggtitle("adjust=2")
)

gggrid([p_default, p_kernel, p_bw, p_adjust], ncol=2)

## Grouping and tooltips

# Tooltip layout: the x value first, then the year (formatted with 'd'),
# the hwy value and the violinwidth/density/count/scaled statistics.
violin_tooltips = (
    layer_tooltips()
    .line('^x')
    .format('@year', 'd')
    .line('year|@year')
    .line('hwy|@hwy')
    .line('violinwidth|@..violinwidth..')
    .line('density|@..density..')
    .line('count|@..count..')
    .line('scaled|@..scaled..')
)

# Violins grouped and filled by 'year', using the tooltip layout above.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(
        aes(group='year', fill='year'),
        quantiles=QUANTILES,
        quantile_lines=True,
        tooltips=violin_tooltips,
    )
    + scale_fill_discrete(format='d')
    + ggtitle("Grouping and tooltips")
)

## Facets

# One facet row per 'year'; both the fill legend and the facet labels use
# the 'd' format.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(
        aes(fill='year'),
        quantiles=QUANTILES,
        quantile_lines=True,
    )
    + scale_fill_discrete(format='d')
    + facet_grid(y='year', y_format='d')
)

## coord_flip()

# The standard violin plot with coord_flip() applied.
(
    ggplot(mpg_df, aes('drv', 'hwy'))
    + geom_violin(quantiles=QUANTILES, quantile_lines=True)
    + coord_flip()
    + ggtitle("Use coord_flip()")
)

## Additional layers

# Combines extra aesthetics (color, fill, alpha, size) and parameters
# (n, sampling) with y-axis breaks and limits, a fixed-ratio coordinate
# system and the grey theme.
(
    ggplot(mpg_df, aes(as_discrete('drv', order=-1), 'hwy'))
    + geom_violin(
        aes(color='drv', fill='drv'),
        alpha=0.5,
        size=2,
        n=8,
        quantiles=QUANTILES,
        quantile_lines=True,
        sampling=sampling_group_systematic(2),
    )
    + scale_y_continuous(breaks=list(range(12, 29, 2)))
    + ylim(12, 28)
    + coord_fixed(ratio=0.2)
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)
# Quartile levels: 1/4, 2/4 and 3/4.
quartiles = [k / 4 for k in (1, 2, 3)]

# Violins with quartile lines and a narrow boxplot layered on top.
(
    ggplot(mpg_df, aes(x='drv', y='hwy'))
    + geom_violin(quantiles=quartiles, quantile_lines=True)
    + geom_boxplot(width=0.1)
)