Download notebook (.ipynb)

Dot Plots#

Preparation#

import numpy as np
import pandas as pd

from lets_plot import *
LetsPlot.setup_html()
def plot_matrix(plots=None, width=500, height=375, columns=2):
    """Lay the given plots out in a grid and size the figure to fit all rows.

    Args:
        plots: Sequence of plots to arrange; ``None`` is treated as empty.
        width: Width allotted to each grid column (passed on to ``ggsize``).
        height: Height allotted to each grid row (passed on to ``ggsize``).
        columns: Number of columns in the grid.

    Returns:
        ``gggrid(plots, ncol=columns)`` combined with a ``ggsize`` of
        ``width * columns`` by ``height * rows``, where ``rows`` is the
        number of rows needed to hold every plot.
    """
    # A ``None`` default avoids sharing one mutable list between calls.
    plots = [] if plots is None else list(plots)
    # Ceiling division: a partially filled last row still needs a full row of
    # height. The earlier ``(n + 1) / columns`` form was only correct for
    # two columns. Keep at least one row so the height never collapses to 0.
    rows = max(1, -(-len(plots) // columns))
    return gggrid(plots, ncol=columns) + ggsize(width * columns, height * rows)
# Read the mpg CSV, drop its "Unnamed: 0" column and keep a fixed-seed
# sample of 50 rows with a fresh 0..49 index.
df = (
    pd.read_csv("https://raw.githubusercontent.com/JetBrains/lets-plot-docs/master/data/mpg.csv")
    .drop(columns=["Unnamed: 0"])
    .sample(n=50, random_state=42, ignore_index=True)
)
df.head()
manufacturer model displ year cyl trans drv cty hwy fl class
0 dodge ram 1500 pickup 4wd 4.7 2008 8 manual(m6) 4 9 12 e pickup
1 toyota toyota tacoma 4wd 4.0 2008 6 auto(l5) 4 16 20 r pickup
2 toyota camry 2.2 1999 4 auto(l4) f 21 27 r midsize
3 audi a4 quattro 2.0 2008 4 manual(m6) 4 20 28 p compact
4 jeep grand cherokee 4wd 4.7 2008 8 auto(l5) 4 14 19 r suv
# Bin count behind the default ``binwidth_ratio`` of ``get_binwidth``.
DEF_BIN_COUNT = 15


def get_binwidth(df, column, binwidth_ratio=1.0 / DEF_BIN_COUNT):
    """Return ``binwidth_ratio`` times the spread (max - min) of ``df[column]``.

    Args:
        df: Data frame holding the column.
        column: Name of the column whose value range is measured.
        binwidth_ratio: Fraction of the range one bin should cover;
            defaults to ``1 / DEF_BIN_COUNT``.
    """
    values = df[column]
    value_range = values.max() - values.min()
    return binwidth_ratio * value_range

def get_bincount(df, column, binwidth):
    """Return how many bins of width ``binwidth`` span the range of ``df[column]``.

    Args:
        df: Data frame holding the column.
        column: Name of the column whose value range (max - min) is measured.
        binwidth: Width of a single bin; must be a positive number.

    Returns:
        The range divided by ``binwidth``, rounded to the nearest integer.

    Raises:
        ValueError: If ``binwidth`` is zero, negative or NaN.
    """
    # ``not binwidth > 0`` also rejects NaN, which a plain ``<= 0`` check
    # would let through to the division below.
    if not binwidth > 0:
        raise ValueError("binwidth must be a positive number, got {0!r}".format(binwidth))
    value_range = df[column].max() - df[column].min()
    return int(round(value_range / binwidth))

# Bin width of "hwy" at the default ratio, rounded to three decimals; the
# later examples pass this value as ``binwidth``.
nice_binwidth = round(
    get_binwidth(df, column="hwy"),
    3,
)
print("Nice binwidth value for examples: {0}".format(nice_binwidth))
Nice binwidth value for examples: 1.667

Default#

# geom_dotplot with every parameter left at its default, "hwy" mapped to x.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot()
    + ggtitle("geom_dotplot() example")
)
# geom_ydotplot with every parameter left at its default, "hwy" mapped to y.
(
    ggplot(df, aes(y="hwy"))
    + geom_ydotplot()
    + ggtitle("geom_ydotplot() example")
)

Comparison of stats and geoms#

# Density area with a dot plot layered on top, sharing the same "hwy" mapping.
(
    ggplot(df, aes(x="hwy"))
    + geom_area(stat="density")
    + geom_dotplot(binwidth=nice_binwidth, color="black", fill="white")
    + ggtitle("density + dotplot (method='dotdensity')")
)
# Histogram and a 'histodot' dot plot using the same bin width; the fixed
# coordinate ratio is set to that bin width as well.
(
    ggplot(df, aes(x="hwy"))
    + geom_histogram(binwidth=nice_binwidth, color="white")
    + geom_dotplot(
        binwidth=nice_binwidth,
        method="histodot",
        color="black",
        fill="white",
    )
    + coord_fixed(ratio=nice_binwidth)
    + ggtitle("histogram + dotplot (method='histodot')")
)
# Violin per "drv" value with a y-dot plot layered on top.
(
    ggplot(df, aes("drv", "hwy"))
    + geom_violin()
    + geom_ydotplot(
        binwidth=nice_binwidth,
        stackratio=0.5,
        color="black",
        fill="white",
    )
    + ggtitle("violin + ydotplot (method='dotdensity')")
)

Parameters#

binwidth#

# geom_dotplot: default bin width next to the precomputed ``nice_binwidth``.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot()
    + ggtitle("Default binwidth")
)
p2 = (
    p
    + geom_dotplot(binwidth=nice_binwidth)
    + ggtitle("binwidth={0}".format(nice_binwidth))
)

plot_matrix(plots=[p1, p2])
# geom_ydotplot: the same pair of settings with "drv" on x and "hwy" on y.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot()
    + ggtitle("Default binwidth")
)
p2 = (
    p
    + geom_ydotplot(binwidth=nice_binwidth)
    + ggtitle("binwidth={0}".format(nice_binwidth))
)

plot_matrix(plots=[p1, p2])

stackdir#

# geom_dotplot: one plot for each ``stackdir`` value shown in the titles.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(stackdir="up")
    + ggtitle("stackdir='up' (default)")
)
p2 = (
    p
    + geom_dotplot(stackdir="down")
    + ggtitle("stackdir='down'")
)
p3 = (
    p
    + geom_dotplot(stackdir="center")
    + ggtitle("stackdir='center'")
)
p4 = (
    p
    + geom_dotplot(stackdir="centerwhole")
    + ggtitle("stackdir='centerwhole'")
)

plot_matrix(plots=[p1, p2, p3, p4])
# geom_ydotplot: one plot for each ``stackdir`` value shown in the titles.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(stackdir="left")
    + ggtitle("stackdir='left'")
)
p2 = (
    p
    + geom_ydotplot(stackdir="right")
    + ggtitle("stackdir='right'")
)
p3 = (
    p
    + geom_ydotplot(stackdir="center")
    + ggtitle("stackdir='center' (default)")
)
p4 = (
    p
    + geom_ydotplot(stackdir="centerwhole")
    + ggtitle("stackdir='centerwhole'")
)

plot_matrix(plots=[p1, p2, p3, p4])

stackratio#

# geom_dotplot: default ``stackratio`` next to a smaller and a larger value.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot()
    + ggtitle("stackratio=1.0 (default)")
)
p2 = (
    p
    + geom_dotplot(stackratio=0.5)
    + ggtitle("stackratio=0.5")
)
p3 = (
    p
    + geom_dotplot(stackratio=1.5)
    + ggtitle("stackratio=1.5")
)

plot_matrix(plots=[p1, p2, p3])
# geom_ydotplot: the same three ``stackratio`` settings.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot()
    + ggtitle("stackratio=1.0 (default)")
)
p2 = (
    p
    + geom_ydotplot(stackratio=0.5)
    + ggtitle("stackratio=0.5")
)
p3 = (
    p
    + geom_ydotplot(stackratio=1.5)
    + ggtitle("stackratio=1.5")
)

plot_matrix(plots=[p1, p2, p3])

dotsize#

# geom_dotplot: default ``dotsize`` next to a smaller and a larger value.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot()
    + ggtitle("dotsize=1.0 (default)")
)
p2 = (
    p
    + geom_dotplot(dotsize=0.5)
    + ggtitle("dotsize=0.5")
)
p3 = (
    p
    + geom_dotplot(dotsize=1.5)
    + ggtitle("dotsize=1.5")
)

plot_matrix(plots=[p1, p2, p3])
# geom_ydotplot: the same three ``dotsize`` settings.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot()
    + ggtitle("dotsize=1.0 (default)")
)
p2 = (
    p
    + geom_ydotplot(dotsize=0.5)
    + ggtitle("dotsize=0.5")
)
p3 = (
    p
    + geom_ydotplot(dotsize=1.5)
    + ggtitle("dotsize=1.5")
)

plot_matrix(plots=[p1, p2, p3])

center#

# geom_dotplot with method='histodot': default versus an explicit ``center``.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_dotplot(method="histodot", center=12.0)
    + ggtitle("center=12.0")
)

plot_matrix(plots=[p1, p2])
# geom_ydotplot with method='histodot': default versus an explicit ``center``.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(method="histodot", center=12.0)
    + ggtitle("center=12.0")
)

plot_matrix(plots=[p1, p2])

boundary#

# geom_dotplot with method='histodot': default versus an explicit ``boundary``.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_dotplot(method="histodot", boundary=11.0)
    + ggtitle("boundary=11.0")
)

plot_matrix(plots=[p1, p2])
# geom_ydotplot with method='histodot': default versus an explicit ``boundary``.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(method="histodot", boundary=11.0)
    + ggtitle("boundary=11.0")
)

plot_matrix(plots=[p1, p2])

bins#

# geom_dotplot with method='histodot': default versus an explicit ``bins`` count.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_dotplot(method="histodot", bins=20)
    + ggtitle("bins=20")
)

plot_matrix(plots=[p1, p2])
# geom_ydotplot with method='histodot': default versus an explicit ``bins`` count.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(method="histodot")
    + ggtitle("Default")
)
p2 = (
    p
    + geom_ydotplot(method="histodot", bins=20)
    + ggtitle("bins=20")
)

plot_matrix(plots=[p1, p2])

Grouping#

# geom_dotplot with fill mapped to "drv" as a discrete variable.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("drv")), color="black")
    + ggtitle("method='dotdensity'")
)
# geom_dotplot, method='histodot': ``stackgroups`` left at default versus True.
p = ggplot(df, aes(x="hwy"))
p1 = (
    p
    + geom_dotplot(
        aes(fill=as_discrete("drv")),
        method="histodot",
        color="black",
    )
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_dotplot(
        aes(fill=as_discrete("drv")),
        method="histodot",
        stackgroups=True,
        color="black",
    )
    + ggtitle("method='histodot', stackgroups=True")
)

plot_matrix(plots=[p1, p2])
# geom_ydotplot with fill mapped to "year" as a discrete variable:
# ``stackgroups`` left at default versus True.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(aes(fill=as_discrete("year")), color="black")
    + scale_fill_discrete(format="d")
    + ggtitle("method='dotdensity', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        stackgroups=True,
        color="black",
    )
    + scale_fill_discrete(format="d")
    + ggtitle("method='dotdensity', stackgroups=True")
)

plot_matrix(plots=[p1, p2])
# Same comparison for geom_ydotplot with method='histodot'.
p = ggplot(df, aes("drv", "hwy"))
p1 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        method="histodot",
        color="black",
    )
    + scale_fill_discrete(format="d")
    + ggtitle("method='histodot', stackgroups=False (default)")
)
p2 = (
    p
    + geom_ydotplot(
        aes(fill=as_discrete("year")),
        method="histodot",
        stackgroups=True,
        color="black",
    )
    + scale_fill_discrete(format="d")
    + ggtitle("method='histodot', stackgroups=True")
)

plot_matrix(plots=[p1, p2])

Tooltips#

# geom_dotplot with custom tooltip lines; "..binwidth.." is formatted as ".3f".
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(
        tooltips=(
            layer_tooltips()
            .line("Stack center|^x")
            .line("Number of dots in stack|@..count..")
            .format("@..binwidth..", ".3f")
            .line("Width of the bin|@..binwidth..")
        )
    )
    + ggtitle("With tooltips")
)
# geom_ydotplot with the same tooltip lines plus a leading "^x" line.
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(
        tooltips=(
            layer_tooltips()
            .line("^x")
            .line("Stack center|^y")
            .line("Number of dots in stack|@..count..")
            .format("@..binwidth..", ".3f")
            .line("Width of the bin|@..binwidth..")
        )
    )
    + ggtitle("With tooltips")
)

Faceting#

# geom_dotplot faceted by "year", with fill mapped to the same column.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(aes(fill=as_discrete("year")), color="black")
    + scale_fill_discrete(format="d")
    + facet_grid(x="year", x_format="d")
    + ggtitle("facet_grid()")
)
# geom_ydotplot faceted by "year", with fill mapped to "drv".
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color="black")
    + facet_grid(x="year", x_format="d")
    + ggtitle("facet_grid()")
)

Flip coordinates#

# geom_dotplot combined with coord_flip().
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot()
    + coord_flip()
    + ggtitle("Flip coordinates")
)
# geom_ydotplot (fill mapped to "drv") combined with coord_flip().
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(aes(fill="drv"), color="black")
    + coord_flip()
    + ggtitle("Flip coordinates")
)

Other layers#

# geom_dotplot with several parameters and aesthetics set at once, plus a
# discrete fill scale and the grey theme.
(
    ggplot(df, aes(x="hwy"))
    + geom_dotplot(
        aes(fill="year", group="year"),
        method="histodot",
        bins=15,
        stackdir="centerwhole",
        stackratio=0.75,
        dotsize=0.75,
        color="black",
        alpha=0.5,
        size=1,
    )
    + scale_fill_discrete(format="d")
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)
# geom_ydotplot with a comparable set of parameters and the grey theme.
(
    ggplot(df, aes("drv", "hwy"))
    + geom_ydotplot(
        aes(fill=as_discrete("drv")),
        method="histodot",
        bins=15,
        stackdir="center",
        stackratio=0.75,
        color="black",
        alpha=0.5,
        size=1,
    )
    + theme_grey()
    + ggtitle("Some additional aesthetics, parameters and layers")
)