Object-oriented matplotlib usage mysteriously produces boxplots without whiskers or median lines
-
29-10-2019 - |
Question
I'm trying to use Python and matplotlib to define a custom class that produces a complex figure. However, I'm having trouble getting the boxplots to print correctly - they keep appearing without whiskers or lines marking the median values. I can't embed a sample image, but you can see one here.
My custom class is defined as follows:
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.ticker import FixedLocator
from matplotlib.gridspec import GridSpec
from matplotlib.figure import Figure
from matplotlib.backends.backend_svg import FigureCanvasSVG as FigureCanvas
import numpy as np
import scipy as sp
import scipy.optimize
class DotDashHist(Figure):
    """A Tufte-style dot-dash plot with histograms along the x- and y-axes.

    The figure holds three axes laid out on a 6x6 grid: a main axes with one
    boxplot per rounded x value, a histogram of the x values above it and a
    horizontal histogram of the y values to its right.

    Parameters
    ----------
    the_vals : iterable of (x, y) pairs
        The data points. The iterable is copied into a list, so a one-shot
        iterator (for example the result of ``zip`` on Python 3) is accepted.

    Raises
    ------
    ValueError
        If ``the_vals`` contains no points.
    """

    def __init__(self, the_vals):
        # Actually inherit all the attributes and methods of parent class
        super(DotDashHist, self).__init__()

        # Process incoming data. The points are read twice (here and again in
        # makebox), so materialise them first: an iterator would be exhausted
        # by the zip(*...) below and makebox would then see no data at all.
        self.vals = list(the_vals)
        if not self.vals:
            raise ValueError("DotDashHist needs at least one (x, y) point")
        self.xvals, self.yvals = zip(*self.vals)
        self.xvals_uniq = list(set(self.xvals))
        self.yvals_uniq = list(set(self.yvals))
        self.xmax = float(max(self.xvals_uniq))
        self.xpadding = float(self.xmax / 50)
        self.ymax = float(max(self.yvals_uniq))
        self.ypadding = float(self.ymax / 50)
        # Axis limits: from one padding below zero to one padding above the max
        self.xlims = [-1 * self.xpadding, self.xmax + self.xpadding]
        self.ylims = [-1 * self.ypadding, self.ymax + self.ypadding]
        self.lims = self.xlims + self.ylims

        # Set some matplotlib default behavior.
        # NOTE: rcParams is global state, so these settings also affect every
        # other figure created in the same process.
        mpl.rcParams['backend'] = 'SVG'
        mpl.rcParams['lines.antialiased'] = True
        mpl.rcParams['font.family'] = 'sans-serif'
        mpl.rcParams['font.sans-serif'] = 'Gill Sans MT Pro, Lucida Grande, Helvetica, sans-serif'
        mpl.rcParams['axes.titlesize'] = 'large'
        mpl.rcParams['axes.labelsize'] = 'xx-small'
        mpl.rcParams['xtick.major.size'] = 2
        mpl.rcParams['xtick.minor.size'] = 0.5
        mpl.rcParams['xtick.labelsize'] = 'xx-small'
        mpl.rcParams['ytick.major.size'] = 2
        mpl.rcParams['ytick.minor.size'] = 0.5
        mpl.rcParams['ytick.labelsize'] = 'xx-small'

    def _makeskel(self):
        """Create the canvas and the three (still empty) axes."""
        # Define the canvas for the figure
        self.canvas = FigureCanvas(self)
        self.set_canvas(self.canvas)

        # Place subplots on a 6x6 grid
        gs = GridSpec(6, 6)

        # Add the main subplot, override weird axis and tick defaults
        self.main = self.add_subplot(gs[1:, :-1])
        self.main.set_frame_on(False)
        self.main.get_xaxis().tick_bottom()
        self.main.get_yaxis().tick_left()
        self.main.axis(self.lims)

        # Add the x-value histogram, override weird axis and tick defaults
        self.xhist = self.add_subplot(gs[0, :-1])
        self.xhist.set_xticks([])
        self.xhist.set_yticks([])
        self.xhist.set_frame_on(False)
        self.xhist.get_xaxis().tick_bottom()
        self.xhist.get_yaxis().tick_left()
        self.xhist.set_xlim(self.xlims)

        # Add the y-value histogram, override weird axis and tick defaults
        self.yhist = self.add_subplot(gs[1:, -1])
        self.yhist.set_xticks([])
        self.yhist.set_yticks([])
        self.yhist.set_frame_on(False)
        self.yhist.get_xaxis().tick_bottom()
        self.yhist.get_yaxis().tick_left()
        self.yhist.set_ylim(self.ylims)

    def _makehist(self):
        """Draw the x- and y-value histograms on their axes."""
        # A bin count must be a whole number of at least one; xmax and ymax
        # are floats, so convert explicitly.
        xbins = max(1, int(min(50, self.xmax + 1)))
        ybins = max(1, int(min(50, self.ymax + 1)))
        # ``density=True`` is the current spelling of the removed ``normed``
        # keyword.
        self.xhist.hist(self.xvals, density=True, bins=xbins,
                        range=[0, self.xmax + self.xpadding])
        self.yhist.hist(self.yvals, density=True, bins=ybins,
                        range=[0, self.ymax + self.ypadding],
                        orientation='horizontal')

    def makebox(self):
        """Build the whole figure with one boxplot per rounded x value."""
        self._makeskel()
        self._makehist()

        # Aggregate to make boxplots: y values are grouped under their x
        # value rounded with round(..., 0).
        box_dict = {}
        for point in self.vals:
            if point[0] <= self.xmax and point[1] <= self.ymax:
                box_dict.setdefault(round(float(point[0]), 0),
                                    []).append(point[1])

        # Hand boxplot real lists (not dict views) in ascending x order so
        # that every box is paired with its own position.
        positions = sorted(box_dict)
        self.main.boxplot([box_dict[pos] for pos in positions],
                          positions=positions, whis=1.0, sym='ro')
        self.main.set_xticks(np.arange(0, self.xmax + 1, 12))
        self.main.xaxis.set_minor_locator(FixedLocator(self.xvals_uniq))
        self.main.yaxis.set_minor_locator(FixedLocator(self.yvals_uniq))
This test code displays the problem:
import numpy as np
from numpy.random import randn

import mycustomfigures as hf

# Evenly spaced x values with Gaussian noise added to y. The noise length is
# taken from test_x so the two arrays always match.
test_x = np.arange(0, 25, 0.01)
test_y = test_x + randn(test_x.size)

# Pass a list rather than a bare zip object: on Python 3 zip returns a
# one-shot iterator, which can only be read once.
test_data = list(zip(test_x, test_y))

test_fig = hf.DotDashHist(test_data)
test_fig.makebox()
test_fig.suptitle('Test Figure')
test_fig.savefig('testing.svg')
What's wrong with the way I've defined DotDashHist? I can produce whiskered boxplots using the MATLAB-style stateful syntax, but that approach generates a tremendous amount of code when drawing multiple figures.
Solution
The whiskers do appear in your original plot when I run it; they're just obscured by the outlier points you have plotted.
At any rate, I'd proceed a bit more like this:
import collections
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec
import numpy as np
def main():
    """Plot a noisy diagonal as a dot-dash figure and show it on screen."""
    xs = np.arange(0, 25, 0.01)
    # One standard-normal noise sample per x value.
    noise = np.random.randn(xs.size)
    ys = xs + noise

    figure = DotDashHist(figsize=(10, 8))
    figure.plot(xs, ys, whis=1.0, sym='r.')
    figure.title('This is a Test')

    plt.show()
class DotDashHist(object):
    """Boxplots of y grouped by x, with marginal histograms on shared axes.

    The pyplot figure is stored as ``fig``. It contains a main axes (``ax``),
    an axes above it sharing its x-axis (``topax``) and an axes to its right
    sharing its y-axis (``rightax``). The two marginal axes have their axis
    decorations switched off.

    Parameters
    ----------
    **kwargs
        Passed unchanged to ``plt.figure``.
    """

    def __init__(self, **kwargs):
        self.fig = plt.figure(**kwargs)
        grid = GridSpec(6, 6)
        self.ax = self.fig.add_subplot(grid[1:, :-1])
        self.topax = self.fig.add_subplot(grid[0, :-1], sharex=self.ax)
        self.rightax = self.fig.add_subplot(grid[1:, -1], sharey=self.ax)
        for ax in [self.topax, self.rightax]:
            ax.set_axis_off()

    def plot(self, x, y, **kwargs):
        """Draw the marginal histograms and one boxplot per integer x bucket.

        Parameters
        ----------
        x, y : sequences of numbers
            The data points, paired element by element.
        **kwargs
            Passed to ``Axes.boxplot``. A ``positions`` entry is discarded
            because the positions are derived from the x buckets.

        The artists returned by the three plotting calls are stored as
        ``topbars``, ``rightbars`` and ``boxes``.
        """
        # ``density=True`` is the current spelling of the removed ``normed``
        # keyword.
        _, _, self.topbars = self.topax.hist(x, density=True, bins=50)
        _, _, self.rightbars = self.rightax.hist(y, density=True, bins=50,
                                                 orientation='horizontal')

        # Group the y values by the integer part of their x value.
        boxes = collections.defaultdict(list)
        for xval, yval in zip(x, y):
            boxes[int(xval)].append(yval)

        kwargs.pop('positions', None)
        # Draw the boxes in ascending bucket order at their own x value, so
        # they line up with the histogram that shares this x-axis even when
        # x is unsorted or has gaps. Pass real lists, not dict views.
        buckets = sorted(boxes)
        self.boxes = self.ax.boxplot([boxes[key] for key in buckets],
                                     positions=buckets, **kwargs)

    def title(self, *args, **kwargs):
        """Set the figure title; arguments go to ``topax.set_title``."""
        self.topax.set_title(*args, **kwargs)
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()