Domanda

I am using matplotlib, together with a stacked-bar-chart helper that someone else wrote, to draw a stacked bar chart.

My graph:

x-axis has 8 income distributions, one for each bar

y-axis is the % of people in each income distribution. Person type-a is the first stack, person type-b is the second stack, and person type-c is the third stack.

My barchart is center aligned, and I am trying to figure out how to space out the bars so the graph looks better and so the labels are easier to read. Any suggestions, or clarifications?

The program is stackedBarGraph.py and the code looks like this, where widths is an array of 8 values, each giving the width of one bar in the chart.

Let me know if you need any more information (I tried to keep everything relevant). Thanks!

Full code (I hope it's not too difficult to read):

   from __future__ import division
from pylab import * 
import seaborn as sns
import pandas as pd
import numpy as np
from stackedbars import StackedBarGrapher

# Load the records; csv2rec is provided by the pylab star import above.
data = csv2rec('coa.csv', delimiter=',')

x = data['totalgrantaid']
y = data['studenteffort']
z = data['parentcontim']
g = data['parentincomeim']

df = pd.DataFrame(dict(grant = x, stud = y, par = z, income = g))

#organize the data to graph
income_brackets = [(0, 25000), (25000, 50000), (50000, 75000), (75000, 100000), (100000, 150000), (150000,200000), (200000,250000), (250000,300000)]
funding_keys = ('grant', 'stud', 'par')

# Select the rows of each bracket once and reuse them for medians and counts.
# The intervals are half-open, [lower, upper): the brackets are contiguous, so
# an income that sits exactly on a boundary (e.g. 25000) must land in exactly
# one bar. With strict '>' and '<' on both ends those rows belonged to no bar.
bracket_frames = [
    df.query('income >= {} and income < {}'.format(lower, upper))
    for lower, upper in income_brackets
]

# median amount of each funding source per bracket (the stacked levels)
source = dict(
    (key, [median(frame[key]) for frame in bracket_frames])
    for key in funding_keys
)

#set the widths: number of non-missing values per column and bracket
source2 = dict(
    (key, [frame[key].count() for frame in bracket_frames])
    for key in funding_keys
)

# Normalise against the rows actually drawn so the bar widths sum to 10.
total = sum(source2['grant']) / 10.

#graph specifications
num_brackets = len(income_brackets)
d_widths = [count / total for count in source2['grant']]
d_colors = ['r','g','b']
d_labels = ('<25000', '25000-\n50000', '50000-\n75000', '75000-\n100000', '100000-\n150000', '150000-\n200000', '200000-\n250000', '250000-\n300000')
# one row per bracket, one column per funding source
d = np.array([[source[k][i] for k in funding_keys] for i in range(num_brackets)])

#the graph
fig = plt.figure()
ax1 = fig.add_subplot(111)
mygraph = StackedBarGrapher()
mygraph.stackedBarPlot(ax1,d,d_colors, edgeCols=['#000000']*3,widths = d_widths,  showFirst = num_brackets, xLabels=d_labels,scale=True)

Stackedbarchart program:

    def stackedBarPlot(self,
                       ax,                                 # axes to plot onto
                       data,                               # data to plot
                       cols,                               # colors for each level
                       xLabels = None,                     # bar specific labels
                       yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                       edgeCols=None,                      # colors for edges
                       showFirst=-1,                       # only plot the first <showFirst> bars
                       scale=False,                        # scale bars to same height
                       widths=None,                        # set widths for each bar
                       heights=None,                       # set heights for each bar
                       ylabel='',                          # label for y axis
                       xlabel=''                           # label for x axis
                       ):
        """Draw a stacked bar chart of ``data`` onto ``ax``.

        ``data`` is indexed as ``data[bar][level]``: one row per bar and one
        column per stacked level. ``cols`` (and ``edgeCols`` when given) hold
        one entry per level.

        Parameters:
            ax: axes object the bars are drawn on.
            data: 2-D sequence or array of values, rows are bars.
            cols: fill color for each level.
            xLabels: tick labels, one per bar; ``None`` hides the x ticks.
            yTicks: ``"none"`` to hide the y ticks, a number of evenly spaced
                ticks to generate, or ``[positions, labels]``.
            edgeCols: edge color for each level; defaults to no edges.
            showFirst: plot only the first ``showFirst`` bars; ``-1`` plots all.
            scale: normalise every bar to a total height of 1.
            widths: width of each bar; bars are laid out edge to edge.
                Defaults to unit widths.
            heights: total height of each bar; ignored (with a warning) when
                ``scale`` is set.
            ylabel: label for the y axis, skipped when empty.
            xlabel: label for the x axis, skipped when empty.
        """
        #----------------------------------------------------------------------
        # data preparation

        if showFirst != -1:
            showFirst = min(showFirst, np.shape(data)[0])
            data = data[:showFirst]
            if heights is not None:
                heights = heights[:showFirst]
            if widths is not None:
                widths = widths[:showFirst]

        # Always work on a float copy: the in-place divisions below would fail
        # on integer input, and the caller's data must not be modified.
        data_copy = np.copy(data).transpose().astype('float')
        data_shape = np.shape(data_copy)

        # determine the number of bars and corresponding levels from the shape of the data
        num_bars = data_shape[1]
        levels = data_shape[0]

        if widths is None:
            widths = np.array([1] * num_bars)
            x = np.arange(num_bars)
        else:
            # Bars touch edge to edge, so consecutive centers are separated by
            # half of each neighbour's width. Divide by a float so integer
            # widths are not truncated under Python 2.
            x = [0]
            for i in range(1, len(widths)):
                x.append(x[i-1] + (widths[i-1] + widths[i]) / 2.)

        # stack the data --
        # replace the value in each level by the cumulative sum of all preceding levels
        data_stack = np.cumsum(data_copy, axis=0)

        # scale the data if needed
        if scale or heights is not None:
            # Copy the totals first: they are a row of data_stack, which is
            # itself divided in place below.
            bar_totals = data_stack[levels-1].copy()
            data_copy /= bar_totals
            data_stack /= bar_totals
            if scale:
                if heights is not None:
                    print("WARNING: setting scale and heights does not make sense.")
                    heights = None
            else:
                for i in range(num_bars):
                    data_copy[:,i] *= heights[i]
                    data_stack[:,i] *= heights[i]

        #----------------------------------------------------------------------
        # ticks

        # Compare by value; identity checks against a string literal are
        # implementation dependent.
        show_y_ticks = not (isinstance(yTicks, str) and yTicks == "none")

        if show_y_ticks:
            # it is either a set of ticks or the number of auto ticks to make
            try:
                len(yTicks[1])
                real_ticks = True
            except (TypeError, IndexError):
                real_ticks = False

            if not real_ticks:
                yTicks = float(yTicks)
                if scale:
                    # make the ticks line up to 100 %
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)
                    y_tick_labels = np.array(["%0.0f"%(i * 100) for i in y_ticks_at])
                else:
                    # space the ticks along the y axis
                    y_ticks_at = np.arange(yTicks)/(yTicks-1)*np.max(data_stack)
                    y_tick_labels = np.array([str(i) for i in y_ticks_at])
                yTicks = (y_ticks_at, y_tick_labels)

        #----------------------------------------------------------------------
        # plot

        if edgeCols is None:
            edgeCols = ["none"]*len(cols)

        # bars: the first level starts at zero, every later level sits on the
        # cumulative height of the levels below it
        ax.bar(x,
               data_stack[0],
               color=cols[0], alpha=0.7,
               edgecolor=edgeCols[0],
               width=widths,
               linewidth=0.5,
               align='center'
               )
        for i in range(1, levels):
            ax.bar(x,
                   data_copy[i],
                   bottom=data_stack[i-1],
                   color=cols[i], alpha=0.7,
                   edgecolor=edgeCols[i],
                   width=widths,
                   linewidth=0.5,
                   align='center'
                   )

        # borders
        for side in ("top", "right", "bottom", "left"):
            ax.spines[side].set_visible(False)

        # make ticks if necessary; set them on ax itself rather than through
        # pyplot, which would act on whatever axes happen to be current
        if show_y_ticks:
            ax.tick_params(axis='y', which='both', labelsize=8, direction="out")
            ax.yaxis.tick_left()
            ax.set_yticks(yTicks[0])
            ax.set_yticklabels(yTicks[1])
        else:
            ax.set_yticks([])

        if xLabels is not None:
            ax.tick_params(axis='x', which='both', labelsize=8, direction="out")
            ax.xaxis.tick_bottom()
            ax.set_xticks(x)
            ax.set_xticklabels(xLabels, rotation='horizontal')
        else:
            ax.set_xticks([])

        # limits: from the left edge of the first bar to the right edge of the
        # last one (a fixed 0.5 is only right when the bars have unit width)
        ax.set_xlim(-1.*widths[0]/2., x[-1] + widths[-1]/2.)
        ax.set_ylim(0, np.max(data_stack))

        # labels
        if xlabel != '':
            ax.set_xlabel(xlabel)
        if ylabel != '':
            ax.set_ylabel(ylabel)

What it looks like so far

È stato utile?

Soluzione

Alright thanks everyone for the input (and Bill for showing me how to use list comprehensions effectively).

I was able to alter the program to achieve what I wanted (I think). I added two new parameters, xaxlim and axspacing, to the following parts of the program:

def stackedBarPlot(self,
                   ax,                                 # axes to plot onto
                   data,                               # data to plot
                   cols,                               # colors for each level
                   xLabels = None,                     # bar specific labels
                   yTicks = 6.,                        # information used for making y ticks ["none", <int> or [[tick_pos1, tick_pos2, ... ],[tick_label_1, tick_label2, ...]]
                   edgeCols=None,                      # colors for edges
                   showFirst=-1,                       # only plot the first <showFirst> bars
                   scale=False,                        # scale bars to same height
                   widths=None,                        # set widths for each bar
                   heights=None,                       # set heights for each bar
                   ylabel='',                          # label for y axis
                   xlabel='',                          # label for x axis
                   xaxlim=None,                        # explicit x-axis limits handed to ax.set_xlim; None derives them from the bars
                   axspacing=0,                        # extra gap added between consecutive bar centers when widths are given
                   ):

.

    # Work out the x position of every bar center.
    if widths is None:
        # No widths given: unit-width bars centered on 0, 1, 2, ...
        # (axspacing is not applied in this branch).
        widths = np.array([1] * num_bars)
        x = np.arange(num_bars)
    else:
        # Custom widths: each center lies half of both neighbouring widths
        # beyond the previous center, plus axspacing of additional gap.
        x = [0]
        for i in range(1, len(widths)):
            x.append(x[i-1] + (widths[i-1] + widths[i])/2 + axspacing)

.

    # limits
    ax.set_ylim(0, np.max(data_stack))
    if xaxlim is None:
        # Span from the left edge of the first bar to the right edge of the
        # last one. The last center x[-1] already includes every width and
        # every axspacing gap, so no bar is clipped whatever the widths are
        # (a fixed 0.5 is only correct when the first bar has unit width).
        ax.set_xlim(-1.*widths[0]/2., x[-1] + widths[-1]/2.)
    else:
        # Caller supplied explicit limits.
        ax.set_xlim(xaxlim)
Autorizzato sotto: CC-BY-SA insieme a attribuzione
Non affiliato a StackOverflow
scroll top