質問

I'm trying to generate a matrix of scatter plots and histograms with pyqtgraph. The inputs (x and y values) for each scatter plot are numpy arrays of length greater than 1,000,000. It takes a very long time (>1 min for a 2x2 plot matrix) for these plots to be generated (matplotlib is actually faster at generating the same plots). Can anyone point out what I can do to speed things up? Below is the code I'm using.

Thanks.

from pyqtgraph.Qt import QtGui, QtCore
import numpy as np
import pyqtgraph as pg

def scatter_matrix(data, cols):
    """Show a pyqtgraph window with a scatter-plot matrix of ``cols``.

    The cell at row ``i``, column ``j`` is a scatter plot of
    ``data[cols[i]]`` (vertical axis) against ``data[cols[j]]``
    (horizontal axis).  Cells on the diagonal show a 100-bin histogram of
    that column instead.

    Args:
        data: Mapping from column name to a 1-D numpy array of values.
        cols: Sequence of keys of ``data`` to plot, in display order.

    The elapsed set-up time is printed, then the Qt event loop is started
    (see the condition at the end of the function).
    """
    pg.setConfigOption('background', 'w')
    pg.setConfigOption('foreground', 'k')
    now = pg.ptime.time()

    app = QtGui.QApplication([])

    win = pg.GraphicsWindow(title="Scater Plot Matrix")
    win.resize(800,600)

    for i, col_i in enumerate(cols):
        for j, col_j in enumerate(cols):
            if i == j:
                current_plot = win.addPlot(title="Histogram")
                # np.histogram returns (counts, bin_edges); the edges array is
                # one element longer than the counts, which is the x/y shape
                # passed to the step-mode curve below.
                counts, edges = np.histogram(data[col_i], bins=100)
                curve = pg.PlotCurveItem(edges, counts, stepMode=True, fillLevel=0, brush=(0, 0, 255, 80))
                current_plot.addItem(curve)
            else:
                current_plot = win.addPlot(title="Scatter plot")
                # Row variable (col_i) goes on the vertical axis and column
                # variable (col_j) on the horizontal axis so that the plotted
                # data agrees with the 'left'/'bottom' labels set below.
                current_plot.plot(data[col_j], data[col_i], pen=None, symbol='t', symbolPen=None, symbolSize=10, symbolBrush=(100, 100, 255, 50))
                current_plot.setLabel('left', "{}".format(col_i), units='')
                current_plot.setLabel('bottom', "{}".format(col_j), units='')
                current_plot.setLogMode(x=False, y=False)
        win.nextRow()
    ## Start Qt event loop unless running in interactive mode or using pyside.
    import sys
    # Single-argument parenthesized form prints the same text on Python 2 and 3.
    print("Plot time: %0.2f sec" % (pg.ptime.time()-now))
    if (sys.flags.interactive != 1) or not hasattr(QtCore, 'PYQT_VERSION'):
        app.exec_()

# Build two columns of one million normally distributed samples each and
# display them as a scatter-plot matrix.
data = {
    key: np.random.normal(size=(1000000), scale=1e-5)
    for key in ['a', 'b']
}

scatter_matrix(data, ['a', 'b'])

After a lot of searching around on the web, I ended up trying a GPU-based plotting library, Galry. The result was a speedup of more than 100x! Below is the code. Regardless, I'd still like to know whether there are ways to speed up plotting with pyqtgraph.

import numpy as np
from galry import *
import time

class MyPaintManager(PlotPaintManager):
    """Paint manager that adds the one visual configured on its parent widget."""

    def initialize(self):
        """Add the parent's visual, passing data in the form that visual uses.

        Reads ``visual``, ``x``, ``y`` and ``plot_type`` from ``self.parent``.
        Nothing is added when the visual is neither ``BarVisual`` nor
        ``PlotVisual``.
        """
        widget = self.parent
        visual = widget.visual

        if visual == BarVisual:
            # Bars receive only the parent's ``x`` values, positionally.
            self.add_visual(visual, widget.x,
                            primitive_type=widget.plot_type, color='b')
            return

        if visual == PlotVisual:
            self.add_visual(visual, x=widget.x, y=widget.y,
                            primitive_type=widget.plot_type, color='b')

class MyWidget(GalryWidget):
    """Galry widget holding the data and visual type for one matrix cell."""

    def initialize(self, x, y, visual, title=None, plot_type=None):
        """Store the plot configuration and wire up the companion classes.

        Args:
            x: Data stored as ``self.x`` for the paint manager.
            y: Data stored as ``self.y`` for the paint manager.
            visual: Visual class the paint manager should add.
            title: Optional title, stored as ``self.title``.
            plot_type: Optional primitive type, stored as ``self.plot_type``.
        """
        # Grid flags first, in the same order as before.
        self.activate_grid = self.show_grid = True

        # Configuration read later by MyPaintManager through ``self.parent``.
        self.x, self.y = x, y
        self.visual = visual
        self.plot_type = plot_type
        self.title = title

        self.set_bindings(PlotBindings)
        companions = dict(
            paint_manager=MyPaintManager,
            interaction_manager=PlotInteractionManager,
        )
        self.set_companion_classes(**companions)
        self.initialize_companion_classes()

def scatter_matrix(df, cols):
    """Show a Galry-based scatter-plot matrix of ``cols``.

    A Qt window is built with one ``MyWidget`` per (row ``i``, column ``j``)
    grid cell.  Diagonal cells show the 100-bin histogram counts of
    ``df[cols[i]]`` as bars; every other cell plots ``df[cols[i]]`` as x
    against ``df[cols[j]]`` as y using point primitives.

    Args:
        df: Mapping from column name to a 1-D numpy array of values.
        cols: Sequence of keys of ``df`` to plot, in display order.

    The elapsed set-up time is printed just before the window is shown.
    """
    now = time.time()

    class Window(QtGui.QWidget):
        """Top-level widget laying the per-cell plot widgets out in a grid."""

        def __init__(self):
            super(Window, self).__init__()
            self.initUI()

        def initUI(self):
            """Create the grid layout, fill every cell and show the window."""
            grid = QtGui.QGridLayout()
            self.setLayout(grid)
            self.setGeometry(300, 300, 600, 600)
            for i, col_i in enumerate(cols):
                for j, col_j in enumerate(cols):
                    title = '{}_vs_{}'.format(col_i, col_j)
                    if i == j:
                        # Only the counts are used; the bin edges returned by
                        # np.histogram are discarded.
                        counts, _edges = np.histogram(df[col_i], bins=100)
                        cell = MyWidget(x=counts, y=counts, visual=BarVisual, title=title)
                    else:
                        cell = MyWidget(x=df[col_i], y=df[col_j], visual=PlotVisual, title=title, plot_type='POINTS')
                    grid.addWidget(cell, i, j)

            # Single-argument parenthesized form prints the same text on
            # Python 2 and 3.
            print("Plot time: %0.2f sec" % (time.time()-now))
            self.show()

    show_window(Window)

if __name__ == '__main__':
    # Two columns of one million normally distributed samples each.
    data = {
        key: np.random.normal(size=(1000000), scale=1e-5)
        for key in ['a', 'b']
    }

    scatter_matrix(data, ['a', 'b'])
役に立ちましたか?

解決

Your code looks fine. Depending on your system, scatterplot efficiency degrades around 10k to 100k points for pyqtgraph. If you really want to continue using pyqtgraph, all I can recommend is to subsample your data by 10x to 100x.

The amount of data you want to visualize almost requires GPU acceleration, so Galry is a good tool to use here. FYI, the developers of pyqtgraph, Galry, and a few other python graphics libraries are working together on VisPy, which is not quite ready for use, but should be a very good option in the future. PyQtGraph will also be using VisPy to provide GPU acceleration in the future.

ライセンス: CC-BY-SA帰属
所属していません StackOverflow
scroll top