Question

I have the following code that is the bottleneck in my Python code:

def get_payoff(self, actual, predicted):
    """Return the payoff for one (actual, predicted) pair.

    The payoff is a weight times the distance of ``predicted`` from 0.5.
    The weight depends on whether ``actual`` is within 1e-5 of 1.0 and on
    which side of 0.5 the prediction falls:

    * actual is 1, predicted < 0.5  -> ``self.fn_payoff``
    * actual is 1, predicted > 0.5  -> ``self.tp_payoff``
    * otherwise,   predicted < 0.5  -> ``self.tn_payoff``
    * otherwise,   predicted > 0.5  -> ``self.fp_payoff``

    A prediction that is neither below nor above 0.5 yields 0.
    """
    # bool() insists on a single truth value up front, so an input that has
    # no unambiguous truth value is rejected before any branch is taken.
    actual_is_one = bool(abs(actual - 1.0) < 1e-5)

    if predicted < 0.5:
        rate = self.fn_payoff if actual_is_one else self.tn_payoff
        return rate * (0.5 - predicted)
    if predicted > 0.5:
        rate = self.tp_payoff if actual_is_one else self.fp_payoff
        return rate * (predicted - 0.5)
    return 0

def get_total_payoff(self):
    """Return the sum of ``self.get_payoff`` over all target/prediction pairs.

    ``self.target`` and ``self.predictions`` are walked element by element
    with ``np.nditer`` and each pair is passed to ``self.get_payoff``.
    """
    # Read the predictions from the instance, like the targets, instead of
    # relying on a bare ``predictions`` name being defined somewhere else.
    pairs = zip(np.nditer(self.target), np.nditer(self.predictions))
    # The total must be returned explicitly; otherwise callers receive None.
    return sum(
        self.get_payoff(target_element, prediction_element)
        for target_element, prediction_element in pairs
    )

fn_payoff, tp_payoff, tn_payoff, and fp_payoff are all floats. self.target and self.predictions are both numpy ndarrays.

I assume there's some way to replace the for loop in get_total_payoff with some kind of numpy vectorization, but I don't know how to handle the if/then statements to do the vectorization properly.

Was it helpful?

Solution 2

def _get_payoff(self, actual, predicted):
    """Return the summed payoff for arrays of actual and predicted values.

    Each element contributes one of the four ``*_payoff`` weights multiplied
    by the distance of its prediction from 0.5.  The weight is selected by
    two tests: whether ``actual`` is close to 1 (per ``numpy.isclose``) and
    whether ``predicted`` is below 0.5.  A prediction of exactly 0.5 has
    distance 0 and therefore contributes 0 for finite weights.
    """
    distance = numpy.abs(0.5 - predicted)
    actual_is_one = numpy.isclose(actual, 1)
    predicted_low = predicted < 0.5

    # Two-bit index: the high bit is "actual is ~1", the low bit is
    # "predicted is below 0.5".
    selector = 2 * actual_is_one + predicted_low
    weight_table = [
        self.fp_payoff,  # 0: actual not 1, predicted not below 0.5
        self.tn_payoff,  # 1: actual not 1, predicted below 0.5
        self.tp_payoff,  # 2: actual is 1,  predicted not below 0.5
        self.fn_payoff,  # 3: actual is 1,  predicted below 0.5
    ]
    weights = numpy.choose(selector, weight_table)
    return numpy.sum(weights * distance)

def get_total_payoff(self):
    """Return the total payoff of ``self.predictions`` against ``self.target``.

    Delegates the whole computation to ``self._get_payoff``.
    """
    # Read the predictions from the instance, like the targets, instead of
    # relying on a bare ``predictions`` name being defined somewhere else.
    return self._get_payoff(self.target, self.predictions)

We use numpy.choose to generate an array of payoff selections and multiply that with an array of absolute differences between 0.5 and the prediction values, then sum. numpy.isclose is used to test whether the actual values are close to 1. We can ignore the predicted == 0.5 case, since multiplying by numpy.abs(0.5 - predicted) gives the correct result of 0 anyway. If self.target and self.predictions are guaranteed to be 1D, numpy.dot is likely to perform better than separately multiplying and summing.

OTHER TIPS

The key to vectorizing functions that use different expressions depending on a condition is np.choose. Also, in your case, predicted-0.5 and 0.5-predicted can be replaced by abs(predicted-0.5), plus special handling of the case where predicted==0.5 (I'm guessing the special handling is there for correct handling of NaNs).

import numpy as np

class A(object):
    """Demo holder for payoff weights plus sample target/prediction arrays.

    Provides an element-by-element total payoff and a vectorized equivalent.
    """

    def __init__(self):
        # Payoff weights, one per (actual, predicted) quadrant.
        self.fn_payoff = 222.
        self.tn_payoff = 444.
        self.fp_payoff = 777.
        self.tp_payoff = 888.
        # Sample data used by both total-payoff methods.
        self.target = np.array([0.3, 1., 2.])
        self.predictions = np.array([0.4, 0.5, 1.7])

    def get_payoff(self, actual, predicted):
        """Return the payoff for a single (actual, predicted) pair.

        The result is a weight times the distance of ``predicted`` from 0.5.
        ``fn``/``tp`` weights apply when ``actual`` is within 1e-5 of 1.0,
        ``tn``/``fp`` otherwise; the first of each pair is used for
        predictions below 0.5, the second for predictions above 0.5.
        A prediction that is neither below nor above 0.5 yields 0.
        """
        # bool() insists on a single truth value up front, so an input with
        # no unambiguous truth value is rejected before any branch is taken.
        actual_is_one = bool(abs(actual - 1.0) < 1e-5)

        if predicted < 0.5:
            rate = self.fn_payoff if actual_is_one else self.tn_payoff
            return rate * (0.5 - predicted)
        if predicted > 0.5:
            rate = self.tp_payoff if actual_is_one else self.fp_payoff
            return rate * (predicted - 0.5)
        return 0

    def get_total_payoff(self):
        """Return the sum of ``get_payoff`` over all target/prediction pairs."""
        pairs = zip(np.nditer(self.target), np.nditer(self.predictions))
        return sum(self.get_payoff(t, p) for t, p in pairs)

    def get_total_payoff_VECTORIZED(self):
        """Return the same total as ``get_total_payoff`` using array operations."""
        target_is_one = np.abs(self.target - 1) < 1e-5
        predicted_low = self.predictions < 0.5

        # Pick the weight per element: first by side of 0.5, then by whether
        # the target counts as 1.
        weights = np.where(
            predicted_low,
            np.where(target_is_one, self.fn_payoff, self.tn_payoff),
            np.where(target_is_one, self.tp_payoff, self.fp_payoff),
        )
        weighted = weights * abs(self.predictions - 0.5)
        # Predictions of exactly 0.5 are pinned to a payoff of 0.
        weighted[self.predictions == 0.5] = 0
        return weighted.sum()

a = A()
# Both implementations are expected to report the same total for the sample data.
# print(...) with a single argument works on Python 2 and Python 3 alike.
print(a.get_total_payoff())  # => 976.8
print(a.get_total_payoff_VECTORIZED())  # => 976.8
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top