Yes, your trick allows to avoid branch and it makes it faster... sometimes.
I wrote benchmark that compares these solutions in various situations, along with my own:
ticks += mStepSize & -static_cast<int>(running)
My results are following:
Off:
branch: 399949150
mul: 399940271
andneg: 277546678
On:
branch: 204035423
mul: 399937142
andneg: 277581853
Pattern:
branch: 327724860
mul: 400010363
andneg: 277551446
Random:
branch: 915235440
mul: 399916440
andneg: 277537411
Off
is when timers are turned off. In this cases solutions take about the same time.
On
is when they are turned on. Branching solution two times faster.
Pattern
is when they are in 100110 pattern. Performance is similar, but branching is a bit faster.
Random
is when branch is unpredictable. In this case multiplications is more than 2 times faster.
In all cases my bit-hacking trick is fastest, except for On
where branching wins.
Note that this benchmark is not necessarily representative for all compiler versions processors etc. Even small changes of benchmark can turn results upside down (for example if compiler can inline knowing mStepSize
is 1
than multiplication can be actually fastest).
Code of the benchmark:
#include<array>
#include<iostream>
#include<chrono>
struct Timer {
bool running{false};
int ticks{0};
void branch(int mStepSize) {
if(running) ticks += mStepSize;
}
void mul(int mStepSize) {
ticks += mStepSize * static_cast<int>(running);
}
void andneg(int mStepSize) {
ticks += mStepSize & -static_cast<int>(running);
}
};
void run(std::array<Timer, 256>& timers, int step) {
auto start = std::chrono::steady_clock::now();
for(int i = 0; i < 1000000; i++)
for(auto& t : timers)
t.branch(step);
auto end = std::chrono::steady_clock::now();
std::cout << "branch: " << (end - start).count() << std::endl;
start = std::chrono::steady_clock::now();
for(int i = 0; i < 1000000; i++)
for(auto& t : timers)
t.mul(step);
end = std::chrono::steady_clock::now();
std::cout << "mul: " << (end - start).count() << std::endl;
start = std::chrono::steady_clock::now();
for(int i = 0; i < 1000000; i++)
for(auto& t : timers)
t.andneg(step);
end = std::chrono::steady_clock::now();
std::cout << "andneg: " << (end - start).count() << std::endl;
}
int main() {
std::array<Timer, 256> timers;
int step = rand() % 256;
run(timers, step); // warm up
std::cout << "Off:\n";
run(timers, step);
for(auto& t : timers)
t.running = true;
std::cout << "On:\n";
run(timers, step);
std::array<bool, 6> pattern = {1, 0, 0, 1, 1, 0};
for(int i = 0; i < 256; i++)
timers[i].running = pattern[i % 6];
std::cout << "Pattern:\n";
run(timers, step);
for(auto& t : timers)
t.running = rand()&1;
std::cout << "Random:\n";
run(timers, step);
for(auto& t : timers)
std::cout << t.ticks << ' ';
return 0;
}