First of all, I would do as Baldrick suggests in a comment on the OP; then I would try the other alternatives cited in the OP. Each time, I would profile/measure the results to make an informed decision.
If you are not yet satisfied, then I suggest something along these lines:
/// Drives a locally constructed Calculator over `obj` one million times.
///
/// `obj` is taken as `const T &`, so `calc.proceed(obj)` is selected from the
/// static type `T` the template was instantiated with.
/// NOTE(review): presumably the point is to pick the `proceed` overload once,
/// outside the hot loop -- confirm against Calculator's interface.
///
/// @tparam T  Concrete type forwarded to `Calculator::proceed`.
/// @param obj Object handed to `proceed` on every iteration; never copied here.
template <typename T>
void doStuffImpl(const T &obj) {
    const int kIterations = 1000000;

    Calculator calc;
    for (int iteration = 0; iteration != kIterations; ++iteration) {
        calc.proceed(obj);
    }
}
/// Dispatches `base_ptr` to the `doStuffImpl` instantiation matching its
/// dynamic type.
///
/// The pointer is probed with `std::dynamic_pointer_cast` for `DerivedA` and
/// then for `DerivedB`. Each successful cast triggers one call to
/// `doStuffImpl` with the downcast object. The two probes are independent:
/// an object for which both casts succeed is processed twice, and an object
/// for which neither succeeds (including a null `base_ptr`) is ignored.
///
/// @param base_ptr Shared pointer to the object to process.
void doStuff(const std::shared_ptr<Base> &base_ptr) {
    // Probe for DerivedA; the cast result converts to false when it is empty.
    if (auto as_derived_a = std::dynamic_pointer_cast<DerivedA>(base_ptr)) {
        doStuffImpl(*as_derived_a);
    }

    // Probe for DerivedB regardless of the outcome above.
    if (auto as_derived_b = std::dynamic_pointer_cast<DerivedB>(base_ptr)) {
        doStuffImpl(*as_derived_b);
    }
}