Another example is a strict left fold. You can write an applicative instance which allows you to compose folds so that the resulting fold can be performed on the data in a single pass and constant space. However, the monad instance needs to re-iterate from the beginning of the data for each bind and keep the whole list in memory.
{-# LANGUAGE GADTs #-}
import Criterion.Main
import Data.Monoid
import Control.Applicative
import Control.Monad
import Prelude hiding (sum)
data Fold e r where
Step :: !(a -> e -> a) -> !a -> !(a -> r) -> Fold e r
Bind :: !(Fold e r) -> !(r -> Fold e s) -> Fold e s
data P a b = P !a !b
instance Functor (Fold e) where
fmap f (Step step acc ret) = Step step acc (f . ret)
fmap f (Bind fld g) = Bind fld (fmap f . g)
instance Applicative (Fold e) where
pure a = Step const a id
Step fstep facc fret <*> Step xstep xacc xret = Step step acc ret where
step (P fa xa) e = P (fstep fa e) (xstep xa e)
acc = P facc xacc
ret (P fa xa) = (fret fa) (xret xa)
Bind fld g <*> fldx = Bind fld ((<*> fldx) . g)
fldf <*> Bind fld g = Bind fld ((fldf <*>) . g)
instance Monad (Fold e) where
return = pure
(>>=) = Bind
fold :: Fold e r -> [e] -> r
fold (Step _ acc ret) [] = ret acc
fold (Step step acc ret) (x:xs) = fold (Step step (step acc x) ret) xs
fold (Bind fld g) lst = fold (g $ fold fld lst) lst
monoidalFold :: Monoid m => (e -> m) -> (m -> r) -> Fold e r
monoidalFold f g = Step (\a -> mappend a . f) mempty g
count :: Num n => Fold e n
count = monoidalFold (const (Sum 1)) getSum
sum :: Num n => Fold n n
sum = monoidalFold Sum getSum
avgA :: Fold Double Double
avgA = liftA2 (/) sum count
avgM :: Fold Double Double
avgM = liftM2 (/) sum count
main :: IO ()
main = defaultMain
[ bench "Monadic" $ nf (test avgM) 1000000
, bench "Applicative" $ nf (test avgA) 1000000
] where test f n = fold f [1..n]
I wrote the above from the top of my head as an example so it might not be the optimal implementation for applicative and monadic folds, but running the above gives me:
benchmarking Monadic
mean: 119.3114 ms, lb 118.8383 ms, ub 120.2822 ms, ci 0.950
std dev: 3.339376 ms, lb 2.012613 ms, ub 6.215090 ms, ci 0.950
benchmarking Applicative
mean: 51.95634 ms, lb 51.81261 ms, ub 52.15113 ms, ci 0.950
std dev: 850.1623 us, lb 667.6838 us, ub 1.127035 ms, ci 0.950