@Alex answer is good but, with only 256 values (indexes) an array should be better
import qualified Data.ByteString.Lazy as L
import qualified Data.Array.Unboxed as A
import qualified Data.ByteString as B
import Data.Int
import Data.Word
fq :: L.ByteString -> A.UArray Word8 Int64
fq = A.accumArray (+) 0 (0, 255) . map (\c -> (c, 1)) . concat . map B.unpack . L.toChunks
main = L.getContents >>= print . fq
@alex code take (for my sample file) 24.81 segs, using array take 7.77 segs.
UPDATED:
although Snoyman solution is better, an improvement avoiding unpack
maybe
fq :: L.ByteString -> A.UArray Word8 Int64
fq = A.accumArray (+) 0 (0, 255) . toCounterC . L.toChunks
where toCounterC [] = []
toCounterC (x:xs) = toCounter x (B.length x) xs
toCounter _ 0 xs = toCounterC xs
toCounter x i xs = (B.index x i', 1): toCounter x i' xs
where i' = i - 1
with ~50% speedup.
UPDATED:
Using IOVector
as Snoyman is as Conduit
version (a bit faster really, but this is a raw code, better use Conduit
)
import Data.Int
import Data.Word
import Control.Monad.IO.Class
import qualified Data.ByteString.Lazy as L
import qualified Data.Array.Unboxed as A
import qualified Data.ByteString as B
import qualified Data.Vector.Unboxed.Mutable as V
fq :: L.ByteString -> IO (V.IOVector Int64)
fq xs =
do
v <- V.replicate 256 0 :: IO (V.IOVector Int64)
g v $ L.toChunks xs
return v
where g v = toCounterC
where toCounterC [] = return ()
toCounterC (x:xs) = toCounter x (B.length x) xs
toCounter _ 0 xs = toCounterC xs
toCounter x i xs = do
let i' = i - 1
w = fromIntegral $ B.index x i'
c <- V.read v w
V.write v w (c + 1)
toCounter x i' xs
main = do
v <- L.getContents >>= fq
mapM_ (\i -> V.read v i >>= liftIO . putStr . (++", ") . show) [0..255]