Question

I'm still working on groking the F# thing - trying to work out how to 'think' in F# rather than just translating from other languages I know.

I've recently been thinking about the cases where you don't have a 1:1 map between before and after. Cases where List.map falls down.

One example of this is moving averages, where typically you will have len-n+1 results for a list of length len when averaging over n items.

For the gurus out there, is this a good way to do it (using queue pinched from Jomo Fisher)?

//Immutable queue, with added Length member
type Fifo<'a> =
    new()={xs=[];rxs=[]}
    new(xs,rxs)={xs=xs;rxs=rxs}

    val xs: 'a list;
    val rxs: 'a list;

    static member Empty() = new Fifo<'a>()
    member q.IsEmpty = (q.xs = []) && (q.rxs = [])
    member q.Enqueue(x) = Fifo(q.xs,x::q.rxs)
    member q.Length() = (List.length q.xs) + (List.length q.rxs)
    member q.Take() =
        if q.IsEmpty then failwith "fifo.Take: empty queue"
        else match q.xs with
                | [] -> (Fifo(List.rev q.rxs,[])).Take()
                | y::ys -> (Fifo(ys, q.rxs)),y

//List module, add function to split one list into two parts (not safe if n > lst length)
module List =
    let splitat n lst =
        let rec loop acc n lst =
            if List.length acc = n then
                (List.rev acc, lst)
            else
                loop (List.hd lst :: acc) n (List.tl lst)
        loop [] n lst

//Return list with moving average accross len elements of lst
let MovingAverage (len:int) (lst:float list) = 
    //ugly mean - including this in Fifo kills genericity
    let qMean (q:Fifo<float>) = ((List.sum q.xs) + (List.sum q.rxs))/(float (q.Length()))

    //get first part of list to initialise queue
    let (init, rest) = List.splitat len lst

    //initialise queue with first n items
    let q = new Fifo<float>([], init)

    //loop through input list, use fifo to push/pull values as they come
    let rec loop (acc:float list) ls (q:Fifo<float>) =
        match ls with
        | [] -> List.rev acc
        | h::t -> 
            let nq = q.Enqueue(h) //enqueue new value
            let (nq, _) = nq.Take() //drop old value
            loop ((qMean nq)::acc) t nq //tail recursion

    loop [qMean q] rest q

//Example usage    
MovingAverage 3 [1.;1.;1.;1.;1.;2.;2.;2.;2.;2.]

(Maybe a better way would be to implement a MovingAverageQueue by inheriting from Fifo?)

Was it helpful?

Solution

If you don't care too much about performance, here is a very simple solution:

#light

let MovingAverage n s =
   Seq.windowed n s
   |> Seq.map Array.average

let avgs = MovingAverage 5000 (Seq.map float [|1..999999|])

for avg in avgs do
    printfn "%f" avg
    System.Console.ReadKey() |> ignore

This recomputes the average of each 'window' from scratch, so it is poor if the windows are large.

In any case, check out Seq.windowed:

http://research.microsoft.com/projects/cambridge/fsharp/manual/FSharp.Core/Microsoft.FSharp.Collections.Seq.html

as it's handy to have in your back pocket for such things.

OTHER TIPS

If you do care about performance, then you can calculate a moving average efficiently using something like this (assuming we're calculating a moving average over a 3-day window)

Numbers[n]    Running Total[n]
---------     ---------------
n[0] = 7       7 = Numbers[0]
n[1] = 1       8 = RunningTotal[1-1] + Numbers[1]
n[2] = 2      10 = RunningTotal[2-1] + Numbers[2]
n[3] = 8      11 = RunningTotal[3-1] + Numbers[3] - Numbers[3-3]
n[4] = 4      14 = RunningTotal[4-1] + Numbers[4] - Numbers[4-3]
n[5] = 1      13 = RunningTotal[5-1] + Numbers[5] - Numbers[5-3] 
n[6] = 9      14 = RunningTotal[6-1] + Numbers[6] - Numbers[6-3]
...
N             RunningTotal[N] = RunningTotal[N-1] + Numbers[N] - Numbers[N-3]

The hard part about this is holding on your previous running total and NumberN-window. I came up with the following code:

let movingAverage days l =
    seq {
        let queue = new Queue<_>(days : int)
        let divisor = decimal days

        let total = ref 0m
        for cur in l do
            queue.Enqueue(cur)
            total := !total + cur
            if queue.Count < days then
                yield (cur, 0m)
            else
                yield (cur, !total / divisor)
                total := !total - (queue.Dequeue())
    }

This version isn't as nice looking as the Haskell code, but it should avoid performance problems associated with recomputing your "window" on each run. It keeps a running total and holds previously used numbers in a queue, so it should be very fast.

Just for fun, I wrote a simple benchmark:

#light
open System
open System.Collections.Generic
open System.Diagnostics;

let windowAverage days (l : #seq<decimal>) = Seq.windowed days l |> Seq.map (Seq.average)

let princessAverage days l =
    seq {
        let queue = new Queue<_>(days : int)
        let divisor = decimal days

        let total = ref 0m
        for cur in l do
            queue.Enqueue(cur)
            total := !total + cur
            if queue.Count < days then
                yield (cur, 0m)
            else
                yield (cur, !total / divisor)
                total := !total - (queue.Dequeue())
    }

let testData =
    let rnd = new System.Random()
    seq { for a in 1 .. 1000000 -> decimal (rnd.Next(1000)) }

let benchmark msg f iterations =
    let rec loop = function
        | 0 -> ()
        | n -> f 3 testData |> ignore; loop (n - 1)

    let stopWatch = Stopwatch.StartNew()
    loop iterations
    stopWatch.Stop()
    printfn "%s: %f" msg stopWatch.Elapsed.TotalMilliseconds

let _ =
    let iterations = 10000000
    benchmark "princessAverage" princessAverage iterations
    benchmark "windowAverage" windowAverage iterations
    printfn "Done"

Results:

princessAverage: 1670.791800
windowAverage: 2986.146900

My version is ~1.79x faster.

Here's a (corrected, I hope) F# version of the Haskell solution proposed here.

EDIT: Now tail-recursive, not any faster, but doesn't explode with n = 50000. (see edit history for non-tail-recursive version)

let LimitedAverage n ls = 
    let rec loop acc i n ls = 
        match i with
        | 0 -> acc //i counts down from n to 0, when we hit 0 we stop
        | _ -> match ls with
               | [] -> acc //if we hit empty list before end of n, we stop too
               | x::xs -> (loop (acc + (x / float n)) (i - 1) n xs) //divide this value by n, perform average on 'rest' of list
    loop 0. n n ls

LimitedAverage 50000 (List.map float [1..9999999])
//>val it : float = 25000.5

let rec MovingAverage3 n ls = 
    let rec acc loop i n ls = 
        match i with 
        | 0 -> List.rev acc //i counts down from n to 0, when we hit 0 we stop
        | _ -> match ls with
                | [] -> List.rev acc //if we hit empty list before end of n, we stop too
                | x::xs -> loop (LimitedAverage2 n ls :: acc) (i - 1) n xs // limited average on whole list, then moving average on tail
    loop [] (n + 1) n ls 

MovingAverage3 50000 (List.map float [1..9999999])
//>val it : float list = [25000.5; 25001.5; 25002.5; ...]

If you care about performance and like elegant code then try

module MovingAverage = 
    let selfZip n l =
        Seq.skip n l |> Seq.zip l 

    let runTotal i z =
        Seq.scan ( fun sum (s, e) -> sum - s + e ) i z

    let average n l:seq<'a> =
        Seq.skip n l
        |> selfZip n
        |> runTotal (l |> Seq.take n |> Seq.sum)
        |> Seq.map ( fun sum -> decimal sum / decimal n ) 

 let ma = MovingAverage.average 2 myseq

Using FSUnit we can test it

 let myseq = seq { for i in 0..10 do yield i }

 Seq.nth 0 ma |> should equal 0.5
    Seq.nth 1 ma |> should equal 1.5
    Seq.nth 2 ma |> should equal 2.5
    Seq.nth 3 ma |> should equal 3.5

The trick of the algorithm is the first sum the first n numbers and then maintain a running total by adding the head of the window and subtracting the tail of the window. The sliding window is achieved by doing a self zip on the sequence but with the second argument to zip advanced by the window size.

At the end of the pipeline we just divide the running total by the window size.

Note scan is just like fold but yield each version of the state into a sequence.

An even more elegant solution though possibley with performance hit is to make the observation that if we zero pad the sequence we don't need to calculate the initial sum.

namespace Utils

module MovingAverage = 
    let selfZip n l =
        Seq.skip n l |> Seq.zip l 

    let rec zeros = 
        seq { yield 0.0; yield! zeros} 

    // Create a running total given
    let runTotal z =
        Seq.scan (fun sum (s,e) -> sum - s + e ) 0.0 z

    let average n l =
        Seq.concat [(Seq.take n zeros); l]
        |> selfZip n
        |> runTotal
        |> Seq.map ( fun sum -> sum / float n ) 
        |> Seq.skip n

There could be a performance hit due to the second indirection related to the wrapping of the two sequences but perhaps it is not significant depending on the size of the window

This is my version.

let sma list n =
    let len = List.length list
    let rec loop acc sum cnt =
        if cnt >= len then List.rev acc
        else if cnt < n-1 then loop (0.0::acc) (sum + List.nth list cnt) (cnt+1)
        else loop (((sum + List.nth list cnt)/(float n))::acc) (sum + (List.nth list cnt) - (List.nth list (cnt-n+1))) (cnt+1)
    loop [] 0.0 0

Example:

sma (List.map float [5..50]) 5
[0, 0, 0, 0, 7, 8, 9, ...]

As far as I can see, your code is full of let statements. I'm not familiar with F# but did do some Haskell. The functional paradigm means not thinking about "how" but about "what": you think Fifo, but you should actually just specify the semantics of the moving average.

-- the limited average of a list
limitedaverage 0 _ = 0
limited verage n (x:xs) = (x/n) + ( limited average (n-1) xs )

-- a list, transformed into a sequence of moving averages of 
movingaverages n [] = []
movingaverages n (x:xs) = ( movingaverage n (x:xs) : movingaverages n xs )
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top