Question

I'm having some trouble making a sequence. Basically I need to chop a sequence into a sequence of arrays. Seq.windowed almost does it but I don't want duplicate elements.

I can get what I want by reading everything into an array first but I'd rather use a sequence.

let array_chunk s (a:int[]) =
    Array.init (a.Length / s) (fun i -> Array.sub a (i * s) s)

someSequence |> Seq.to_array |> array_chunk 5
Was it helpful?

Solution

Here's a nice imperative one that'll work with seq and generate arrays of any size. The last one will be smaller if the sequence isn't even by n.

let chunk n xs = seq {
    let i = ref 0
    let arr = ref <| Array.create n (Unchecked.defaultof<'a>)
    for x in xs do
        if !i = n then 
            yield !arr
            arr := Array.create n (Unchecked.defaultof<'a>)
            i := 0 
        (!arr).[!i] <- x
        i := !i + 1
    if !i <> 0 then
        yield (!arr).[0..!i-1] }

OTHER TIPS

I love Seq.take & Seq.skip solution. It is beautiful, simple and very readable, but I would use something like this:

let chunks n (sequence: seq<_>) =
    let fold_fce (i, s) value = 
        if i < n then (i+1, Seq.append s (Seq.singleton value))
                 else (  1, Seq.singleton value)
    in sequence
    |> Seq.scan (fold_fce) (0, Seq.empty)
    |> Seq.filter (fun (i,_) -> i = n)
    |> Seq.map (Seq.to_array << snd )

It is not imperative code and it should be more efficient than the solution that uses Seq.skip. On the other hand, it trims input sequence to the length divisible by n. If this behavior is unacceptable it can be fixed by simple modification:

let chunks n (sequence: seq<_>) =
    let fold_fce (i, s) value = 
        if i < n then (i+1, Seq.append s (Seq.singleton value))
                 else (  1, Seq.singleton value)
    in sequence
    |> Seq.map (Some)
    |> fun s -> Seq.init_finite (n-1) (fun _ -> None) |> Seq.append s
    |> Seq.scan (fold_fce) (0, Seq.empty)
    |> Seq.filter (fun (i,_) -> i = n) 
    |> Seq.map (Seq.to_array << (Seq.choose (id)) << snd )

This answer will probably get buried, but here's my take on the problem:

let chunk n xs = 
    xs 
    |> Seq.mapi(fun i x -> i/n, x)
    |> Seq.groupBy fst
    |> Seq.map (fun (_, g) -> Seq.map snd g)

Pros:

  • Uses only seq, no arrays
  • O(n) runtime. Not O(n^2) like Seq.skip/take solutions
  • Seq.length doesn't have to be a multiple of n
  • small and easy to understand?

Cons:

  • probably not as efficient as imperative/mutable loops

How about:

let rec chunks n sq =
  if not (Seq.is_empty sq) then 
    seq {
      yield Seq.take n sq |> Seq.to_array
      yield! chunks n (Seq.skip n sq)
    }
  else
    Seq.empty

Note that this requires sq to have a number of elements which is evenly divisible by n (because Seq.take and Seq.skip, unlike LINQ's Take and Skip extension methods, require that the sequence contains at least n elements). Also, this isn't as efficient as explicitly using the enumerator would be, but it's more elegant.

Corrected version of take/skip answer, as extension function. Should work for uneven lengths. No guarantees for performance though...

 module Seq = 
    let rec chunks n (s:#seq<_>) =
        seq {
                 if Seq.length s <= n then
                    yield s
                 else
                    yield Seq.take n s
                    yield! chunks n (Seq.skip n s)           
            }

(Code taken from my answer here)

This is nice and succinct:

let chunk size (arr : 'a array) =
    [| for a in 0 .. size .. arr.Length - size -> arr.[a..a + size - 1] |]

However, this lops off the last (arr.Length % size) elements in the array. You can fix this by grabbing the missing elements and using Array.append:

let chunk2 size (arr : 'a array) = 
    let first = [| for a in 0 .. size .. arr.Length - size -> arr.[a..a + size - 1] |]
    let numberOfMissingElements = arr.Length - (first.Length * size)
    if numberOfMissingElements > 0 then
        let last = [| arr.[arr.Length - numberOfMissingElements..] |]
        Array.append first last
    else first

Here's another approach with some pattern matching - it looks more like *.iter, and I've got it spitting out lists rather than arrays, since that's how I usually like my data.

let sequence_in_lists_of_length_n_with_acc (s: seq<'a>) n acc = seq {
use e = s.GetEnumerator()
let rec next_with_acc acc =
  match e.MoveNext(), acc with
  | true, a when List.length a + 1 = n ->  
    yield (List.rev (e.Current :: a))
    next_with_acc []
  | true, _ -> next_with_acc (e.Current :: acc)
  | false, _ ->
    f(List.rev acc)
  ()
next_with_acc []

}

I'm liking this solution better. It generates a new sequence from the existing sequence (meaning it doesn't need to traverse the whole sequence to get a result - that's critical if you're doing something like log processing, where you can't call things like Length).

I ended up writing a blog post with more detail about how I got here.

module Seq =  

let grouped_by_with_leftover_processing f (f2: 'a list -> list<'a> option) (s: seq<'a>)= let rec grouped_by_with_acc (f: 'a -> 'a list -> 'a list option * 'a list) acc (ie: IEnumerator<'a>) = seq { if ie.MoveNext() then let nextValue, leftovers = f ie.Current acc if nextValue.IsSome then yield nextValue.Value yield! grouped_by_with_acc f leftovers ie else let rems = f2 acc if rems.IsSome then yield rems.Value } seq { yield! grouped_by_with_acc f [] (s.GetEnumerator()) }

let YieldReversedLeftovers (f: 'a list) = if f.IsEmpty then None else Some (List.rev f)

let grouped_by f s = grouped_by_with_leftover_processing f YieldReversedLeftovers s

let group_by_length_n n s = let grouping_function newValue acc = let newList = newValue :: acc // If we have the right length, return // a Some as the first value. That'll // be yielded by the sequence. if List.length acc = n - 1 then Some (List.rev newList), [] // If we don't have the right length, // use None (so nothing will be yielded) else None, newList
grouped_by grouping_function s

Large sequences aren't an issue:

seq { for i in 1..1000000000 -> i} |> Seq.group_by_length_n 3;; val it : seq<int list> = seq [[1; 2; 3]; [4; 5; 6]; [7; 8; 9]; [10; 11; 12]; ...] >

Nice version from Princess has been fixed to get tail and converted into seq

let array_chunk size (arr : 'a array) = 
    let maxl = arr.Length - 1
    seq { for a in 0 .. size .. maxl -> arr.[a .. min (a + size - 1) maxl ] }

How about this one :

let grouped n = 
   Seq.unfold(fun s -> if not (Seq.isEmpty s) then 
                           Some (Seq.take n s, Seq.skip n s) 
                       else None) 

It is in the same vein than kvb's answer.

I somehow remember (link?) that a sequence does not remember the position, so that successive take / skip will not be optimal.

Here's @kvb's solution with the Seq.skip/take limitations fixed. It's small, elegant, and O(n).

let eSkip n s = System.Linq.Enumerable.Skip(s, n)

let rec seq_chunks n sq =
  if (Seq.isEmpty sq) 
  then Seq.empty
  else seq {
      yield Seq.truncate n sq
      yield! seq_chunks n (eSkip n sq)
 }

Here is my version taking an array as input and output :

  let chunk chunkNumber (array : _ array) =
    let chunkSize = array.Length/chunkNumber
    let mutable startIndex = 0
    [|
        let n1 = array.Length % chunkNumber
        for i = 1 to n1 do
            yield Array.sub array startIndex (chunkSize+1)
            startIndex <- startIndex + chunkSize+1

        let n2 = chunkNumber - n1
        for i = 1 to n2 do
            yield Array.sub array startIndex chunkSize
            startIndex <- startIndex + chunkSize
    |]

The function tries to make chunks of similar size (instead of getting a very small last chunk) and builds the output the same way you would build a sequence (making it easy to rewrite it in order to get a sequence as output)

Summarizing the above Chunking or Buffering or Segmenting of a seqence, list, or array. Two forms:

let rec chunk size xs = seq {
    yield Seq.take size xs
    yield! chunk size (Seq.skip size xs)
    }

or

let chunk size = Seq.unfold (fun xs ->
    match Seq.isEmpty xs with
    | false -> Some(Seq.take size xs, Seq.skip size xs)
    | _ -> None
    )

Note: If Seq functioned properly like a cursor (as I would have expected being a lazy evaluation), Seq.take would advance the position Seq.skip would not be necessary. However, that is not the case.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top