I thought it might be interesting to benchmark the three solutions that have been been contributed to date. If anyone wants to see additional tests, please let me know and I'll add them.
Niklas1
def enumerate_part(i, j, lo, y)
# i = index of the current part
# j = index of the current number in the current part
# lo = lower bound for numbers to chose
# y = output yielder
if i == @parts.size
y << @parts.map(&:dup)
return
end
if j == @sizes[i]
enumerate_part(i + 1, 0, @sizes[i + 1] == @sizes[i] ? @parts[i][0] : 1, y)
return
end
(lo..@n).each do |x|
next if @used[x]
@used[x] = true
@parts[i][j] = x
enumerate_part(i, j + 1, x + 1, y)
@used[x] = false
end
end
def niklas1(n, k)
@n, @k = n, k
@used = n.times.map { false }
# @sizes = [2,2,3] for n = 7, k = 3
@sizes = (k - n % k).times.map { n / k } + (n % k).times.map { (n + k - 1) / k }
@parts = @sizes.map { |i| [0]*i }
results = Enumerator.new { |y| enumerate_part(0,0,1,y) }
cnt = results.count
puts "count = #{cnt} != #{@sz}" unless cnt == @sz
end
Niklas2
def dfs(i, j, lo, eq, sz, state)
parts, y, n, k, left = state
if i == k
y << parts
return
end
if j == sz
mid = i+1 == n % k
neweq, newsz = mid ? [k - i - 1, sz-1] : [eq - 1, sz]
dfs(i+1, 0, mid ? 1 : parts[i][0], neweq, newsz, state)
return
end
higher = ((j == 0) ? (eq * sz - j - 1) : (sz - j - 1))
left.drop(higher).each_with_index do |x,idx|
break if x < lo
parts[i][j] = x
left.delete_at(idx + higher)
dfs(i, j + 1, x + 1, eq, sz, state)
left.insert(idx + higher, x)
end
end
def niklas2(n, k)
parts = k.times.map{|i| [0]*(i < n%k ? (n+k-1)/k : n/k) }
left = n.downto(1).to_a
results = Enumerator.new { |y|
dfs(0, 0, 1, (n % k == 0) ? k : n % k, (n + k - 1) / k, [parts, y, n, k, left])
}
cnt = results.count
puts "count = #{cnt} != #{@sz}" unless cnt == @sz
end
Marc-André
def enum(n, k)
# Pick smaller_size items from the list, repeat smaller_n times
# then pick larger_size items from the list, repeat larger_n times.
smaller_n = n.div k
larger_times = n % k
smaller_times = k - larger_times
larger_n = smaller_n + 1
return to_enum(:enum, n, k) { calc_size(n, smaller_n, smaller_times, larger_n, larger_times) } unless block_given?
all = [*1..n]
# split all into one subset to group with the smaller_n and another
# to group with the larger_n.
all.combination(smaller_n * smaller_times).each do |smaller|
larger = all - smaller
subdivide(smaller, smaller_n) do |small|
subdivide(larger, larger_n) do |large|
yield [*small, *large]
end
end
end
end
# Subdivides elems into groups of n, keeping the elements sorted
# and generating only the sorted such combinations.
def subdivide(elems, n)
return yield [] if elems.empty?
# No choice for the first element, because we want to keep things sorted.
first, *rest = elems
rest.combination(n - 1).each do |comb|
remain = rest - comb
subdivide(remain, n) do |sub|
yield [[first, *comb], *sub]
end
end
end
def calc_size(n, smaller_n, smaller_times, larger_n, larger_times)
all = [
smaller_times.times.map do |i|
Array.new(n - i*smaller_n).combination(smaller_n)
end,
larger_times.times.map do |i|
Array.new(n - smaller_times*smaller_n - i*larger_n).combination(larger_n)
end
]
# Multiply everything, but divide by the number of symmetries, because
# we don't want to distinguish (1,2), (3,4), ... from (3,4), (1,2), ...
all.map do |enums|
enums.map(&:size).inject(1, :*) / enums.permutation.size
end.inject(:*)
end
def marc_andre(n, k)
a = enum(n, k).to_a
a.size
end
Cary1
require 'set'
def cary1(n, k)
m, r = n.divmod(k)
cnt = doit([*1..n], Array.new(k-r, m) + Array.new(r, m+1)).to_a.map(&:to_a).size
puts "count = #{cnt} != #{@sz}" unless cnt == @sz
end
def doit(items, subs, so_far=[], combos=Set.new)
return combos << (so_far+[items]).to_set if subs.size == 1
items.combination(subs.first){|c|doit(items-c,subs[1..-1],so_far+[c],combos)}
combos
end
Cary2
def cary2(n, k)
m, r = n.divmod(k)
cnt = doit2([*1..n], Array.new(k-r, m) + Array.new(r, m+1)).to_a.map(&:to_a).size
puts "count = #{cnt} != #{@sz}" unless cnt == @sz
end
def doit2(list, group_sizes)
@group_sizes = group_sizes
@combos = []
@empty_group_filled_by_size = group_sizes.uniq.product([false]).to_h
recurse(list, group_sizes.map { |sub| [] })
@combos
end
def recurse(remaining_items_to_assign, so_far)
empty_group_filled_by_size = @empty_group_filled_by_size.dup
so_far.each_with_index do |group, ndx|
next if group.size == @group_sizes[ndx] # all positions in group allocated
if group.empty?
group_size = @group_sizes[ndx]
empty_group_filled_by_size[group_size] ? next :
empty_group_filled_by_size[group_size] = true
end
so_far[ndx] << remaining_items_to_assign.first
if remaining_items_to_assign.size == 1
@combos << so_far.map { |group| group.dup } # deep copy
else
recurse(remaining_items_to_assign[1..-1], so_far)
end
so_far[ndx].pop
end
end
Benchmark code
require 'benchmark'
def bench_it(n, k, iterations)
puts "\nn = #{n}, k = #{k}, size = #{@sz = marc_andre(n,k)}\n"
Benchmark.bm(%w[Niklas Marc-André Cary].map(&:size).max) do |bm|
bm.report('Niklas1') do
iterations.times do
niklas1(n, k)
end
end
bm.report('Niklas2') do
iterations.times do
niklas2(n, k)
end
end
bm.report('Marc-André') do
iterations.times do
marc_andre(n, k)
end
end
bm.report('Cary1') do
iterations.times do
cary1(n, k)
end
end
bm.report('Cary2') do
iterations.times do
cary2(n, k)
end
end
end
end
iterations = 1
bench_it( 7, 3, iterations)
bench_it(10, 2, iterations)
bench_it(10, 3, iterations)
bench_it(10, 4, iterations)
bench_it(13, 2, iterations)
bench_it(13, 3, iterations)
bench_it(13, 4, iterations)
bench_it(13, 5, iterations)
bench_it(16, 2, iterations)
bench_it(16, 3, iterations)
bench_it(16, 4, iterations)
bench_it(18, 2, iterations)
bench_it(18, 3, iterations)
bench_it(20, 2, iterations)
Results
The results are best read with this clip playing in the background.
n = 7, k = 3, size = 105
user system total real
Niklas1 0.000000 0.000000 0.000000 ( 0.001131)
Niklas2 0.000000 0.000000 0.000000 ( 0.000595)
Marc-André 0.000000 0.000000 0.000000 ( 0.000911)
Cary1 0.000000 0.000000 0.000000 ( 0.003241)
Cary2 0.000000 0.000000 0.000000 ( 0.000922)
n = 10, k = 2, size = 126
user system total real
Niklas1 0.010000 0.000000 0.010000 ( 0.004986)
Niklas2 0.000000 0.000000 0.000000 ( 0.001031)
Marc-André 0.000000 0.000000 0.000000 ( 0.000880)
Cary1 0.000000 0.000000 0.000000 ( 0.003850)
Cary2 0.000000 0.000000 0.000000 ( 0.001607)
n = 10, k = 3, size = 2100
user system total real
Niklas1 0.040000 0.000000 0.040000 ( 0.042930)
Niklas2 0.010000 0.000000 0.010000 ( 0.012296)
Marc-André 0.020000 0.000000 0.020000 ( 0.017632)
Cary1 0.080000 0.000000 0.080000 ( 0.081082)
Cary2 0.020000 0.000000 0.020000 ( 0.018739)
n = 10, k = 4, size = 6300
user system total real
Niklas1 0.090000 0.000000 0.090000 ( 0.094029)
Niklas2 0.030000 0.000000 0.030000 ( 0.030908)
Marc-André 0.040000 0.000000 0.040000 ( 0.038247)
Cary1 0.510000 0.000000 0.510000 ( 0.512819)
Cary2 0.060000 0.000000 0.060000 ( 0.059061)
n = 13, k = 2, size = 1716
user system total real
Niklas1 0.210000 0.000000 0.210000 ( 0.219898)
Niklas2 0.020000 0.000000 0.020000 ( 0.017828)
Marc-André 0.020000 0.000000 0.020000 ( 0.015917)
Cary1 0.030000 0.000000 0.030000 ( 0.029272)
Cary2 0.020000 0.000000 0.020000 ( 0.022058)
n = 13, k = 3, size = 45045
user system total real
Niklas1 1.480000 0.010000 1.490000 ( 1.484895)
Niklas2 0.350000 0.000000 0.350000 ( 0.343872)
Marc-André 0.470000 0.010000 0.480000 ( 0.493646)
Cary1 1.890000 0.010000 1.900000 ( 1.895684)
Cary2 0.520000 0.010000 0.530000 ( 0.531843)
n = 13, k = 4, size = 200200
user system total real
Niklas1 4.160000 0.070000 4.230000 ( 4.225072)
Niklas2 1.210000 0.000000 1.210000 ( 1.216814)
Marc-André 2.170000 0.030000 2.200000 ( 2.203629)
Cary1 29.930000 0.580000 30.510000 ( 30.545276)
Cary2 1.720000 0.040000 1.760000 ( 1.757895)
n = 13, k = 5, size = 600600
user system total real
Niklas1 12.750000 0.040000 12.790000 ( 12.800083)
Niklas2 3.070000 0.010000 3.080000 ( 3.085927)
Marc-André 3.630000 0.010000 3.640000 ( 3.637411)
Cary1 191.200000 0.890000 192.090000 (192.319270)
Cary2 5.290000 0.300000 5.590000 ( 5.625138)
n = 16, k = 2, size = 6435
user system total real
Niklas1 2.120000 0.010000 2.130000 ( 2.131065)
Niklas2 0.090000 0.010000 0.100000 ( 0.092635)
Marc-André 0.080000 0.000000 0.080000 ( 0.076312)
Cary1 0.290000 0.000000 0.290000 ( 0.295062)
Cary2 0.070000 0.000000 0.070000 ( 0.071995)
n = 16, k = 3, size = 1009008
user system total real
Niklas1 62.370000 0.940000 63.310000 ( 63.404404)
Niklas2 8.070000 0.020000 8.090000 ( 8.099837)
Marc-André 14.080000 0.330000 14.410000 ( 14.424437)
Cary1 48.930000 0.220000 49.150000 ( 49.227445)
Cary2 13.540000 0.280000 13.820000 ( 13.856880)
n = 16, k = 4, size = 2627625
user system total real
Niklas2 22.530000 0.290000 22.820000 ( 23.252738)
Marc-André 35.850000 1.160000 37.010000 ( 37.159520)
Cary2 39.850000 0.860000 40.710000 ( 40.780489)
n = 18, k = 2, size = 24310
user system total real
Niklas2 0.280000 0.000000 0.280000 ( 0.288286)
Marc-André 0.240000 0.020000 0.260000 ( 0.262669)
Cary2 0.240000 0.010000 0.250000 ( 0.245008)
n = 18, k = 3, size = 2858856
user system total real
Niklas2 37.150000 2.670000 39.820000 ( 48.484321)
Marc-André 68.010000 1.350000 69.360000 ( 70.249403)
Cary2 48.300000 0.840000 49.140000 ( 49.591279)
n = 20, k = 2, size = 92378
user system total real
Niklas2 1.260000 0.000000 1.260000 ( 1.271094)
Marc-André 1.180000 0.040000 1.220000 ( 1.205478)
Cary2 1.210000 0.010000 1.220000 ( 1.232024)
Interpretation
We appear to have a clear winner. Congratulations, Marc-André! Niklas, no gold medal this time, but silver's not bad.
I will speak to my code and let @Niklas and @Marc-André speak to theirs, as they better understand how their code works. My code appears to work reasonable well when the value of k
is small (2 or 3). As that parameter increases, however, my code spends an increasing percentage of time throwing out duplicates (e.g., dismissing [[2,3],[3,4,5],[6,7]
when I already "have" [[6,7,[3,4,5][2,3]]
. In particular, see the results for n = 13, k = 5
. That's in part because I designed the code for arbitrary partitions, not just for those that look like [m, m,...,m, m+1, m+1,...m+1]
. I will have a look at whether there is anything I can do to exploit that structure (without drastically changing my code), but I'm not optimistic about the outcome.
Edit
I've updated the benchmarks to include the results for my second solution ("cary2"). It posts solution times that are roughly comparable to @Marc-André
's, which is not surprising considering that both algorithms avoid unnecessary enumerations of combinations.
Edit 2
I've updated the benchmarks to include @Niklas' second solution. I've labelled the new one "Niklas2" and the original one "Nicklas1". I also added two tests for n = 18
. I tried running n = 20, k = 3
(for "Niklas1", "Marc-André" and "Cary2"), but cancelled it after 5 minutes or so, as it had not obtained an initial solution.
I believe the results speak for themselves.