Question

Consider the class below that represents a Broker:

public class Broker
{
    public string Name = string.Empty;
    public int Weight = 0;

    public Broker(string n, int w)
    {
        this.Name = n;
        this.Weight = w;
    }
}

I'd like to randomly select a Broker from an array, taking into account their weights.

What do you think of the code below?

class Program
    {
        private static Random _rnd = new Random();

        public static Broker GetBroker(List<Broker> brokers, int totalWeight)
        {
            // totalWeight is the sum of all brokers' weight

            int randomNumber = _rnd.Next(0, totalWeight);

            Broker selectedBroker = null;
            foreach (Broker broker in brokers)
            {
                if (randomNumber <= broker.Weight)
                {
                    selectedBroker = broker;
                    break;
                }

                randomNumber = randomNumber - broker.Weight;
            }

            return selectedBroker;
        }


        static void Main(string[] args)
        {
            List<Broker> brokers = new List<Broker>();
            brokers.Add(new Broker("A", 10));
            brokers.Add(new Broker("B", 20));
            brokers.Add(new Broker("C", 20));
            brokers.Add(new Broker("D", 10));

            // total the weigth
            int totalWeight = 0;
            foreach (Broker broker in brokers)
            {
                totalWeight += broker.Weight;
            }

            while (true)
            {
                Dictionary<string, int> result = new Dictionary<string, int>();

                Broker selectedBroker = null;

                for (int i = 0; i < 1000; i++)
                {
                    selectedBroker = GetBroker(brokers, totalWeight);
                    if (selectedBroker != null)
                    {
                        if (result.ContainsKey(selectedBroker.Name))
                        {
                            result[selectedBroker.Name] = result[selectedBroker.Name] + 1;
                        }
                        else
                        {
                            result.Add(selectedBroker.Name, 1);
                        }
                    }
                }


                Console.WriteLine("A\t\t" + result["A"]);
                Console.WriteLine("B\t\t" + result["B"]);
                Console.WriteLine("C\t\t" + result["C"]);
                Console.WriteLine("D\t\t" + result["D"]);

                result.Clear();
                Console.WriteLine();
                Console.ReadLine();
            }
        }
    }

I'm not so confident. When I run this, Broker A always gets more hits than Broker D, and they have the same weight.

Is there a more accurate algorithm?

Thanks!

Was it helpful?

Solution

Your algorithm is nearly correct. However, the test should be < instead of <=:

if (randomNumber < broker.Weight)

This is because 0 is inclusive in the random number while totalWeight is exclusive. In other words, a broker with weight 0 would still have a small chance of being selected – not at all what you want. This accounts for broker A having more hits than broker D.

Other than that, your algorithm is fine and in fact the canonical way of solving this problem.

OTHER TIPS

How about something a little more generic, that can be used for any data type?

using System;
using System.Linq;
using System.Collections;
using System.Collections.Generic;

public static class IEnumerableExtensions {

    public static T RandomElementByWeight<T>(this IEnumerable<T> sequence, Func<T, float> weightSelector) {
        float totalWeight = sequence.Sum(weightSelector);
        // The weight we are after...
        float itemWeightIndex =  new Random().NextDouble * totalWeight;
        float currentWeightIndex = 0;

        foreach(var item in from weightedItem in sequence select new { Value = weightedItem, Weight = weightSelector(weightedItem) }) {
            currentWeightIndex += item.Weight;

            // If we've hit or passed the weight we are after for this item then it's the one we want....
            if(currentWeightIndex >= itemWeightIndex)
                return item.Value;

        }

        return default(T);

    }

}

Simply call by

    Dictionary<string, float> foo = new Dictionary<string, float>();
    foo.Add("Item 25% 1", 0.5f);
    foo.Add("Item 25% 2", 0.5f);
    foo.Add("Item 50%", 1f);

    for(int i = 0; i < 10; i++)
        Console.WriteLine(this, "Item Chosen {0}", foo.RandomElementByWeight(e => e.Value));
class Program
{
    static void Main(string[] args)
    {
        var books = new List<Book> {
        new Book{Isbn=1,Name="A",Weight=1},
        new Book{Isbn=2,Name="B",Weight=100},
        new Book{Isbn=3,Name="C",Weight=1000},
        new Book{Isbn=4,Name="D",Weight=10000},
        new Book{Isbn=5,Name="E",Weight=100000}};

        Book randomlySelectedBook = WeightedRandomization.Choose(books);
    }
}

public static class WeightedRandomization
{
    public static T Choose<T>(List<T> list) where T : IWeighted
    {
        if (list.Count == 0)
        {
            return default(T);
        }

        int totalweight = list.Sum(c => c.Weight);
        Random rand = new Random();
        int choice = rand.Next(totalweight);
        int sum = 0;

        foreach (var obj in list)
        {
            for (int i = sum; i < obj.Weight + sum; i++)
            {
                if (i >= choice)
                {
                    return obj;
                }
            }
            sum += obj.Weight;
        }

        return list.First();
    }
}

public interface IWeighted
{
    int Weight { get; set; }
}

public class Book : IWeighted
{
    public int Isbn { get; set; }
    public string Name { get; set; }
    public int Weight { get; set; }
}

An alternative method favours speed when selecting the broker over memory usage. Basically we create the list containing the same number of references to a broker instance as the specified weight.

List<Broker> brokers = new List<Broker>();
for (int i=0; i<10; i++)
    brokers.Add(new Broker("A", 10));
for (int i=0; i<20; i++)
    brokers.Add(new Broker("B", 20));
for (int i=0; i<20; i++)
    brokers.Add(new Broker("C", 20));
for (int i=0; i<10; i++)
    brokers.Add(new Broker("D", 10));

Then, to select a randomly weighted instance is an O(1) operation:

int randomNumber = _rnd.Next(0, brokers.length);
selectedBroker = brokers[randomNumber];

Since this is the top result on Google:

I've created a C# library for randomly selected weighted items.

  • It implements both the tree-selection and walker alias method algorithms, to give the best performance for all use-cases.
  • It is unit-tested and optimized.
  • It has LINQ support.
  • It's free and open-source, licensed under the MIT license.

Some example code:

IWeightedRandomizer<string> randomizer = new DynamicWeightedRandomizer<string>();
randomizer["Joe"] = 1;
randomizer["Ryan"] = 2;
randomizer["Jason"] = 2;

string name1 = randomizer.RandomWithReplacement();
//name1 has a 20% chance of being "Joe", 40% of "Ryan", 40% of "Jason"

string name2 = randomizer.RandomWithRemoval();
//Same as above, except whichever one was chosen has been removed from the list.

If you want more speed you can either consider weighted reservoir sampling where you don't have to find the total weight ahead of time (but you sample more often from the random number generator). The code might look something like

Broker selected = null;
int s = 0;
foreach(Broker broker in brokers) {
    s += broker.Weight;
    if (broker.Weight <= _rnd.Next(0,s)) {
        selected = broker;
    }
}

This requires going once through the list brokers. However if the list of brokers is fixed or doesn't change that often you can keep an array of cumulative sums, i.e. A[i] is the sum of weights of all brokers 0,..,i-1. Then A[n] is the total weight and if you pick a number between 1 and A[n-1], say x you find the broker j s.t. A[j-1] <= x < A[j]. For convenience you let A[0] = 0. You can find this broker number j in log(n) steps using binary search, I'll leave the code as an easy exercise. If your data changes frequently this might not be a good way to go since every time some weight changes you might need to update a large portion of the array.

I've come up with a generic version of this solution:

public static class WeightedEx
{
    /// <summary>
    /// Select an item from the given sequence according to their respective weights.
    /// </summary>
    /// <typeparam name="TItem">Type of item item in the given sequence.</typeparam>
    /// <param name="a_source">Given sequence of weighted items.</param>
    /// <returns>Randomly picked item.</returns>
    public static TItem PickWeighted<TItem>(this IEnumerable<TItem> a_source)
        where TItem : IWeighted
    {
        if (!a_source.Any())
            return default(TItem);

        var source= a_source.OrderBy(i => i.Weight);

        double dTotalWeight = source.Sum(i => i.Weight);

        Random rand = new Random();

        while (true)
        {
            double dRandom = rand.NextDouble() * dTotalWeight;

            foreach (var item in source)
            {
                if (dRandom < item.Weight)
                    return item;

                dRandom -= item.Weight;
            }
        }
    }
}

/// <summary>
/// IWeighted: Implementation of an item that is weighted.
/// </summary>
public interface IWeighted
{
    double Weight { get; }
}

Just to share my own implementation. Hope you'll find it useful.

    // Author: Giovanni Costagliola <giovanni.costagliola@gmail.com>

    using System;
    using System.Collections.Generic;
    using System.Linq;

    namespace Utils
    {
    /// <summary>
    /// Represent a Weighted Item.
    /// </summary>
    public interface IWeighted
    {
        /// <summary>
        /// A positive weight. It's up to the implementer ensure this requirement
        /// </summary>
        int Weight { get; }
    }

    /// <summary>
    /// Pick up an element reflecting its weight.
    /// </summary>
    /// <typeparam name="T"></typeparam>
    public class RandomWeightedPicker<T> where T:IWeighted
    {
        private readonly IEnumerable<T> items;
        private readonly int totalWeight;
        private Random random = new Random();

        /// <summary>
        /// Initiliaze the structure. O(1) or O(n) depending by the options, default O(n).
        /// </summary>
        /// <param name="items">The items</param>
        /// <param name="checkWeights">If <c>true</c> will check that the weights are positive. O(N)</param>
        /// <param name="shallowCopy">If <c>true</c> will copy the original collection structure (not the items). Keep in mind that items lifecycle is impacted.</param>
        public RandomWeightedPicker(IEnumerable<T> items, bool checkWeights = true, bool shallowCopy = true)
        {
            if (items == null) throw new ArgumentNullException("items");
            if (!items.Any()) throw new ArgumentException("items cannot be empty");
            if (shallowCopy)
                this.items = new List<T>(items);
            else
                this.items = items;
            if (checkWeights && this.items.Any(i => i.Weight <= 0))
            {
                throw new ArgumentException("There exists some items with a non positive weight");
            }
            totalWeight = this.items.Sum(i => i.Weight);
        }
        /// <summary>
        /// Pick a random item based on its chance. O(n)
        /// </summary>
        /// <param name="defaultValue">The value returned in case the element has not been found</param>
        /// <returns></returns>
        public T PickAnItem()
        {
            int rnd = random.Next(totalWeight);
            return items.First(i => (rnd -= i.Weight) < 0);
        }

        /// <summary>
        /// Resets the internal random generator. O(1)
        /// </summary>
        /// <param name="seed"></param>
        public void ResetRandomGenerator(int? seed)
        {
            random = seed.HasValue ? new Random(seed.Value) : new Random();
        }
    }
}

Gist: https://gist.github.com/MrBogomips/ae6f6c9af8032392e4b93aaa393df447

The implementation in the original question seems a little odd to me;

The total weight of the list is 60 so the random number is 0-59. It always checks the random number against the weight and then decrements it. It looks to me that it would favour things in the list based on their order.

Here's a generic implementation I'm using - the crux is in the Random property:

using System;
using System.Collections.Generic;
using System.Linq;

public class WeightedList<T>
{
    private readonly Dictionary<T,int> _items = new Dictionary<T,int>();

    // Doesn't allow items with zero weight; to remove an item, set its weight to zero
    public void SetWeight(T item, int weight)
    {
        if (_items.ContainsKey(item))
        {
            if (weight != _items[item])
            {
                if (weight > 0)
                {
                    _items[item] = weight;
                }
                else
                {
                    _items.Remove(item);
                }

                _totalWeight = null; // Will recalculate the total weight later
            }
        }
        else if (weight > 0)
        {
            _items.Add(item, weight);

            _totalWeight = null; // Will recalculate the total weight later
        }
    }

    public int GetWeight(T item)
    {
        return _items.ContainsKey(item) ? _items[item] : 0;
    }

    private int? _totalWeight;
    public int totalWeight
    {
        get
        {
            if (!_totalWeight.HasValue) _totalWeight = _items.Sum(x => x.Value);

            return _totalWeight.Value;
        }
    }

    public T Random
    {
        get
        {
            var temp = 0;
            var random = new Random().Next(totalWeight);

            foreach (var item in _items)
            {
                temp += item.Value;

                if (random < temp) return item.Key;
            }

            throw new Exception($"unable to determine random {typeof(T)} at {random} in {totalWeight}");
        }
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top