Question

I'm performance testing variations on Linq extension methods, and I came across an odd situation.

When execution returns to the test, calling Count() first will return 1, and subsequent Any() is false.

When calling Any() first, it is true and subsequent Count() is 0.

A breakpoint inspection before either method is called shows there is 1 item as expected, but the enumerable is empty after it's enumerated this way or by calling either Any() or Count().

Can someone explain this behavior? Is there a bug in my implementation because of some caveat of deferred execution?

public class Thing
{
    public Guid Id { get; set; }
}

[TestClass]
public class IEnumerableExtensionsTests
{
    Guid[] thingKeys = new Guid[1] { Guid.Parse("11A1AA1A-1A11-A111-AA11-111111AA1A11") };
    System.Collections.ObjectModel.Collection<Thing> things= new System.Collections.ObjectModel.Collection<Thing>();
    int additionalThingCount = 100;

    [TestMethod]
    public void TestIntersect1()
    {
        DateTime start = DateTime.Now;
        var exceptionsList = things.Intersect1(thingKeys, (e) => e.Id);
        //int count1 = exceptionsList.Count();
        //Assert.AreEqual<int>(thingKeys.Length, count1);
        bool any1 = exceptionsList.Any();
        int count2 = exceptionsList.Count();
        bool any2 = exceptionsList.Any();
        string key = thingKeys[0].ToString();
        var first = exceptionsList.FirstOrDefault();
        var result = exceptionsList.FirstOrDefault(e => e.Id.ToString() == key);
        var duration = DateTime.Now - start;
        Debug.WriteLine(string.Format("TestIntersect1 duration {0}", duration));
        Assert.IsNotNull(result);
    }

    [TestInitialize]
    public void TestInit()
    {
        foreach(var key in thingKeys)
        {
            things.Add(new Thing()
            {
                Id = key
            });
        };
        for (int i1 = 0; i1 < additionalThingCount; i1++)
        {
            things.Add(new Thing()
            {
                Id = Guid.NewGuid()
            });
        }
    }
}

public static class IEnumerableExtension
{
    public static IEnumerable<T> Intersect1<T, Y>(this IEnumerable<T> items, IEnumerable<Y> keys, Func<T, Y> firstMemberSelector)
    {
        var hashset = new HashSet<Y>(keys);
        var returnValue = items.Where(t => hashset.Remove(firstMemberSelector(t)));
        return returnValue;
    }
}
Was it helpful?

Solution

Each time you iterate over the result, you're going to call that Where filter... which removes the items from hashset as it goes.

So after it's iterated once, hashset will no longer have any of those items, so there'll be nothing left to return.

Basically, what you're observing is that a Where clause with a side-effect is a bad idea.

You might want to use a Join to perform the intersection instead:

return items.Join(keys, firstMemberSelector, key => key, (value, key) => value);

It's not quite the same, in that it won't be a set operation... but you could potentially fix that.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top