Do not use IEnumerable<T>
with anything touching a database!
A solution aimed at LinqToSql and LinqToEntities should not be using IEnumerable<T>
. Your current self answer will result in every single person being selected from the database and then being queried in memory using LinqToObjects.
To make a solution that is translated to SQL and executed by the database you have to use IQueryable<T>
and Expressions instead.
public static class QueryableExtensions
{
public static IQueryable<T> TopWithTies<T, TComparand>(this IQueryable<T> source, Expression<Func<T, TComparand>> topBy, int topCount)
{
if (source == null) throw new ArgumentNullException("source");
if (topBy == null) throw new ArgumentNullException("topBy");
if (topCount < 1) throw new ArgumentOutOfRangeException("topCount", string.Format("topCount must be greater than 0, was {0}", topCount));
var topValues = source.OrderBy(topBy)
.Select(topBy)
.Take(topCount);
var queryableMaxMethod = typeof(Queryable).GetMethods()
.Single(mi => mi.Name == "Max" &&
mi.GetParameters().Length == 1 &&
mi.IsGenericMethod)
.MakeGenericMethod(typeof(TComparand));
var lessThanOrEqualToMaxTopValue = Expression.Lambda<Func<T, bool>>(
Expression.LessThanOrEqual(
topBy.Body,
Expression.Call(
queryableMaxMethod,
topValues.Expression)),
new[] { topBy.Parameters.Single() });
var topNRowsWithTies = source.Where(lessThanOrEqualToMaxTopValue)
.OrderBy(topBy);
return topNRowsWithTies;
}
public static IQueryable<T> TopWithTiesDescending<T, TComparand>(this IQueryable<T> source, Expression<Func<T, TComparand>> topBy, int topCount)
{
if (source == null) throw new ArgumentNullException("source");
if (topBy == null) throw new ArgumentNullException("topBy");
if (topCount < 1) throw new ArgumentOutOfRangeException("topCount", string.Format("topCount must be greater than 0, was {0}", topCount));
var topValues = source.OrderByDescending(topBy)
.Select(topBy)
.Take(topCount);
var queryableMinMethod = typeof(Queryable).GetMethods()
.Single(mi => mi.Name == "Min" &&
mi.GetParameters().Length == 1 &&
mi.IsGenericMethod)
.MakeGenericMethod(typeof(TComparand));
var greaterThanOrEqualToMinTopValue = Expression.Lambda<Func<T, bool>>(
Expression.GreaterThanOrEqual(
topBy.Body,
Expression.Call(queryableMinMethod,
topValues.Expression)),
new[] { topBy.Parameters.Single() });
var topNRowsWithTies = source.Where(greaterThanOrEqualToMinTopValue)
.OrderByDescending(topBy);
return topNRowsWithTies;
}
}
This creates queries of the following form:
SELECT [t0].[Id], [t0].[Name], [t0].[Score]
FROM [Person] AS [t0]
WHERE [t0].[Score] >= ((
SELECT MIN([t2].[Score])
FROM (
SELECT TOP (3) [t1].[Score]
FROM [Person] AS [t1]
ORDER BY [t1].[Score] DESC
) AS [t2]
))
ORDER BY [t0].[Score] DESC
That query is only about 50% worse than the baseline query:
SELECT TOP (3) WITH TIES
[t0].[Id],
[t0].[Name],
[t0].[Score]
FROM
[Person] AS [t0]
ORDER BY [t0].[Score] desc
With a data set consisting of your original 5 records and an additional 10000 records all with scores less than the original both of these are more or less instant (less than 20 milliseconds).
The IEnumerable<T>
approach took a whole 2 minutes!
If the expression building and reflection seems scary the same thing can be achieved with a join:
public static IQueryable<T> TopWithTiesDescendingJoin<T, TComparand>(this IQueryable<T> source, Expression<Func<T, TComparand>> topBy, int topCount)
{
if (source == null) throw new ArgumentNullException("source");
if (topBy == null) throw new ArgumentNullException("topBy");
if (topCount < 1) throw new ArgumentOutOfRangeException("topCount", string.Format("topCount must be greater than 0, was {0}", topCount));
var orderedByValue = source.OrderByDescending(topBy);
var topNValues = orderedByValue.Select(topBy).Take(topCount).Distinct();
var topNRowsWithTies = topNValues.Join(source, value => value, topBy, (x, row) => row);
return topNRowsWithTies.OrderByDescending(topBy);
}
With the following query as the result (with about the same performance):
SELECT [t3].[Id], [t3].[Name], [t3].[Score]
FROM (
SELECT DISTINCT [t1].[Score]
FROM (
SELECT TOP (3) [t0].[Score]
FROM [Person] AS [t0]
ORDER BY [t0].[Score] DESC
) AS [t1]
) AS [t2]
INNER JOIN [Person] AS [t3] ON [t2].[Score] = [t3].[Score]
ORDER BY [t3].[Score] DESC