سؤال

I am having a problem with a custom struct and with customizing LINQ's Except method to remove duplicates.

My struct is as follows:

// Immutable pair of a string and a byte array (presumably a file name/path and the
// hash of that file's contents — inferred from the member names).
// NOTE(review): the struct defines neither Equals nor GetHashCode, so comparisons use
// the ValueType defaults; for the byte[] field that means two arrays are only equal
// when they are the same array instance, not when they hold the same bytes.
public struct hashedFile
{
    string _fileString;
    byte[] _fileHash;

    // Stores both arguments as-is; the byte array is not copied.
    public hashedFile(string fileString, byte[] fileHash)
    {
        this._fileString = fileString;
        this._fileHash = fileHash;
    }

    // Read-only accessors for the two stored values.
    public string FileString { get { return _fileString; } }
    public byte[] FileHash { get { return _fileHash; } }
}

Now, the following code works fine:

    // Builds two lists from the same hashedFile values (so entries "one" and "two" carry
    // the very same byte[] instances in both lists) and shows every entry of the first
    // list that Except does not find in the second.
    public static void test2()
    {
        var one = new hashedFile("test1", BitConverter.GetBytes(1));
        var two = new hashedFile("test2", BitConverter.GetBytes(2));
        var three = new hashedFile("test3", BitConverter.GetBytes(3));
        var threeA = new hashedFile("test3", BitConverter.GetBytes(4));
        var four = new hashedFile("test4", BitConverter.GetBytes(4));

        var list1 = new List<hashedFile> { one, two, threeA, four };
        var list2 = new List<hashedFile> { one, two, three };

        List<hashedFile> diff = list1.Except(list2).ToList();

        // One dialog per remaining entry: the string, then the first hash byte as two hex digits.
        foreach (var remaining in diff)
        {
            var firstByteHex = remaining.FileHash[0].ToString("x2");
            MessageBox.Show(remaining.FileString + Environment.NewLine + firstByteHex);
        }
    }

This code shows "threeA" and "four" just fine. But it does not work if I do the following:

// Pseudo-code sketch: stands for a method that builds and returns the first list.
// "var stuff1" is a placeholder for the real parameters.
public static List<hashedFile> list1(var stuff1)
{
// Body omitted in the question: generate a List<hashedFile> here and return it.
}

// Pseudo-code sketch: stands for a method that builds and returns the second list.
public static List<hashedFile> list2(var stuff2)
{
// Body omitted in the question: generate a List<hashedFile> here and return it.
}

// Pseudo-code: take the entries of the first generated list that are not in the second.
List<hashedFile> diff = list1.except(list2);

"diff" becomes an exact copy of "list1". I should also mention that, when generating the lists, I pass the byte array returned by ComputeHash of System.Security.Cryptography.MD5 as the fileHash value.

Any ideas on how to overload either the Except or GetHashCode method for linq to successfully exclude the duplicate values from list2?

I'd really appreciate it! Thanks! ~MrFreeman

EDIT: Here is how I was originally trying to use a custom comparer: List<hashedFile> diff = newList.Except(oldList, new hashedFileComparer()).ToList();

// Equality comparer for hashedFile, intended to be passed to Except.
// NOTE(review): as written it compares the FileHash arrays by reference, not by
// content — see the notes on the individual lines below.
class hashedFileComparer : IEqualityComparer<hashedFile>
{

    // Returns true when both values have equal FileString and "equal" FileHash.
    public bool Equals(hashedFile x, hashedFile y)
    {
        // NOTE(review): hashedFile is a struct, so each argument is boxed separately
        // here; ReferenceEquals on two fresh boxes is never true, and a boxed struct
        // is never null, so neither of these two checks can fire.
        if (Object.ReferenceEquals(x, y)) return true;

        if (Object.ReferenceEquals(x, null) || Object.ReferenceEquals(y, null))
            return false;

        // NOTE(review): == on byte[] is reference equality; two arrays holding the
        // same bytes but created separately compare as different.
        return x.FileString == y.FileString && x.FileHash == y.FileHash;
    }

    // Combines the hash of FileString with the hash of FileHash using XOR.
    public int GetHashCode(hashedFile Hashedfile)
    {
        // NOTE(review): same boxing issue as in Equals — this check never succeeds.
        if (Object.ReferenceEquals(Hashedfile, null)) return 0;

        int hashFileString = Hashedfile.FileString == null ? 0 : Hashedfile.FileString.GetHashCode();
        // NOTE(review): arrays do not override GetHashCode, so this value depends on
        // the array instance rather than its bytes. Separately created arrays with
        // identical content therefore produce different hash codes.
        int hashFileHash = Hashedfile.FileHash.GetHashCode();
        int returnVal = hashFileString ^ hashFileHash;
        // Debug output for entries whose FileString contains "blankmusic".
        // NOTE(review): FileString is dereferenced here without the null check used
        // three lines above, so a null FileString throws.
        if (Hashedfile.FileString.Contains("blankmusic") == true)
        {
            Console.WriteLine(returnVal.ToString());
        }

        return returnVal;
    }

}
هل كانت مفيدة؟

المحلول

If you want the type to handle its own comparisons in Except, the interface you need is IEquatable<T>. The IEqualityComparer<T> interface is for having a separate type handle the comparisons; an instance of that type is passed to the Except overload that accepts a comparer.

This achieves what you want (assuming you wanted both file string and hash compared).

/// <summary>
/// Pairs a string with a byte array and compares both by value: the strings must be
/// equal and the byte arrays must contain the same bytes in the same order.
/// </summary>
/// <remarks>
/// <see cref="Equals(object)"/> and <see cref="GetHashCode"/> are overridden to agree with
/// <see cref="Equals(hashedFile)"/>. Hash-based operations such as Enumerable.Except compare
/// hash codes before they call Equals, so equal values must produce equal hash codes.
/// </remarks>
public struct hashedFile : IEquatable<hashedFile>
{
    readonly string _fileString;
    readonly byte[] _fileHash;

    /// <summary>Stores both arguments as-is; the byte array is not copied.</summary>
    /// <param name="fileString">The string part of the pair; may be null.</param>
    /// <param name="fileHash">The byte-array part of the pair; may be null.</param>
    public hashedFile(string fileString, byte[] fileHash)
    {
        this._fileString = fileString;
        this._fileHash = fileHash;
    }

    public string FileString { get { return _fileString; } }
    public byte[] FileHash { get { return _fileHash; } }

    /// <summary>Value equality on both members.</summary>
    /// <param name="other">The value to compare with.</param>
    /// <returns>
    /// true when the strings are equal and the byte arrays are either both null or hold
    /// the same sequence of bytes.
    /// </returns>
    public bool Equals(hashedFile other)
    {
        if (_fileString != other._fileString)
        {
            return false;
        }

        // default(hashedFile) has a null array; treat null as equal only to null
        // instead of throwing from SequenceEqual.
        if (_fileHash == null || other._fileHash == null)
        {
            return _fileHash == other._fileHash;
        }

        return _fileHash.SequenceEqual(other._fileHash);
    }

    /// <summary>Routes object-typed comparisons to the same value equality.</summary>
    public override bool Equals(object obj)
    {
        return obj is hashedFile && Equals((hashedFile)obj);
    }

    /// <summary>
    /// Hash code derived from the string and from every byte of the array, so values
    /// that are equal according to <see cref="Equals(hashedFile)"/> hash identically.
    /// </summary>
    public override int GetHashCode()
    {
        // Overflow is expected and harmless when mixing hash values.
        unchecked
        {
            int hash = _fileString == null ? 0 : _fileString.GetHashCode();
            if (_fileHash != null)
            {
                for (int i = 0; i < _fileHash.Length; i++)
                {
                    hash = (hash * 31) + _fileHash[i];
                }
            }

            return hash;
        }
    }
}

Here is an example in a working console application.

/// <summary>
/// Console demo: builds two lists of <see cref="hashedFile"/> from separately created
/// byte arrays and prints the entries of the first list that Except does not find in
/// the second.
/// </summary>
public class Program
{
    /// <summary>
    /// Pairs a string with a byte array and compares both by value. Equals(object) and
    /// GetHashCode are overridden to agree with Equals(hashedFile), because Except
    /// compares hash codes before it calls Equals.
    /// </summary>
    public struct hashedFile : IEquatable<hashedFile>
    {
        readonly string _fileString;
        readonly byte[] _fileHash;

        /// <summary>Stores both arguments as-is; the byte array is not copied.</summary>
        public hashedFile(string fileString, byte[] fileHash)
        {
            this._fileString = fileString;
            this._fileHash = fileHash;
        }

        public string FileString { get { return _fileString; } }
        public byte[] FileHash { get { return _fileHash; } }

        /// <summary>
        /// true when the strings are equal and the byte arrays are either both null or
        /// hold the same sequence of bytes.
        /// </summary>
        public bool Equals(hashedFile other)
        {
            if (_fileString != other._fileString)
            {
                return false;
            }

            // default(hashedFile) has a null array; avoid throwing from SequenceEqual.
            if (_fileHash == null || other._fileHash == null)
            {
                return _fileHash == other._fileHash;
            }

            return _fileHash.SequenceEqual(other._fileHash);
        }

        /// <summary>Routes object-typed comparisons to the same value equality.</summary>
        public override bool Equals(object obj)
        {
            return obj is hashedFile && Equals((hashedFile)obj);
        }

        /// <summary>Hash code built from the string and every byte of the array.</summary>
        public override int GetHashCode()
        {
            // Overflow is expected and harmless when mixing hash values.
            unchecked
            {
                int hash = _fileString == null ? 0 : _fileString.GetHashCode();
                if (_fileHash != null)
                {
                    for (int i = 0; i < _fileHash.Length; i++)
                    {
                        hash = (hash * 31) + _fileHash[i];
                    }
                }

                return hash;
            }
        }
    }

    /// <summary>
    /// Prints each entry left by list1.Except(list2) — its string, then the first hash
    /// byte as two hex digits — and waits for Enter before exiting.
    /// </summary>
    public static void Main(string[] args)
    {
        List<hashedFile> list1 = GetList1();
        List<hashedFile> list2 = GetList2();
        List<hashedFile> diff = list1.Except(list2).ToList();

        foreach (hashedFile h in diff)
        {
            Console.WriteLine(h.FileString + Environment.NewLine + h.FileHash[0].ToString("x2"));
        }

        Console.ReadLine();
    }

    /// <summary>First list: test1/1, test2/2, test3/4 and test4/4.</summary>
    private static List<hashedFile> GetList1()
    {
        return new List<hashedFile>
        {
            new hashedFile("test1", BitConverter.GetBytes(1)),
            new hashedFile("test2", BitConverter.GetBytes(2)),
            new hashedFile("test3", BitConverter.GetBytes(4)),
            new hashedFile("test4", BitConverter.GetBytes(4)),
        };
    }

    /// <summary>
    /// Second list: test1/1, test2/2 and test3/3. Its byte arrays are created
    /// independently of those in <see cref="GetList1"/>, so matches rely on content.
    /// </summary>
    private static List<hashedFile> GetList2()
    {
        return new List<hashedFile>
        {
            new hashedFile("test1", BitConverter.GetBytes(1)),
            new hashedFile("test2", BitConverter.GetBytes(2)),
            new hashedFile("test3", BitConverter.GetBytes(3)),
        };
    }
}

This is becoming quite long, but I will continue: there is an issue with the above implementation if hashedFile is a class rather than a struct (and sometimes even when it is a struct, possibly depending on the framework version). Except uses an internal Set class, and the problematic part of it is that it compares the hash codes first and only uses the comparer to check equality if they are equal.

// Excerpt of the lookup: compute the value's hash code, pick a bucket from it, and
// walk that bucket's chain of slots via each slot's "next" index.
int hashCode = this.InternalGetHashCode(value);
for (int i = this.buckets[hashCode % this.buckets.Length] - 1; i >= 0; i = this.slots[i].next)
{
    // The stored hash code is checked first; because of &&, comparer.Equals is only
    // called for slots whose hash code already matches.
    if ((this.slots[i].hashCode == hashCode) && this.comparer.Equals(this.slots[i].value, value))
    {
        return true;
    }
}

The fix for this, depending on your performance requirements, is to simply return a hash code of 0. Every item then has the same hash code, which means the comparer will always be used.

// Returns the same hash code for every instance. All values then collide, so a
// hash-based lookup can never rule out a match by hash code alone and has to call
// Equals on each candidate. Correct, but lookups degrade to comparing against every
// stored item.
public override int GetHashCode()
{
    return 0;
}

The other option is to generate a proper hash code. This matters sooner than I expected: the difference for 500 items is 7ms vs 1ms, and for 5000 items it is 650ms vs 13ms. So it is probably best to go with a proper hash code. The byte array hash code function is adapted from https://stackoverflow.com/a/7244316/1002621

/// <summary>
/// Hash code computed over the bytes of the file hash followed by the UTF-8 bytes of
/// the file string: for each byte, rotate the accumulator left by 3 bits and XOR the
/// byte in.
/// </summary>
/// <returns>
/// A value that is identical for instances whose strings are equal and whose byte
/// arrays hold the same sequence; null members contribute no bytes.
/// </returns>
public override int GetHashCode()
{
    // Concat keeps every byte in order. (A set union would drop repeated byte values,
    // making many different inputs hash alike.) Null members are treated as empty so a
    // default-initialized instance does not throw.
    var bytes = (_fileHash ?? new byte[0])
        .Concat(Encoding.UTF8.GetBytes(_fileString ?? string.Empty));

    // uint makes >> a logical shift, so (h << 3) | (h >> 29) is a true 32-bit rotate;
    // on a signed int the right shift would smear the sign bit into the result.
    uint hashCode = 0;
    foreach (var b in bytes)
    {
        // Explicit parentheses: ^ binds tighter than |, so without them the byte
        // would be XORed with the shifted-out bits only and then ORed in.
        hashCode = ((hashCode << 3) | (hashCode >> 29)) ^ b;
    }

    return unchecked((int)hashCode);
}
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top