Question

I have to compute MD5 hashes of files and folders on both a client (C#) and a server (PHP). Both implementations run without errors, but the hashes they produce do not match. Any ideas would be greatly appreciated.

Here are my two algorithms

C#

using System;
using System.IO;
using System.Security.Cryptography;
using System.Text;

namespace nofolder
{
    // Builds a hash string for a single file or for every file beneath a directory.
    // Per-file MD5 bytes are accumulated as text in `sb`, and the public methods
    // return the String.GetHashCode() of that accumulated text.
    public class classHasher
    {
        // MD5 implementation, created once in the constructor and reused for every file.
        MD5 hashAlgo = null;
        // Accumulator shared by all methods; reset at the start of each public hash call.
        StringBuilder sb;
        public classHasher()
        {
            hashAlgo = new MD5CryptoServiceProvider();
        }
        // Entry point: hashes a file or a directory; returns "" when the path is neither.
        public string getHash(String path)
        {
            // Dispatch on whether the path names an existing file or an existing directory.
            if (File.Exists(path)) return getHashOverFile(path);
            if (Directory.Exists(path)) return getHashOverFolder(path);
            return "";
        }
        // Hashes every file under `path` (recursively) into `sb`, then returns the
        // accumulated text's GetHashCode() as a decimal string.
        public string getHashOverFolder(String path)
        {
            sb = new StringBuilder();
            getFolderContents(path);
            // NOTE: String.GetHashCode() yields a 32-bit int hash code, not an MD5 digest.
            return sb.ToString().GetHashCode().ToString();
        }
        // Hashes one file into `sb`, then returns the accumulated text's GetHashCode()
        // as a decimal string.
        public string getHashOverFile(String filename)
        {
            sb = new StringBuilder();
            getFileHash(filename);
            // NOTE: String.GetHashCode() yields a 32-bit int hash code, not an MD5 digest.
            return sb.ToString().GetHashCode().ToString();
        }
        // Depth-first walk: all sub-directories of `fold` are processed before its own
        // files, each in the order Directory.GetDirectories / GetFiles returns them
        // (no explicit sorting is applied here).
        private void getFolderContents(string fold)
        {
            foreach (var d in Directory.GetDirectories(fold))
            {
                getFolderContents(d);
            }
            foreach (var f in Directory.GetFiles(fold))
            {
                getFileHash(f);
            }
        }
        // Computes the MD5 of file `f` and appends its bytes to `sb`.
        private void getFileHash(String f)
        {
            using (FileStream file = new FileStream(f, FileMode.Open, FileAccess.Read))
            {
                byte[] retVal = hashAlgo.ComputeHash(file);
                // Redundant with the enclosing using block, which also disposes the stream.
                file.Close();
                foreach (var y in retVal)
                {
                    // Parameterless ToString() formats each byte as unpadded decimal
                    // (e.g. 10 -> "10", 255 -> "255"), not as two hexadecimal digits.
                    sb.Append(y.ToString());
                }
            }
        }
    }
}

PHP

/**
 * Recursive MD5 digest of a file or directory.
 *
 * A non-directory path is hashed with md5_file(). A directory yields the MD5 of
 * the concatenated digests of its entries, taken in whatever order the
 * directory handle returns them (no sorting is applied).
 *
 * @param string $dir Path to a file or a directory.
 * @return string|false Hex MD5 digest; md5_file() returns false on failure.
 */
function include__md5_dir($dir){
    // Anything that is not a directory is hashed directly as a file.
    if (!is_dir($dir)){
        return  md5_file($dir);
    }

    $handle = dir($dir);
    $digests = array();

    for ($name = $handle->read(); false !== $name; $name = $handle->read()){
        // Skip the self and parent pseudo-entries.
        if ($name == '.' || $name == '..'){
            continue;
        }

        $child = $dir.'/'.$name;
        // Sub-directories contribute their own recursive digest, files their content digest.
        $digests[] = is_dir($child) ? include__md5_dir($child) : md5_file($child);
    }
    $handle->close();

    return md5(implode('', $digests));
}

EDIT.

I have decided that the C# must change; the PHP is fine as it is. The first code that works 100% gets the bounty.

Was it helpful?

Solution 2

I eventually fixed this myself, and I include the answer for posterity. The key to this solution was eradicating the difference in default directory ORDERING between Linux and Windows. This was only tested on a Linux server (CentOS 6.3) and a Windows 7 client.

C#

/// <summary>
/// Computes MD5 digests of files, and recursive MD5 digests of directory trees,
/// as lowercase hexadecimal strings.
/// </summary>
/// <remarks>
/// A directory's digest is the MD5 of the concatenated digests of its immediate
/// entries (sub-directories contribute their own recursive digest). Entries are
/// visited in ordinal order of their paths, so the result depends neither on the
/// order in which the file system enumerates them nor on the current culture.
/// </remarks>
public class classHasher
{
    // One MD5 instance is created per hasher and reused for every digest.
    private readonly MD5 hashAlgo;

    /// <summary>Creates a hasher backed by the platform's default MD5 implementation.</summary>
    public classHasher()
    {
        hashAlgo = MD5.Create();
    }

    /// <summary>
    /// Returns the MD5 digest of a file, or the recursive digest of a directory.
    /// </summary>
    /// <param name="path">Path to a file or a directory.</param>
    /// <returns>32-character lowercase hexadecimal digest.</returns>
    /// <exception cref="FileNotFoundException">
    /// <paramref name="path"/> is not a directory and cannot be opened as a file.
    /// </exception>
    public string UltraHasher(String path)
    {
        // Anything that is not a directory is hashed as a plain file.
        if (!Directory.Exists(path))
        {
            return getHashOverFile(path);
        }

        // Enumerate the directory once; sub-directories and files share one list so
        // that they are interleaved by name rather than grouped by kind.
        List<string> entries = new List<string>();
        entries.AddRange(Directory.GetDirectories(path));
        entries.AddRange(Directory.GetFiles(path));

        // Ordinal comparison is required here: the parameterless Sort() compares
        // strings with the current culture, which orders mixed-case and punctuated
        // names differently from a byte-wise sort on the other side.
        entries.Sort(StringComparer.Ordinal);

        StringBuilder digests = new StringBuilder();
        foreach (string entry in entries)
        {
            if (Directory.Exists(entry))
            {
                digests.Append(UltraHasher(entry));
            }
            else if (File.Exists(entry))
            {
                digests.Append(getHashOverFile(entry));
            }
            // An entry that vanished since enumeration is neither and is skipped.
        }

        return CalculateMD5Hash(digests.ToString());
    }

    /// <summary>Returns the MD5 digest of a file's contents.</summary>
    /// <param name="filename">Path of the file to read.</param>
    /// <returns>32-character lowercase hexadecimal digest.</returns>
    public string getHashOverFile(String filename)
    {
        // The using block closes the stream even if hashing throws.
        using (FileStream file = new FileStream(filename, FileMode.Open, FileAccess.Read))
        {
            return ToHex(hashAlgo.ComputeHash(file));
        }
    }

    /// <summary>Returns the MD5 digest of a string.</summary>
    /// <param name="input">Text to hash.</param>
    /// <returns>32-character lowercase hexadecimal digest.</returns>
    public string CalculateMD5Hash(string input)
    {
        // UTF-8 yields the same bytes as ASCII for the hex digests hashed by
        // UltraHasher, and unlike ASCII it does not collapse non-ASCII characters to '?'.
        byte[] inputBytes = Encoding.UTF8.GetBytes(input);
        return ToHex(hashAlgo.ComputeHash(inputBytes));
    }

    // Formats each byte as two lowercase hexadecimal digits.
    private static string ToHex(byte[] bytes)
    {
        StringBuilder hex = new StringBuilder(bytes.Length * 2);
        foreach (byte b in bytes)
        {
            hex.Append(b.ToString("x2"));
        }
        return hex.ToString();
    }
}

PHP

/**
 * Recursively computes an MD5 digest for a file or a directory tree.
 *
 * A file hashes to the MD5 of its contents. A directory hashes to the MD5 of the
 * concatenated digests of its entries (sub-directories contribute their own
 * recursive digest), visited in byte-wise order of their names.
 *
 * @param string $dir Path to a file or a directory.
 * @return string|false 32-character lowercase hex digest; md5_file() returns
 *                      false when a plain file passed as $dir cannot be read.
 */
function md5_dir($dir){
    if (!is_dir($dir)){
        return md5_file($dir);
    }

    // Collect entry names, dropping the self and parent pseudo-entries.
    $names = array();
    foreach (scandir($dir) as $entry){
        if ($entry !== '.' && $entry !== '..'){
            $names[] = $entry;
        }
    }

    // SORT_STRING forces a plain string comparison. The default SORT_REGULAR
    // compares numeric-looking names as numbers ("9" before "10"), which
    // disagrees with an ordinal string sort on the other side.
    sort($names, SORT_STRING);

    $filemd5s = array();
    foreach ($names as $entry){
        $path = $dir.'/'.$entry;
        $filemd5s[] = is_dir($path) ? md5_dir($path) : md5_file($path);
    }

    return md5(implode('', $filemd5s));
}

These two functions will traverse a Windows folder (C#) and a Linux folder (PHP) and return the SAME hash for every directory (recursively, so sub-directories are included), provided both trees hold the same content.

OTHER TIPS

Your PHP code is assembling hexadecimal numbers (as per the md5_file() documentation)

Your C# code is assembling non-0-padded decimal numbers.
You need to use y.ToString("x2") to format each byte as two-digit hexadecimal.

Also, return sb.ToString().GetHashCode().ToString(); is extremely wrong. Don't call GetHashCode(); it's not what you want: it returns a 32-bit hash code meant for hash tables, not an MD5 digest.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top