Question

Is there a way to calculate the MD5 hash of a file before the upload to the server using Javascript?

Was it helpful?

Solution

While there are JS implementations of the MD5 algorithm, older browsers are generally unable to read files from the local filesystem.

I wrote that in 2009. So what about new browsers?

With a browser that supports the FileAPI, you *can * read the contents of a file - the user has to have selected it, either with an <input> element or drag-and-drop. As of Jan 2013, here's how the major browsers stack up:

OTHER TIPS

I've made a library that implements incremental md5 in order to hash large files efficiently. Basically you read a file in chunks (to keep memory low) and hash it incrementally. You got basic usage and examples in the readme.

Be aware that you need HTML5 FileAPI, so be sure to check for it. There is a full example in the test folder.

https://github.com/satazor/SparkMD5

it is pretty easy to calculate the MD5 hash using the MD5 function of CryptoJS and the HTML5 FileReader API. The following code snippet shows how you can read the binary data and calculate the MD5 hash from an image that has been dragged into your Browser:

var holder = document.getElementById('holder');

holder.ondragover = function() {
  return false;
};

holder.ondragend = function() {
  return false;
};

holder.ondrop = function(event) {
  event.preventDefault();

  var file = event.dataTransfer.files[0];
  var reader = new FileReader();

  reader.onload = function(event) {
    var binary = event.target.result;
    var md5 = CryptoJS.MD5(binary).toString();
    console.log(md5);
  };

  reader.readAsBinaryString(file);
};

I recommend to add some CSS to see the Drag & Drop area:

#holder {
  border: 10px dashed #ccc;
  width: 300px;
  height: 300px;
}

#holder.hover {
  border: 10px dashed #333;
}

More about the Drag & Drop functionality can be found here: File API & FileReader

I tested the sample in Google Chrome Version 32.

You need to to use FileAPI. It is available in the latest FF & Chrome, but not IE9. Grab any md5 JS implementation suggested above. I've tried this and abandoned it because JS was too slow (minutes on large image files). Might revisit it if someone rewrites MD5 using typed arrays.

Code would look something like this:

HTML:     
<input type="file" id="file-dialog" multiple="true" accept="image/*">

JS (w JQuery)

$("#file-dialog").change(function() {
  handleFiles(this.files);
});

function handleFiles(files) {
    for (var i=0; i<files.length; i++) {
        var reader = new FileReader();
        reader.onload = function() {
        var md5 = binl_md5(reader.result, reader.result.length);
            console.log("MD5 is " + md5);
        };
        reader.onerror = function() {
            console.error("Could not read the file");
        };
        reader.readAsBinaryString(files.item(i));
     }
 }

HTML5 + spark-md5 and Q

Assuming your'e using a modern browser (that supports HTML5 File API), here's how you calculate the MD5 Hash of a large file (it will calculate the hash on variable chunks)

function calculateMD5Hash(file, bufferSize) {
  var def = Q.defer();

  var fileReader = new FileReader();
  var fileSlicer = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice;
  var hashAlgorithm = new SparkMD5();
  var totalParts = Math.ceil(file.size / bufferSize);
  var currentPart = 0;
  var startTime = new Date().getTime();

  fileReader.onload = function(e) {
    currentPart += 1;

    def.notify({
      currentPart: currentPart,
      totalParts: totalParts
    });

    var buffer = e.target.result;
    hashAlgorithm.appendBinary(buffer);

    if (currentPart < totalParts) {
      processNextPart();
      return;
    }

    def.resolve({
      hashResult: hashAlgorithm.end(),
      duration: new Date().getTime() - startTime
    });
  };

  fileReader.onerror = function(e) {
    def.reject(e);
  };

  function processNextPart() {
    var start = currentPart * bufferSize;
    var end = Math.min(start + bufferSize, file.size);
    fileReader.readAsBinaryString(fileSlicer.call(file, start, end));
  }

  processNextPart();
  return def.promise;
}

function calculate() {

  var input = document.getElementById('file');
  if (!input.files.length) {
    return;
  }

  var file = input.files[0];
  var bufferSize = Math.pow(1024, 2) * 10; // 10MB

  calculateMD5Hash(file, bufferSize).then(
    function(result) {
      // Success
      console.log(result);
    },
    function(err) {
      // There was an error,
    },
    function(progress) {
      // We get notified of the progress as it is executed
      console.log(progress.currentPart, 'of', progress.totalParts, 'Total bytes:', progress.currentPart * bufferSize, 'of', progress.totalParts * bufferSize);
    });
}
<script src="https://cdnjs.cloudflare.com/ajax/libs/q.js/1.4.1/q.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/spark-md5/2.0.2/spark-md5.min.js"></script>

<div>
  <input type="file" id="file"/>
  <input type="button" onclick="calculate();" value="Calculate" class="btn primary" />
</div>

Apart from the impossibility to get file system access in JS, I would not put any trust at all in a client-generated checksum. So generating the checksum on the server is mandatory in any case. – Tomalak Apr 20 '09 at 14:05

Which is useless in most cases. You want the MD5 computed at client side, so that you can compare it with the code recomputed at server side and conclude the upload went wrong if they differ. I have needed to do that in applications working with large files of scientific data, where receiving uncorrupted files were key. My cases was simple, cause users had the MD5 already computed from their data analysis tools, so I just needed to ask it to them with a text field.

To get the hash of files, there are a lot of options. Normally the problem is that it's really slow to get the hash of big files.

I created a little library that get the hash of files, with the 64kb of the start of the file and the 64kb of the end of it.

Live example: http://marcu87.github.com/hashme/ and library: https://github.com/marcu87/hashme

There is a couple scripts out there on the internet to create an MD5 Hash.

The one from webtoolkit is good, http://www.webtoolkit.info/javascript-md5.html

Although, I don't believe it will have access to the local filesystem as that access is limited.

With current HTML5 it should be possible to calculate the md5 hash of a binary file, But I think the step before that would be to convert the banary data BlobBuilder to a String, I am trying to do this step: but have not been successful.

Here is the code I tried: Converting a BlobBuilder to string, in HTML5 Javascript

hope you have found a good solution by now. If not, the solution below is an ES6 promise implementation based on js-spark-md5

import SparkMD5 from 'spark-md5';

// Read in chunks of 2MB
const CHUCK_SIZE = 2097152;

/**
 * Incrementally calculate checksum of a given file based on MD5 algorithm
 */
export const checksum = (file) =>
  new Promise((resolve, reject) => {
    let currentChunk = 0;
    const chunks = Math.ceil(file.size / CHUCK_SIZE);
    const blobSlice =
      File.prototype.slice ||
      File.prototype.mozSlice ||
      File.prototype.webkitSlice;
    const spark = new SparkMD5.ArrayBuffer();
    const fileReader = new FileReader();

    const loadNext = () => {
      const start = currentChunk * CHUCK_SIZE;
      const end =
        start + CHUCK_SIZE >= file.size ? file.size : start + CHUCK_SIZE;

      // Selectively read the file and only store part of it in memory.
      // This allows client-side applications to process huge files without the need for huge memory
      fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
    };

    fileReader.onload = e => {
      spark.append(e.target.result);
      currentChunk++;

      if (currentChunk < chunks) loadNext();
      else resolve(spark.end());
    };

    fileReader.onerror = () => {
      return reject('Calculating file checksum failed');
    };

    loadNext();
  });

I don't believe there is a way in javascript to access the contents of a file upload. So you therefore cannot look at the file contents to generate an MD5 sum.

You can however send the file to the server, which can then send an MD5 sum back or send the file contents back .. but that's a lot of work and probably not worthwhile for your purposes.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top