Read id3 v2.4 tags with native Chrome Javascript/FileReader/DataView

Question 1

Using the code I found here: http://www.ulduzsoft.com/2012/07/parsing-id3v2-tags-in-the-mp3-files/, I translated it into Javascript here: http://jsfiddle.net/eb7rrbw4/

Here is the code as I wrote it there:

DataView.prototype.getChar=function(start) {
    return String.fromCharCode(this.getUint8(start));
};
DataView.prototype.getString=function(start,length) {
    for(var i=0,v='';i<length;++i) {
        v+=this.getChar(start+i);
    }
    return v;
};
DataView.prototype.getInt=function(start) {
    return (this.getUint8(start) << 21) | (this.getUint8(start+1) << 14) | (this.getUint8(start+2) << 7) | this.getUint8(start+3);
};

function readID3(){
    var a=new DataView(this.result);
    // Parse it quickly
    if ( a.getString(0,3)!="ID3" )
    {
        return false;
    }

    // True if the tag is pre-V3 tag (shorter headers)
    var TagVersion = a.getUint8(3);

    // Check the version
    if ( TagVersion < 0 || TagVersion > 4 )
    {
        return false;
    }

    // Get the ID3 tag size and flags; see 3.1
    var tagsize = a.getInt(6)+10;
        //(a.getUint8(9) & 0xFF) | ((a.getUint8(8) & 0xFF) << 7 ) | ((a.getUint8(7) & 0xFF) << 14 ) | ((a.getUint8(6) & 0xFF) << 21 ) + 10;
    var uses_synch = (a.getUint8(5) & 0x80) != 0 ? true : false;
    var has_extended_hdr = (a.getUint8(5) & 0x40) != 0 ? true : false;

    var headersize=0;         
    // Read the extended header length and skip it
    if ( has_extended_hdr )
    {
        var headersize = a.getInt(10);
            //(a.getUint8(10) << 21) | (a.getUint8(11) << 14) | (a.getUint8(12) << 7) | a.getUint8(13); 
    }

    // Read the whole tag
    var buffer=new DataView(a.buffer.slice(10+headersize,tagsize));

    // Prepare to parse the tag
    var length = buffer.byteLength;

    // Recreate the tag if desynchronization is used inside; we need to replace 0xFF 0x00 with 0xFF
    if ( uses_synch )
    {
        var newpos = 0;
        var newbuffer = new DataView(new ArrayBuffer(tagsize));

        for ( var i = 0; i < tagsize; i++ )
        {
            if ( i < tagsize - 1 && (buffer.getUint8(i) & 0xFF) == 0xFF && buffer.getUint8(i+1) == 0 )
            {
                newbuffer.setUint8(newpos++,0xFF);
                i++;
                continue;
            }

            newbuffer.setUint8(newpos++,buffer.getUint8(i));                 
        }

        length = newpos;
        buffer = newbuffer;
    }

    // Set some params
    var pos = 0;
    var ID3FrameSize = TagVersion < 3 ? 6 : 10;
    var m_title;
    var m_artist;

    // Parse the tags
    while ( true )
    {
        var rembytes = length - pos;

        // Do we have the frame header?
        if ( rembytes < ID3FrameSize )
            break;

        // Is there a frame?
        if ( buffer.getChar(pos) < 'A' || buffer.getChar(pos) > 'Z' )
            break;

        // Frame name is 3 chars in pre-ID3v3 and 4 chars after
        var framename;
        var framesize;

        if ( TagVersion < 3 )
        {
            framename = buffer.getString(pos,3);
            framesize = ((buffer.getUint8(pos+5) & 0xFF) << 8 ) | ((buffer.getUint8(pos+4) & 0xFF) << 16 ) | ((buffer.getUint8(pos+3) & 0xFF) << 24 );
        }
        else
        {
            framename = buffer.getString(pos,4);
            framesize = buffer.getInt(pos+4);
                //(buffer.getUint8(pos+7) & 0xFF) | ((buffer.getUint8(pos+6) & 0xFF) << 8 ) | ((buffer.getUint8(pos+5) & 0xFF) << 16 ) | ((buffer.getUint8(pos+4) & 0xFF) << 24 );
        }

        if ( pos + framesize > length )
            break;

        if ( framename== "TPE1"  || framename== "TPE2"  || framename== "TPE3"  || framename== "TPE" )
        {
            if ( m_artist == null )
                m_artist = parseTextField( buffer, pos + ID3FrameSize, framesize );
        }

        if ( framename== "TIT2" || framename== "TIT" )
        {
            if ( m_title == null )
                m_title = parseTextField( buffer, pos + ID3FrameSize, framesize );
        }

        pos += framesize + ID3FrameSize;
        continue;
    }
    console.log(m_title,m_artist);
    return m_title != null || m_artist != null;
}

function parseTextField( buffer, pos, size )
{
    if ( size < 2 )
        return null;

    var charcode = buffer.getUint8(pos); 

    //TODO string decoding         
    /*if ( charcode == 0 )
        charset = Charset.forName( "ISO-8859-1" );
    else if ( charcode == 3 )
        charset = Charset.forName( "UTF-8" );
    else
        charset = Charset.forName( "UTF-16" );

    return charset.decode( ByteBuffer.wrap( buffer, pos + 1, size - 1) ).toString();*/
    return buffer.getString(pos+1,size-1);
}

You should see the title and author in the console log. Look at the parse text function, though, where the encoding determines the way to read the string. (search for TODO). Also I have not tested it with extended headers or uses_synch true or tag version 3.

Question 2

You can try using id3 parser on github.

Here's your updated fiddle that logs the tags object in the console

With the id3.js included, all you need to do in your code is this:

function readFile(){
   id3(this.files[0], function(err, tags) {
       console.log(tags);
   })
}
document.getElementsByTagName('input')[0].addEventListener('change',readFile,false);

And here is the tags object as created by id3:

{
  "title": "Stairway To Heaven",
  "album": "Stairway To Heaven",
  "artist": "Led Zeppelin",
  "year": "1999",
  "v1": {
    "title": "Stairway To Heaven",
    "artist": "Led Zeppelin",
    "album": "Stairway To Heaven",
    "year": "1999",
    "comment": "Classic Rock",
    "track": 13,
    "version": 1.1,
    "genre": "Other"
  },
  "v2": {
    "version": [3, 0],
    "title": "Stairway To Heaven",
    "album": "Stairway To Heaven",
    "comments": "Classic Rock",
    "publisher": "Virgin Records"
  }
}

Hope this helps!

Question 3

Partially Correct Answer (it properly reads utf8 formatted id3v2.4.0 including cover)

The things i asked in my question probably work now.

I wanted a very crude minimal function set to handle only id3v2.4.0 & and also parse the attached image.

With the help of @Siderite Zackwehdex, which answer is marked as correct, i understood the important part of the code that was missing.

As i had some time to play with it i made various modifications to the code.

First of all sorry for the compressed script but i have a better overview of the overall code. it's easier for me. if you have some questions about the code just ask.

Anyway, i removed the uses_synch ... it's really hard to find a file that uses synch. Same for the has_extended_hdr.I also remove the support for id3v2.0.0 to id3v2.2.0. I added a version check, that one works with all id3v2 subversions.

The main function output contains an array with all the tags, inside you can also find the id3v2 Version.Last, but i guess usefull to expand, i added a custom FRAME object that contains custom functions for FRAMES other than textFrames. The now only function inside converts the image/cover/APIC to a easy to use base64 string. Doing so the array can be stored as a JSON string.

While for some of you the compatibility is important the above mentioned exended header or sync are actually the smallest problem.

PROBLEMS

The encoding needs to be UTF-8 else you get strange text paddings and some images are parsed only partially. basically broken.

I want to avoid the use of external library or even a really big function just for that ... there needs to be some smart simple solution to handle properly the encoding. ISO-8859-1,UTF-8,UTF-16 .. big endian... whatever... #00 vs #00 00 ..

If that is done the support can be improved exponentially.

I hope that some of you have a solution for that.

CODE

DataView.prototype.str=function(a,b,c,d){//start,length,placeholder,placeholder
 b=b||1;c=0;d='';for(;c<b;)d+=String.fromCharCode(this.getUint8(a+c++));return d
}
DataView.prototype.int=function(a){//start
 return (this.getUint8(a)<<21)|(this.getUint8(a+1)<<14)|
 (this.getUint8(a+2)<<7)|this.getUint8(a+3)
}
var frID3={
 'APIC':function(x,y,z,q){
  var b=0,c=['',0,''],d=1,e,b64;
  while(b<3)e=x.getUint8(y+z+d++),c[b]+=String.fromCharCode(e),
  e!=0||(b+=b==0?(c[1]=x.getUint8(y+z+d),2):1);
  b64='data:'+c[0]+';base64,'+
  btoa(String.fromCharCode.apply(null,new Uint8Array(x.buffer.slice(y+z+++d,q))));
  return {mime:c[0],description:c[2],type:c[1],base64:b64}
 }
}
function readID3(a,b,c,d,e,f,g,h){
 if(!(a=new DataView(this.result))||a.str(0,3)!='ID3')return;
 g={Version:'ID3v2.'+a.getUint8(3)+'.'+a.getUint8(4)};
 a=new DataView(a.buffer.slice(10+((a.getUint8(5)&0x40)!=0?a.int(10):0),a.int(6)+10));
 b=a.byteLength;c=0;d=10;
 while(true){
  f=a.str(c);e=a.int(c+4);
  if(b-c<d||(f<'A'||f>'Z')||c+e>b)break;
  g[h=a.str(c,4)]=frID3[h]?frID3[h](a,c,d,e):a.str(c+d,e);
  c+=e+d;
 }
 console.log(g);
}

DEMO

https://jsfiddle.net/2awq6pz7/