Question

I'm new to scripting, but I have a lot of experience programming in languages such as C# and Java.

I have a file that contains binary data. I want to write a Bash script that reads the year, month, and day contained in that file so I can sort the associated MOD files into folders according to the date they were recorded. I'm having trouble finding a way to read binary data and parsing it in a bash script. Is there any way to do this?

Was it helpful?

Solution

You can use od (plus head and awk for a little post-processing) for this. To get the year:

year=$(od -t x2 --skip-bytes=6 --read-bytes=2 file.moi | head -1 | awk '{print $2}')

For the month:

month=$(od -t x1 --skip-bytes=8 --read-bytes=1 file.moi | head -1 | awk '{print $2}')

And the day:

day=$(od -t x1 --skip-bytes=9 --read-bytes=1 file.moi | head -1 | awk '{print $2}')

OTHER TIPS

I would recommend using python for this.

However, if you insist on bash, i would try using either sed in binary mode (never tried it) or using dd for extracting specific bytes and then convert them.

If this is not too hardcore for you I suggest compiling the following C-language program:

#include <stdio.h>
#include <inttypes.h>

typedef union {
  char array[sizeof(int32_t)];
  int32_t val;
} int32_u;

typedef union {
  char array[sizeof(uint32_t)];
  uint32_t val;
} uint32_u;

typedef union {
  char array[sizeof(uint64_t)];
  uint64_t val;
} uint64_u;

typedef union {
  char array[sizeof(int64_t)];
  int64_t val;
} int64_u;

int swap(char* mem, int size) {
  if (size & 1 != 0)
    return -1;
  int i;
  for (i = 0; i < size / 2; i++) {
    char tmp = mem[i];
    mem[i] = mem[size - i - 1];
    mem[size - i - 1] = tmp;
  }
  return 0;
}

int sys_big_endian() {
    int x = 1;
    return !(*(char*)&x);
}

int main(int argc, char** argv) {
  char* file_name = NULL;
  int offset = 0;
  char* type = "int32";
  int big_endian = 0;

  int i;
  for(i = 1; i < argc; i++) {
    if(!strncmp("-o", argv[i], 2)) {
      ++i;
      sscanf(argv[i], "%d", &offset);
    } else if(!strncmp("-t", argv[i], 2)) {
      ++i;
      type = argv[i];
    } else if(!strncmp("-e", argv[i], 2)) {
      ++i;
      big_endian = !strncmp("big", argv[i], 3);
    } else {
      file_name = argv[i];
      break;
    }
  }

  if (i < argc - 1) {
    fprintf(stderr, "Ignoring extra arguments: ");
    ++i;
    for (; i < argc; i++) {
      fprintf(stderr, "%s ", argv[i]);
    }
    fprintf(stderr, "\n");
  }

  if (file_name == NULL) {
    fprintf(stderr, "Syntax: readint [-o offset] [-t type] [-e endian] <filename>\n"
      "Where:\n"
      "  type      'uint32', 'uint64', 'int32' (default), 'int64'.\n"
      "  endian    'big' or 'little' (default).\n"
      "  offset    offset in a file from where the read will happen, default is 0.\n"
    );
    return -1;
  }

  FILE* fp = fopen(file_name, "rb");

  if (fp == NULL) {
    fprintf(stderr, "Could not open the file: %s\n", file_name);
    return -1;
  }

  fseek(fp, offset, SEEK_SET);

  if (!strncmp("uint32", type, 6)) {
    uint32_u u;
    fread(u.array, sizeof(u.array), 1, fp);
    if (big_endian ^ sys_big_endian())
      swap(u.array, sizeof(u.array));
    printf("%u\n", u.val);
  } else if (!strncmp("int32", type, 5)) {
    int32_u u;
    fread(u.array, sizeof(u.array), 1, fp);
    if (big_endian ^ sys_big_endian())
      swap(u.array, sizeof(u.array));
    printf("%d\n", u.val);
  } else if (!strncmp("uint64", type, 6)) {
    uint64_u u;
    fread(u.array, sizeof(u.array), 1, fp);
    if (big_endian ^ sys_big_endian())
      swap(u.array, sizeof(u.array));
    printf("%"PRIu64"\n", u.val);
  } else if (!strncmp("int64", type, 5)) {
    int64_u u;
    fread(u.array, sizeof(u.array), 1, fp);
    if (big_endian ^ sys_big_endian())
      swap(u.array, sizeof(u.array));
    printf("%"PRId64"\n", u.val);
  } else {
    printf("Unknown type: %s\n", type);
  }

  fclose(fp); 
  return 0;
}

Then do this:

gcc -o readint readint.c
sudo mv readint /usr/local/bin

Now you have a handy tool called 'readint' with the following syntax:

readint [-o offset] [-t int32|uint32|int64|uint64 ] [-e little|big ] <filename>

you can search the net for modules to interpret MOI files (either Perl or Python). Otherwise, i don't really think you can get the date just like that from the binary file because if you look inside, its really "garbage" since its binary. Although you may also give the strings command a try to see if there are legible strings that match the date

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top