Frage

I am coding up a C program that extracts from a standard UNIX archive ar and creates the files it stores.

Here is an example of what an ar looks like if I open it in vim:

!<arch>
yo              1382105439  501   20    100644  10        `
test1 lol
yo2             1382105444  501   20    100644  10        `
test2 lol

...where "test1 lol" and "test2 lol" are the contents of each file, "yo" and "yo2" are two different file names, and the rest is metadata stored in a format corresponding to the standard ar.h (read more on it here: http://www.lehman.cuny.edu/cgi-bin/man-cgi?ar.h+3)

Anyway, I am still in the process of writing out the function but here is what I have so far:

static void extract_files (int argc, char *argv[])
{

  int fd;
  int new_file_fd;
  int num_read = 0;
  int new_file_size;

  struct ar_hdr current_header;

  char name_buffer[16];
  char date_buffer[12];
  char uid_buffer[6];
  char gid_buffer[6];
  char mode_buffer[8];
  char size_buffer[10];
  char fmag_buffer[2];

  // grab the fd #
  fd = open(argv[2], O_RDWR | O_CREAT, 0666);

  // go to the first header
  lseek(fd, SARMAG, SEEK_CUR);

  // store the number of bits read in a struct current_header
  // until its size equal to the size of the entire
  // header, or in other words, until the entire
  // header is read
  while ((num_read = read(fd, (char*) &current_header, 
    sizeof(struct ar_hdr))) == sizeof(struct ar_hdr))
  {

    // scans the current string in header and stores
    // in nameStr array
    sscanf(current_header.ar_name, "%s", name_buffer);
    sscanf(current_header.ar_date, "%s", date_buffer);
    sscanf(current_header.ar_uid, "%s", uid_buffer);
    sscanf(current_header.ar_gid, "%s", gid_buffer);

    int mode;
    sscanf(current_header.ar_mode, "%o", &mode);
    sscanf(current_header.ar_size, "%s", size_buffer);
    int size = atoi(size_buffer);
    sscanf(current_header.ar_fmag, "%s", fmag_buffer);

    // Create a new file
    new_file_fd = creat(name_buffer, mode);
    // Grab new file size
    new_file_size = atoi(size_buffer);

    int io_size; // buffer size
    char buff[size];
    int read_cntr = 0;

    // from copy.c
    while ((io_size = read (fd, buff, new_file_size)) > 0)
    {
      read_cntr++;
      if (read_cntr > new_file_size)
        break;
      write (new_file_fd, buff, new_file_size);
    }

    close(new_file_fd);
    printf("%s\n", name_buffer);
    printf("%s\n", date_buffer);
    printf("%s\n", uid_buffer);
    printf("%s\n", gid_buffer);
    printf("%s\n", mode_buffer);
    printf("%s\n", size_buffer);
    printf("%s\n", fmag_buffer);

    /* Seek to next header. */
    lseek(fd, atoi(current_header.ar_size) + (atoi(current_header.ar_size)%2), SEEK_CUR);
  }

}

The issue I am having lies in the second while loop in the above code:

    // from copy.c
while ((io_size = read (fd, buff, new_file_size)) > 0)
{
  read_cntr++;
  if (read_cntr > new_file_size)
    break;
  write (new_file_fd, buff, new_file_size);
}

For some reason, the files written in this while loop don't run to the length specified by write. The third argument for the standard read()/write() should be the number of bytes to write. For some reason though, my code results in the entire archive being read in and written into the first file.

If I open up the resulting "yo" file, I find the entire archive file has been written to it

test1 lol
yo2             1382105444  501   20    100644  10        `
test2 lol

instead of terminating after reading 10 bytes and giving the expected outcome "test1 lol".

I can also confirm that the "new_file_size" value is indeed 10. So my question is: what am I reading wrong about this while loop?

Note: Expected input would be a command line argument that looks something like: ./extractor.c -x name_of_archive_file

The only relevant information I think I need to deal with in this function is the name of the archive file which I get the fd for at the beginning of extract_files.

Added: Misc -- the output from when this is run:

yo
1382105439
501
20
X
10
`

As you can see, it never sees the yo2 file or prints out its header because it gets written to "yo" before that can happen...because of this stray while loop :(

War es hilfreich?

Lösung

You read a value, size_buffer, and assign it to size and new_file_size, you also create a buffer[size] of that same size,

int size = atoi(size_buffer);
sscanf(current_header.ar_fmag, "%s", fmag_buffer);
//...
new_file_size = atoi(size_buffer);
//...
char buff[size];

Read returns a ssize_t count of bytes in range [0..new_file_size], which you set into io_size, realize that read(2) may return < new_file_size bytes, which is why you need the while loop. So you need to write everything you have read, until you reach your write limit. I have made some comments to guide you.

// from copy.c
while ((io_size = read (fd, buff, new_file_size)) > 0)
{
    read_cntr++;
    //perhaps you mean read_cntr += io_size;
    //you probably mean to write io_size bytes here, regardless
    //write(new_file_fd, buff, io_size);
    if (read_cntr > new_file_size) //probably you want >= here
        break;
    //you may have broke before you write...
    write (new_file_fd, buff, new_file_size);
}

A more typical idiom for this copy would be something where you pick a read/write buffer size, say 4*1024 (4K), 16*1024 (16K), etc, and read that blocksize, until you have less than that blocksize remaining; for example,

//decide how big to make buffer for read()
#define BUFSIZE (16*1024) //16K
//you need min(
#define min(x,y) ( ((x)<(y)) ? (x) : (y) )
ssize_t fdreader(int fd, int ofd, ssize_t new_file_size )
{
    ssize_t remaining = new_file_size;
    ssize_t readtotal = 0;
    ssize_t readcount;
    unsigned char buffer[BUFSIZE];
    for(  ; readcount=read(fd,buffer,min(sizeof(buffer),remaining));  )
    {
        readtotal += readcount;
        if( readcount > remaining ) //only keep remaining
            readcount = remaining;
        write( ofd, buffer, readcount);
        remaining -= readcount;
        if( remaining <= 0 ) break; //done
    }
    return readtotal;
}

Try this,

#include<stdio.h>
#include<stdlib.h>

void usage(char*progname)
{
    printf("need 2 files\n");
    printf("%s <infile> <outfile>\n",progname);
}

//decide how big to make buffer for read()
#define BUFSIZE (16*1024) //16K
//you need min(
#define min(x,y) ( ((x)<(y)) ? (x) : (y) )
ssize_t fdreader(int fd, int ofd, ssize_t new_file_size )
{
    ssize_t remaining = new_file_size;
    ssize_t readtotal = 0;
    ssize_t readcount;
    unsigned char buffer[BUFSIZE];
    for(  ; readcount=read(fd,buffer,min(sizeof(buffer),remaining));  )
    {
        readtotal += readcount;
        if( readcount > remaining ) //only keep remaining
            readcount = remaining;
        write( ofd, buffer, readcount);
        remaining -= readcount;
        if( remaining <= 0 ) break; //done
    }
    return readtotal;
}

int main(int argc,char**argv)
{
    int i=0; /* the infamous 'i' */
    FILE*infh;
    FILE*outfh;

    if( argc < 3 )
    {
        usage(argv[0]);
        return 0;
    }

    printf("%s %s\n",argv[1],argv[2]); fflush(stdout);
    if( !(infh=fopen(argv[1],"r")) )
    {
        printf("cannot open %s\n",argv[2]); fflush(stdout);
        return(2);
    }
    if( !(outfh=fopen(argv[2],"w+")) )
    {
        printf("cannot open %s\n",argv[3]); fflush(stdout);
        return(3);
    }

    int x = fdreader(fileno(infh), fileno(outfh), 512 );

    return 0;
}

Andere Tipps

Your while() loop should probably have braces ({ ... }) after it, otherwise you're just incrementing read_cntr without doing anything else.

Lizenziert unter: CC-BY-SA mit Zuschreibung
Nicht verbunden mit StackOverflow
scroll top