Question

I'm reading a .gz file from some slow source (like FTP Server) and am processing the received data right away. Looks something like this:

FtpWebResponse response = ftpclientRequest.GetResponse() as FtpWebResponse;
using (Stream ftpStream = response.GetResponseStream())
using (GZipStream unzipped = new GZipStream(ftpStream, CompressionMode.Decompress))
using (StreamReader linereader = new StreamReader(unzipped))
{
  String l;
  while ((l = linereader.ReadLine()) != null)
  {
    ...
  }
}

My problem is showing an accurate progress bar. In advance I can get the compressed .gz file size, but I got no clue how large the content would be uncompressed. Reading the file line by line I know quite well how many uncompressed bytes I read, but I don't know how this does relate to the compressed file size.

So, is there any way to get from GZipStream how far the file pointer is advanced into the compressed file? I only need the current position, the gz file size I can fetch before reading the file.

Was it helpful?

Solution

You can plug a stream in between which counts, how many bytes GZipStream has read.

  public class ProgressStream : Stream
  {
    public long BytesRead { get; set; }
    Stream _baseStream;
    public ProgressStream(Stream s)
    {
      _baseStream = s;
    }
    public override bool CanRead
    {
      get { return _baseStream.CanRead; }
    }
    public override bool CanSeek
    {
      get { return false; }
    }
    public override bool CanWrite
    {
      get { return false; }
    }
    public override void Flush()
    {
      _baseStream.Flush();
    }
    public override long Length
    {
      get { throw new NotImplementedException(); }
    }
    public override long Position
    {
      get
      {
        throw new NotImplementedException();
      }
      set
      {
        throw new NotImplementedException();
      }
    }
    public override int Read(byte[] buffer, int offset, int count)
    {
      int rc = _baseStream.Read(buffer, offset, count);
      BytesRead += rc;
      return rc;
    }
    public override long Seek(long offset, SeekOrigin origin)
    {
      throw new NotImplementedException();
    }
    public override void SetLength(long value)
    {
      throw new NotImplementedException();
    }
    public override void Write(byte[] buffer, int offset, int count)
    {
      throw new NotImplementedException();
    }
  }

// usage
FtpWebResponse response = ftpclientRequest.GetResponse() as FtpWebResponse;
using (Stream ftpStream = response.GetResponseStream())
using (ProgressStream progressStream = new ProgressStream(ftpstream))
using (GZipStream unzipped = new GZipStream(progressStream, CompressionMode.Decompress))
using (StreamReader linereader = new StreamReader(unzipped))
{
  String l;
  while ((l = linereader.ReadLine()) != null)
  {
    progressStream.BytesRead(); // does contain the # of bytes read from FTP so far.
  }
}

OTHER TIPS

I suggest you to take a look at the following code :

public static readonly byte[] symbols = new byte[8 * 1024];

public static void Decompress(FileInfo inFile, FileInfo outFile)
{
    using (var inStream = inFile.OpenRead())
    {
        using (var zipStream = new GZipStream(inStream, CompressionMode.Decompress))
        {
            using (var outStream = outFile.OpenWrite())
            {
                var total = 0;
                do
                {
                    var async = zipStream.BeginRead(symbols, 0, symbols.Length, null, null);
                    total = zipStream.EndRead(async);
                    if (total != 0)
                    {
                        // Report progress. Read total bytes (8K) from the zipped file.
                        outStream.Write(symbols, 0, total);
                    }
                } while (total != 0);
            }
        }
    }
}

I've revisited my code and I've performed some tests. IMHO darin is right. However I think it's possible to read only the header (size ?) of the zipped stream and find out the resulting file size. (WinRar "knows" what's the unzipped file size without unzipping the entire zip archive. It reads this information from archive's header.) If you find the resulting file size this code will help you to report a precise progress.

public static readonly byte[] symbols = new byte[8 * 1024];

public static void Decompress(FileInfo inFile, FileInfo outFile, double size, Action<double> progress)
{
    var percents = new List<double>(100);

    using (var inStream = inFile.OpenRead())
    {
        using (var zipStream = new GZipStream(inStream, CompressionMode.Decompress))
        {
            using (var outStream = outFile.OpenWrite())
            {
                var current = 0;

                var total = 0;
                while ((total = zipStream.Read(symbols, 0, symbols.Length)) != 0)
                {
                    outStream.Write(symbols, 0, total);
                    current += total;

                    var p = Math.Round(((double)current / size), 2) * 100;
                    if (!percents.Contains(p))
                    {
                        if (progress != null)
                        {
                            progress(p);
                        }
                        percents.Add(p);
                    }
                }
            }
        }
    }
}

I hope this helps.

As a proxy for decompressing progress you may try to get the information for file's download progress from the underlying stream using:

var percentageProgress = ftpStream.Position / (double)ftpWebResponse.ContentLength;

or

var percentageProgress = ftpStream.Position / (double)ftpStream.Length;

It works on a FileStream and it should work on a GetResponseStream() provided it implements Position property and the FTP server returns information on the downloaded file's length: http://msdn.microsoft.com/en-us/library/system.net.ftpwebresponse.contentlength(v=vs.110).aspx

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top