Question

I'm trying to output several gzip-encoded strings from a WSGI application. These are my attempts, but unfortunately the browser only decodes the first string. Any help?

test 1:

import zlib

def application(environ, start_response):
    """Attempt 1: buffer a hand-built gzip body and send it in one piece.

    The body is an 8-byte prefix starting with the gzip magic bytes, followed
    by the zlib-compressed forms of b'test' and b'test2', each with its final
    four bytes removed.  Content-Length is computed from the assembled body.

    NOTE: per the question, the browser decodes only the first string.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Returns:
        A one-element list containing the assembled body.
    """
    prefix = b'\x1f\x8b\x08\x00\x00\x00\x00\x00'
    # Every compressed piece loses its last four bytes before being appended.
    truncated = [zlib.compress(text)[:-4] for text in (b'test', b'test2')]
    data = prefix + b''.join(truncated)
    response_headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Encoding', 'gzip'),
        ('Content-Length', str(len(data))),
    ]
    start_response('200 OK', response_headers)
    return [data]

test 2:

import zlib

def application(environ, start_response):
    """Attempt 2: like a buffered gzip body, but with untruncated zlib streams.

    The body is an 8-byte prefix starting with the gzip magic bytes, followed
    by the complete zlib-compressed forms of b'test' and b'test2'.
    Content-Length is computed from the assembled body.

    NOTE: per the question, the browser decodes only the first string.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Returns:
        A one-element list containing the assembled body.
    """
    parts = [b'\x1f\x8b\x08\x00\x00\x00\x00\x00']
    for text in (b'test', b'test2'):
        parts.append(zlib.compress(text))
    data = b''.join(parts)
    content_length = ('Content-Length', str(len(data)))
    start_response('200 OK', [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Encoding', 'gzip'),
        content_length,
    ])
    return [data]

test 3:

import zlib

def application(environ, start_response):
    """Attempt 3: stream the hand-built gzip body as three separate chunks.

    Yields an 8-byte prefix starting with the gzip magic bytes, then the
    zlib-compressed forms of b'test' and b'test2', each with its final four
    bytes removed.  No Content-Length header is sent.

    NOTE: per the question, the browser decodes only the first string.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Yields:
        The prefix, then one truncated compressed chunk per string.
    """
    response_headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Encoding', 'gzip'),
    ]
    start_response('200 OK', response_headers)
    yield b'\x1f\x8b\x08\x00\x00\x00\x00\x00'
    for text in (b'test', b'test2'):
        compressed = zlib.compress(text)
        yield compressed[:-4]

test 4:

import zlib

def application(environ, start_response):
    """Attempt 4: stream the prefix and two untruncated zlib streams.

    Yields an 8-byte prefix starting with the gzip magic bytes, then the
    complete zlib-compressed forms of b'test' and b'test2'.  No
    Content-Length header is sent.

    NOTE: per the question, the browser decodes only the first string.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Yields:
        The prefix, then one compressed chunk per string.
    """
    start_response(
        '200 OK',
        [
            ('Content-Type', 'text/html; charset=utf-8'),
            ('Content-Encoding', 'gzip'),
        ],
    )
    yield b'\x1f\x8b\x08\x00\x00\x00\x00\x00'
    for text in (b'test', b'test2'):
        yield zlib.compress(text)

test 5:

import gzip

def application(environ, start_response):
    """Attempt 5: yield each string as its own ``gzip.compress`` result.

    Every yielded chunk is the output of a separate ``gzip.compress`` call,
    so the response body is two independently compressed pieces back to back.

    NOTE: per the question, the browser decodes only the first string.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Yields:
        One gzip-compressed chunk per string.
    """
    response_headers = [
        ('Content-Type', 'text/html; charset=utf-8'),
        ('Content-Encoding', 'gzip'),
    ]
    start_response('200 OK', response_headers)
    for text in (b'test', b'test2'):
        yield gzip.compress(text)
Was it helpful?

Solution

I think the problem is the following:

 gzip.compress(b'test')

returns a byte string laid out as

 header  content  THE END 

in it.

This means that a decoder which stops at the first end-of-stream marker (as the browser does here) returns only b'test' and ignores whatever follows. Try it yourself.

Two solutions depending on what you want to achieve:

  1. Create a multipart message. Every yield is a new document
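  2. Write all the strings through a single GzipFile, so there is only one header and one end marker. For reference, compress does this: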

    def compress(data, compresslevel=9):
    
        """Compress data in one shot and return the compressed string.
        Optional argument is the compression level, in range of 0-9.
        """
        # Collect the compressed output in an in-memory buffer, not a real file.
        buf = io.BytesIO()
        # Leaving the with-block closes the GzipFile. Per the surrounding
        # answer, each call therefore produces a complete stream of its own
        # (header, content, end marker).
        with GzipFile(fileobj=buf, mode='wb', compresslevel=compresslevel) as f:
            f.write(data)
        return buf.getvalue()
    

    Do something like this:

    import gzip, io
    
    def application(environ, start_response):
        """WSGI app that sends b'test' and b'test2' as a single gzip stream.

        Both strings are written through one GzipFile, so the body has a
        single header and a single end marker and decodes to b'testtest2'.

        Args:
            environ: WSGI environment (not read here).
            start_response: WSGI callable taking the status line and header list.

        Returns:
            A one-element list holding the complete gzip-encoded body.
        """
        headers = [('Content-Type', 'text/html; charset=utf-8'), ('Content-Encoding', 'gzip')]
        start_response('200 OK', headers)
        buf = io.BytesIO()
        # Only the gzip module itself is imported, so GzipFile must be qualified.
        with gzip.GzipFile(fileobj=buf, mode='wb') as f:
            f.write(b'test')
            f.write(b'test2')
        # Return the bytes, not the BytesIO: after writing, its position is at
        # the end, so iterating the buffer object would produce an empty body.
        return [buf.getvalue()]
    

OTHER TIPS

Thanks to @User. Here is my own solution, which is what I ended up using; I assume we won't be buffering much data here:

import gzip

class gzipreader:
    """Write-only sink for ``gzip.GzipFile`` whose output can be drained in pieces.

    A GzipFile writes its compressed bytes here; ``read()`` hands back whatever
    has accumulated since the previous ``read()`` and empties the sink.
    """

    def __init__(self):
        # Pending pieces are kept in a list and joined once on read(), which
        # avoids re-copying the whole buffer on every write.
        self._chunks = []

    def flush(self):
        """Do nothing; everything written is already held in memory."""

    def write(self, data):
        """Store *data* and return the number of bytes accepted.

        Args:
            data: A bytes-like object to append to the pending output.

        Returns:
            The number of bytes stored, as file-like ``write`` methods report.
        """
        chunk = bytes(data)
        if chunk:
            self._chunks.append(chunk)
        return len(chunk)

    def read(self):
        """Return all bytes written since the last call and clear the sink.

        Returns:
            The pending bytes, or ``b''`` when nothing has been written.
        """
        pending = b''.join(self._chunks)
        self._chunks.clear()
        return pending

def application(environ, start_response):
    """WSGI app that streams b'test' and b'test2' as one gzip stream.

    All strings go through a single GzipFile that writes into a gzipreader
    sink. After each write, any output the GzipFile has already emitted is
    drained and yielded; the remainder is yielded once the GzipFile is closed.

    Args:
        environ: WSGI environment (not read here).
        start_response: WSGI callable taking the status line and header list.

    Yields:
        Non-empty pieces of the gzip-encoded body, in order.
    """
    headers = [('Content-Type', 'text/html; charset=utf-8'), ('Content-Encoding', 'gzip')]
    start_response('200 OK', headers)
    reader = gzipreader()
    # The with-block closes the GzipFile even if the server stops iterating
    # early (the generator is then closed while suspended at a yield).
    with gzip.GzipFile(mode='wb', fileobj=reader) as writer:
        for s in [b'test', b'test2']:
            writer.write(s)
            chunk = reader.read()
            # The GzipFile may not have emitted anything yet; skip empty
            # chunks instead of handing empty byte strings to the server.
            if chunk:
                yield chunk
    # Closing the GzipFile writes out whatever was still pending.
    yield reader.read()
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top