How to get the internal position while reading a bzip2 file

Question 1

This is the solution I came up with that seems to work.

import bz2

class SimpleBZ2File(object):
    """Line iterator over a bzip2 file that exposes the raw file position.

    The compressed file is read in chunks of ``readsize`` bytes and fed to
    a single ``bz2.BZ2Decompressor``. Iterating yields the decompressed
    content one line at a time as byte strings, line endings included.

    ``tell()`` reports the offset of the underlying *compressed* file
    object. Because input is consumed chunk by chunk, that offset is the
    end of the last raw chunk read, not the exact location of the line
    most recently yielded.

    The instance can be used as a context manager; the raw file is also
    closed automatically once iteration runs to completion.
    """

    def __init__(self, path, readsize=1024):
        """Open ``path`` for binary reading.

        path     -- location of the bzip2-compressed file.
        readsize -- number of compressed bytes requested per read.
        """
        self.decomp = bz2.BZ2Decompressor()
        self.rawinput = open(path, 'rb')
        self.eof = False
        self.readsize = readsize
        # Trailing fragment of decompressed data that has not yet been
        # terminated by a newline. Kept as bytes: decompress() returns
        # bytes, and mixing it with a text '' fails on Python 3.
        self.leftover = b''

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False

    def close(self):
        """Close the underlying raw file (safe to call more than once)."""
        self.rawinput.close()

    def tell(self):
        """Return the current offset within the compressed file."""
        return self.rawinput.tell()

    def __iter__(self):
        """Yield each decompressed line, including its line terminator."""
        while not self.eof:
            rawdata = self.rawinput.read(self.readsize)
            # Truthiness covers both '' (Python 2) and b'' (Python 3);
            # comparing against '' never matches bytes on Python 3.
            if not rawdata:
                self.eof = True
                break
            data = self.decomp.decompress(rawdata)
            if not data:
                continue  # the decompressor needs more raw input
            # Join the pending fragment with the new data *before*
            # splitting, so a chunk holding a single (possibly partial)
            # line is neither emitted early nor emitted twice.
            lines = (self.leftover + data).splitlines(True)
            if lines[-1].endswith(b'\n'):
                self.leftover = b''
            else:
                # Hold the unterminated tail until more data arrives.
                self.leftover = lines.pop()
            for line in lines:
                yield line
        if self.leftover:
            # Final line of a file that does not end with a newline.
            tail, self.leftover = self.leftover, b''
            yield tail
        self.close()

Question 2

If you only need to parse the data in the bzipped file, I think it should be possible to avoid unzipping the file before reading it. I have not tested this on bzip2 files, only on gzipped files, but I hope the same approach also works for bzip2 files.

See, for instance: How to write csv in python efficiently?