Question

Looking through the itertools module, I don't see anything that could be used as a generic, iterable version of str.split. Is there a simple, idiomatic way of doing this?

These unit tests should demonstrate what I mean:

class SplitAnalog(unittest.TestCase):
    """Specification for ``split(predicate, iterable)``.

    ``split`` is expected to behave like ``str.split()`` generalised to any
    iterable: items for which the predicate is true act as separators, and
    no empty chunks are produced.  Where a string analogue exists, the test's
    docstring shows it.
    """

    def test_splitEmpty(self):
        """An empty iterable yields no chunks at all.

        >>> ''.split()
        []
        """
        chunks = split(None, [])
        self.assertEqual(tuple(chunks), ())

    def test_singleLine(self):
        """A trailing separator does not produce an extra empty chunk.

        >>> '123\n'.split()
        ['123']
        """
        items = [1, 2, 3, None]
        chunks = split(lambda n: n is None, items)
        self.assertEqual(tuple(map(tuple, chunks)), ((1, 2, 3),))

    def test_allNones(self):
        """Input made only of separators yields no chunks.

        >>> '\n\n\n'.split()
        []
        """
        separators_only = [None, None, None]
        chunks = split(lambda n: n is None, separators_only)
        self.assertEqual(tuple(chunks), ())

    def test_splitNumsOnNone(self):
        """Separators in the middle divide the input into several chunks.

        >>> '314159\n26535\n89793'.split()
        ['314159', '26535', '89793']
        """
        nums = [3, 1, 4, 1, 5, 9, None, 2, 6, 5, 3, 5, None, 8, 9, 7, 9, 3]
        expected = (
            (3, 1, 4, 1, 5, 9),
            (2, 6, 5, 3, 5),
            (8, 9, 7, 9, 3),
        )
        chunks = split(lambda n: n is None, nums)
        self.assertEqual(tuple(map(tuple, chunks)), expected)

    def test_splitNumsOnNine(self):
        """The separator is whatever the predicate says, not just ``None``."""
        nums = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 9, 8, 7, 3]
        expected = (
            (3, 1, 4, 1, 5),
            (2, 6, 5, 3, 5),
            (8, 7, 3),
        )
        chunks = split(lambda n: n == 9, nums)
        self.assertEqual(tuple(map(tuple, chunks)), expected)

What would such a function be called? I can't find an example even when I poke around in other language libraries.

Was it helpful?

Solution

Assuming I understand what you're after, maybe

def pseudosplit(predicate, seq):
    """Split *seq* into tuples, using items matching *predicate* as separators.

    Returns a lazy generator.  Each yielded tuple is a maximal run of
    consecutive items for which ``predicate(item)`` is false; the separator
    items themselves are dropped, so no empty tuples are produced.
    """
    def is_content(item):
        return not predicate(item)

    # groupby yields (key, run) pairs; only runs whose key is True hold
    # non-separator items.
    runs = groupby(seq, key=is_content)
    return (tuple(members) for keep, members in runs if keep)

which produces

>>> list(pseudosplit(lambda x: x is None, ()))
[]
>>> list(pseudosplit(lambda x: x is None, [1,2,3]))
[(1, 2, 3)]
>>> list(pseudosplit(lambda x: x is None, [None]*3))
[]
>>> list(pseudosplit(lambda x: x is None, [3, 1, 4, 1, 5, 9, None, 2, 6, 5, 3, 5, None, 8, 9, 7, 9, 3, None]))
[(3, 1, 4, 1, 5, 9), (2, 6, 5, 3, 5), (8, 9, 7, 9, 3)]

which seems to split as your test cases do, anyway.

OTHER TIPS

This splits the iterable into tuples of consecutive items, treating every item for which the predicate is true as a separator.

def split(predicate, iterable):
    """Split *iterable* into tuples, using items matching *predicate* as separators.

    Returns a lazy generator.  Each yielded tuple is a maximal run of
    consecutive items for which ``predicate(item)`` is falsy; items for which
    it is truthy are treated as separators and dropped, so no empty tuples
    are produced.
    """
    # groupby already reports each run's predicate result as its key, so the
    # predicate is evaluated once per item and the separator runs can be
    # discarded without re-testing their members.
    return (tuple(run) for is_separator, run in groupby(iterable, predicate)
            if not is_separator)

Passes all the tests, including a test with something other than None.

def test_splitNumsOnNine(self):
    """Split a digit sequence on a separator value other than ``None``.

    String analogue:

    >>> '314159265359873'.split('9')
    ['31415', '26535', '873']
    """
    nums = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5, 9, 8, 7, 3]
    # Compare by value: ``is`` tests object identity, which matches equal
    # small ints only as an implementation detail of the interpreter.
    actual = split(lambda n: n == 9, nums)
    self.assertEqual(tuple(tuple(line) for line in actual), (
        (3, 1, 4, 1, 5),
        (2, 6, 5, 3, 5),
        (8, 7, 3)))

Here's a sample implementation:

def split(predicate, iterable):
    """Yield lists of consecutive items, split where *predicate* is true.

    Items for which ``predicate(item)`` is truthy act as separators and are
    dropped.  Every other item is appended to the current chunk, and a chunk
    is yielded (as a fresh list) when a separator or the end of the input is
    reached.  Empty chunks are never yielded, so leading, trailing and
    repeated separators produce nothing.
    """
    line = []
    # A plain for loop handles exhaustion of the input by itself, so a
    # StopIteration raised by the predicate is not mistaken for end of input.
    for val in iterable:
        if predicate(val):
            if line:
                yield line
                line = []
        else:
            line.append(val)
    # Flush the final chunk when the input does not end with a separator.
    if line:
        yield line

I wonder, though, whether I'm overlooking a simpler, easier, more idiomatic way. Anyone?

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top