Removing redundancies in a loop using Python

Question

Try this on for size:

# this code assumes Python 2.7
from itertools import groupby, izip
from operator import attrgetter

# Path of the input table; main() skips its first line as a header.
INPUT    = "file.txt"
# Output path for rows whose homolog count is non-zero.
HOMO_YES = "homologs.txt"
# Output path for rows whose homolog count is zero.
HOMO_NO  = "nonhomologs.txt"
# Default value of count_homologs()'s max_diff parameter.
MAX_DIFF = 5

class Row(object):
    """One whitespace-separated record of the input file.

    The first four fields of the line are parsed as
    ``con``, ``protein``, ``start`` (int) and ``end`` (int).  The original
    text, minus trailing whitespace, is kept in ``line`` so the record can
    be written back out unchanged.

    Raises:
        IndexError: if the line has fewer than four fields.
        ValueError: if the third or fourth field is not an integer.
    """

    # Inheriting from ``object`` matters on Python 2: __slots__ is ignored
    # on old-style classes, so every instance would still carry a __dict__.
    __slots__ = ("line", "con", "protein", "start", "end")

    def __init__(self, s):
        self.line    = s.rstrip()
        data         = s.split()
        self.con     = data[0]
        self.protein = data[1]
        self.start   = int(data[2])
        self.end     = int(data[3])

    def __str__(self):
        # Emit the record exactly as it was read (without the newline).
        return self.line

    def __repr__(self):
        return "Row(%r)" % (self.line,)

def count_homologs(items, max_diff=MAX_DIFF):
    """Count, for each item, how many other items lie within ``max_diff``.

    Two items match when their ``start`` values differ by at most
    ``max_diff`` AND their ``end`` values differ by at most ``max_diff``.

    Args:
        items: sequence of objects with integer ``start`` and ``end``
            attributes, sorted by ``start`` in ascending order (the inner
            scan relies on this ordering to stop early).
        max_diff: largest allowed difference for both coordinates.

    Returns:
        A list of ints parallel to ``items``; ``counts[k]`` is the number
        of other items matching ``items[k]``.
    """
    num_items = len(items)
    counts    = [0] * num_items
    # first item
    for i, item_i in enumerate(items):
        max_start = item_i.start + max_diff
        end_i     = item_i.end
        # second item -- plain index loop so the code runs unchanged on
        # Python 2 and 3 without allocating an index list per outer item
        j = i + 1
        while j < num_items:
            item_j = items[j]
            if item_j.start > max_start:
                # items are sorted by start: nothing further can match
                break
            # A later start does not imply a later end, so the end
            # difference must be checked in both directions.
            if abs(item_j.end - end_i) <= max_diff:
                counts[i] += 1
                counts[j] += 1
            j += 1
    return counts

def main():
    """Split the rows of INPUT into HOMO_YES and HOMO_NO.

    The first line of INPUT is skipped as a header.  Remaining rows are
    grouped by ``con``; within each group every row that count_homologs()
    reports at least one match for is written to HOMO_YES, all others to
    HOMO_NO.  Rows are written per group in (start, end) order.
    """
    with open(INPUT) as inf, open(HOMO_YES, "w") as outhomo, open(HOMO_NO, "w") as outnothomo:
        # skip header
        next(inf, '')
        # Ignore blank / whitespace-only lines (e.g. a trailing newline at
        # end of file); Row() would raise IndexError on them.
        rows = (Row(line) for line in inf if line.strip())

        # NOTE: groupby only merges *adjacent* rows sharing a key, so the
        # input is expected to list each con contiguously.
        for con, item_iter in groupby(rows, key=attrgetter("con")):
            # per-con list of Rows sorted by start,end
            items = sorted(item_iter, key=attrgetter("start", "end"))
            # get #homologs for each item
            counts = count_homologs(items)
            # do output: non-zero count -> homolog file, zero -> the other
            for count, item in zip(counts, items):
                out = outhomo if count else outnothomo
                out.write(str(item) + "\n")

# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

On your given data, this produces:

=== homologs.txt ===

con1    P1  140 602
con1    P2  140 602
con3    P9  16  348
con3    P8  17  348

=== nonhomologs.txt ===

con1    P3  232 548
con2    P6  335 779
con2    P4  335 801
con2    P5  642 732
con2    P7  729 812