I would do something like this: define an "overlaps" function for two hits, then test, for each contig, whether all, some or none of its hits overlap. Finally, write each contig's hits to the output file that matches the result:
from itertools import groupby
def overlaps(a, b):
    """Return True when hits *a* and *b* share at least one position.

    Each hit is an indexable record; element 2 is assumed to be the start
    coordinate and element 3 the end coordinate.

    Two hits are disjoint only when one ends strictly before the other
    starts, so hits that merely touch (end of one == start of the other)
    are reported as overlapping.
    """
    return a[3] >= b[2] and b[3] >= a[2]
def test_overlapping(hits):
    """Classify how the hits of one contig overlap each other.

    Each hit is an indexable record; element 2 is assumed to be the start
    coordinate and element 3 the end coordinate (start <= end).

    The hits are visited in order of start coordinate and each one is
    compared with the furthest end reached by any earlier hit. Comparing
    only neighbouring hits would miss an overlap with an earlier, longer
    hit that spans past a short hit nested inside it.

    Returns:
        'None' -- no hit overlaps an earlier one (also for 0 or 1 hits),
        'All'  -- every hit after the first overlaps an earlier one,
        'Some' -- anything in between.

    The input list is not modified.
    """
    # Sort a copy so the result does not depend on the caller's ordering.
    ordered = sorted(hits, key=lambda hit: hit[2])
    if len(ordered) < 2:
        return 'None'

    furthest_end = ordered[0][3]
    overlapping_count = 0
    for hit in ordered[1:]:
        # Starts are non-decreasing here, so a hit overlaps something
        # earlier exactly when it starts at or before the furthest end.
        if hit[2] <= furthest_end:
            overlapping_count += 1
        furthest_end = max(furthest_end, hit[3])

    if overlapping_count == 0:
        return 'None'
    if overlapping_count == len(ordered) - 1:
        return 'All'
    return 'Some'


# Despite its "test_" prefix this is not a unit test; keep pytest from
# collecting it when the module is imported into a test file.
test_overlapping.__test__ = False
# Split the hits in 'file.txt' into three files according to whether the
# hits of each contig all overlap, partly overlap, or do not overlap.
#
# Expected input: a whitespace-separated table with one header line, where
# column 0 is the contig id and columns 2 and 3 are the integer start and
# end of the hit.
with open('file.txt') as fh, \
        open('result_all.txt', 'w') as file_all, \
        open('result_some.txt', 'w') as file_some, \
        open('result_none.txt', 'w') as file_none:
    out_files = {'All': file_all, 'Some': file_some, 'None': file_none}

    next(fh, None)  # skip the header line; tolerate an empty file

    # Blank lines have no first column to group on, so drop them up front.
    records = (line.split() for line in fh if line.strip())

    # groupby only merges consecutive rows, so all hits of a contig must
    # already sit on adjacent lines of the input.
    for qid, grp in groupby(records, key=lambda fields: fields[0]):
        hits = []
        for hsp in grp:
            hsp[2], hsp[3] = int(hsp[2]), int(hsp[3])
            hits.append(hsp)
        hits.sort(key=lambda hit: hit[2])

        # Classify every contig, including those with a single hit (which
        # come back as 'None'), so no contig is dropped or inherits the
        # previous contig's result.
        out_file = out_files.get(test_overlapping(hits), file_none)
        for hit in hits:
            out_file.write('\t'.join(str(value) for value in hit))
            out_file.write('\n')