If you wanted to find repetitions in the first two fields, you could use them as a dictionary key and populate it with a list of all matching lines. When you have processed the entire file, those dictionary entries which contain a list of more than one element are repetitions.
#!/usr/bin/env python
import fileinput
def read(line, d=None):
    """Record *line* in *d* under a key built from its first two fields.

    The key is the first two whitespace-separated tokens of *line* joined
    by a single space.  A line with fewer than two tokens is keyed by
    whatever tokens it has (an empty string for a blank line).

    Args:
        line: One input line, stored unmodified (trailing newline included).
        d: Dictionary mapping key -> list of lines seen with that key.  It
            is updated in place.  When omitted, a fresh dictionary is
            created for this call, so separate calls never share state
            through the default argument.

    Returns:
        The dictionary that was updated (*d* itself when one was passed).
    """
    if d is None:
        d = {}
    tokens = line.split()
    key = ' '.join(tokens[0:2])
    # setdefault creates the list on first sight of a key and appends after.
    d.setdefault(key, []).append(line)
    return d
def main():
    """Print every input line whose first two fields occur more than once.

    Lines are read through ``fileinput.input()`` and grouped with ``read``.
    Each group holding more than one line is then written to standard
    output, keeping the lines of a group in the order they were read.
    """
    # Local import keeps this function self-contained; sys.stdout.write
    # behaves the same on Python 2 and Python 3, unlike the print statement.
    import sys

    groups = {}
    for line in fileinput.input():
        read(line, groups)

    for key, lines in groups.items():
        if len(lines) > 1:
            # For debugging, uncomment:
            # sys.stdout.write("### %s => %s\n" % (key, lines))
            for entry in lines:
                sys.stdout.write(entry)
                # The final line of an input file may lack a newline; add
                # one so it is not glued to the next line printed.
                if not entry.endswith("\n"):
                    sys.stdout.write("\n")
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sample output (with the debug prints enabled so you can see why it prints these outputs):
### Exited __init__ => ['Exited __init__ ConfigHandler None 56663624 10:25:30:747000\n', 'Exited __init__ ColumnConverter None 56963312 10:25:30:769000\n', 'Exited __init__ PredicatesFactory None 56963424 10:25:30:769000\n', 'Exited __init__ LogFileConverter None 56963536 10:25:30:769000\n']
Exited __init__ ConfigHandler None 56663624 10:25:30:747000
Exited __init__ ColumnConverter None 56963312 10:25:30:769000
Exited __init__ PredicatesFactory None 56963424 10:25:30:769000
Exited __init__ LogFileConverter None 56963536 10:25:30:769000
### Entering __init__ => ["Entering __init__ ConfigHandler ['config_filepath'] 56663624 10:25:30:743000\n", 'Entering __init__ ColumnConverter [] 56963312 10:25:30:769000\n', 'Entering __init__ PredicatesFactory [] 56963424 10:25:30:769000\n', 'Entering __init__ LogFileConverter [] 56963536 10:25:30:769000\n']
Entering __init__ ConfigHandler ['config_filepath'] 56663624 10:25:30:743000
Entering __init__ ColumnConverter [] 56963312 10:25:30:769000
Entering __init__ PredicatesFactory [] 56963424 10:25:30:769000
Entering __init__ LogFileConverter [] 56963536 10:25:30:769000