Treating this as straightforward data processing based on your ad-hoc requirement above, I can come up with the following algorithm.
First sweep: collect frequency information for every key (i.e. 'A', 'B', 'C'):
def generate_frequency_table(lst):
    """Count how often each value occurs for every key.

    Each item of ``lst`` must unpack into three parts
    ``(key, unused, val)``; the middle part is ignored.

    Returns a dict that maps each key to a dict of ``{val: count}``,
    e.g. ``{'A': {'id1': 3, 'id2': 2}}``. An empty ``lst`` yields ``{}``.
    """
    assoc = {}  # e.g. 'A': {'id1': 3, 'id2': 2}
    # Iterate over the argument itself, not the builtin ``list`` type.
    for key, _unused, val in lst:
        # Fetch this key's counter dict, creating it on first sight.
        freqs = assoc.setdefault(key, {})
        freqs[val] = freqs.get(val, 0) + 1
    return assoc
>>> generate_frequency_table(lst)
{'A': {'id2': 2, 'id1': 3}, 'C': {'id6': 2, 'id5': 1}, 'B': {'id3': 4, 'id': 1}}
Then, determine which value is most frequently associated with each key (e.g. {'A': 'id1'}):
def generate_max_assoc(assoc):
    """Pick the most frequent value for every key.

    ``assoc`` maps each key to a dict of ``{val: count}``.

    Returns a dict that maps each key to the value with the highest
    count, e.g. ``{'A': 'id1'}``. When several values tie, the one
    encountered first while iterating the inner dict wins. A key whose
    inner dict is empty (or holds no count above 0) maps to ``''``.
    """
    # Named so it does not shadow the builtin ``max``.
    best_by_key = {}  # e.g. {'A': 'id1'}
    # ``.items()`` exists on both Python 2 and Python 3 dicts, whereas
    # ``.iteritems()`` is Python 2 only.
    for key, freqs in assoc.items():
        best_val, best_freq = '', 0
        for val, freq in freqs.items():
            # Strict '>' keeps the first value seen when counts tie.
            if freq > best_freq:
                best_val, best_freq = val, freq
        best_by_key[key] = best_val
    return best_by_key
>>> maxtable = generate_max_assoc(generate_frequency_table(lst))
>>> print maxtable
{'A': 'id1', 'C': 'id6', 'B': 'id3'}
Finally, iterate through the original list and replace values using the table above:
>>> newlst = [[key, unused, maxtable[key]] for key, unused, val in lst]
>>> print newlst
[['A', 'abc', 'id1'], ['A', 'def', 'id1'], ['A', 'ghi', 'id1'], ['A', 'ijk', 'id1'], ['A', 'lmn', 'id1'], ['B', 'abc', 'id3'], ['B', 'def', 'id3'], ['B', 'ghi', 'id3'], ['B', 'ijk', 'id3'], ['B', 'lmn', 'id3'], ['C', 'xyz', 'id6'], ['C', 'lmn', 'id6'], ['C', 'aaa', 'id6']]