Here's what I came up with. I'd do some more refactoring, etc., before calling it done.
import pprint
# Attribute (column) names of the two relations, listed in column order.
t1atts = ("A", "B", "C", "D")
t2atts = ("B", "D", "E")

# Rows of the first relation; values line up positionally with t1atts.
t1tuples = [
    ["A", 1, "A", "a"],
    ["B", 2, "Y", "a"],
    ["Y", 4, "B", "b"],
    ["A", 1, "Y", "a"],
    ["S", 2, "B", "b"],
]

# Rows of the second relation; values line up positionally with t2atts.
t2tuples = [
    [1, "a", "A"],
    [3, "a", "B"],
    [1, "a", "Y"],
    [2, "b", "S"],
    [3, "b", "E"],
]

# Attribute name -> column position, used to index into a row by name.
t1map = {name: position for position, name in enumerate(t1atts)}
t2map = {name: position for position, name in enumerate(t2atts)}

# The same names as sets, so the join columns fall out of set algebra.
t1columns = {*t1atts}
t2columns = {*t2atts}

# Attributes present in both relations: rows are joined on these.
join_on = t1columns.intersection(t2columns)
# Attributes only the second relation has: these get appended to the result.
diff = t2columns.difference(join_on)
def match(row1, row2, on=None, left_index=None, right_index=None):
    """Return True when two rows agree on every join attribute.

    Args:
        row1: A row of the first relation (indexable by column position).
        row2: A row of the second relation (indexable by column position).
        on: Iterable of attribute names to compare. Defaults to the
            module-level ``join_on``.
        left_index: Mapping of attribute name -> position in ``row1``.
            Defaults to the module-level ``t1map``.
        right_index: Mapping of attribute name -> position in ``row2``.
            Defaults to the module-level ``t2map``.

    Returns:
        True if ``row1`` and ``row2`` hold equal values for every name in
        ``on``. An empty ``on`` yields True, so every pair of rows matches.

    Raises:
        KeyError: If a name in ``on`` is missing from either index mapping.
    """
    # Compare against None explicitly: an empty set of join attributes is a
    # legitimate argument and must not fall back to the module default.
    if on is None:
        on = join_on
    if left_index is None:
        left_index = t1map
    if right_index is None:
        right_index = t2map
    return all(
        row1[left_index[name]] == row2[right_index[name]] for name in on
    )
# Natural join: pair every t1 row with every t2 row that agrees on the shared
# attributes, and extend the t1 row with the values of the t2-only attributes.

# Resolve the t2-only attributes to column positions once, in ascending
# position. Iterating the `diff` set directly would append the extra columns
# in set-iteration order, which for string names can differ between runs.
extra_positions = sorted(t2map[name] for name in diff)

results = []
for t1row in t1tuples:
    for t2row in t2tuples:
        if match(t1row, t2row):
            # Build a fresh list so the source rows are never mutated.
            results.append(
                [*t1row, *(t2row[pos] for pos in extra_positions)]
            )
pprint.pprint(results)
And I get the expected results:
[['A', 1, 'A', 'a', 'A'],
['A', 1, 'A', 'a', 'Y'],
['A', 1, 'Y', 'a', 'A'],
['A', 1, 'Y', 'a', 'Y'],
['S', 2, 'B', 'b', 'S']]