Question

I am working on implementing a natural join in Python. In the code below, the first two definitions give each table's attributes and the next two give each table's tuples (rows).

Expected Output:

[['A', 1, 'A', 'a', 'A'], 
 ['A', 1, 'A', 'a', 'Y'], 
 ['A', 1, 'Y', 'a', 'A'], 
 ['A', 1, 'Y', 'a', 'Y'], 
 ['S', 2, 'B', 'b', 'S']]

And what I got:

[['A', 1, 'A', 'a', 'A', 'Y'], 
 ['A', 1, 'A', 'a', 'A', 'Y']]

I have looked through the code and everything seems to be right. I would appreciate any help.

# Attribute (column) names of each table, in positional order.
# 'B' and 'D' appear in both tuples, so they are the natural-join columns.
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

# Rows of table 1; each inner list has one value per name in t1atts.
t1tuples = [['A', 1, 'A', 'a'], 
            ['B', 2, 'Y', 'a'], 
            ['Y', 4, 'B', 'b'], 
            ['A', 1, 'Y', 'a'], 
            ['S', 2, 'B', 'b']]

# Rows of table 2; each inner list has one value per name in t2atts.
t2tuples = [[1, 'a', 'A'], 
            [3, 'a', 'B'], 
            [1, 'a', 'Y'], 
            [2, 'b', 'S'], 
            [3, 'b', 'E']]

def findindices(t1atts, t2atts):
  """Locate the attributes that two tables have in common.

  Every position in t1atts is compared against every position in t2atts;
  each equal pair contributes one entry to both returned lists, so the two
  lists always have the same length and line up element by element.

  Returns a tuple (t1index, t2index) of position lists, ordered by position
  in t1atts first and position in t2atts second.
  """
  shared = [(left_pos, right_pos)
            for left_pos, left_name in enumerate(t1atts)
            for right_pos, right_name in enumerate(t2atts)
            if left_name == right_name]
  t1index = [left_pos for left_pos, _ in shared]
  t2index = [right_pos for _, right_pos in shared]
  return t1index, t2index

def main():
  """Attempt a natural join of t1tuples and t2tuples and print the result.

  This is the asker's original (Python 2) attempt, kept unchanged. The
  comments below describe what each step does and point out the two spots
  that explain the unexpected output quoted in the question.
  """
  tpl=0; tpl2=0; i=0; j=0; count=0; result=[]
  # Positions of the shared attributes in each table (parallel lists).
  t1index, t2index = findindices(t1atts, t2atts)
  for tpl in t1tuples:
    # NOTE: tpl2 is initialised once above and never reset inside this
    # for-loop. Once the first t1 row has walked through all of t2tuples,
    # tpl2 == len(t2tuples), so this while-loop body is skipped for every
    # later t1 row.
    while tpl2 in range(len(t2tuples)):
      i=0; j=0
      # Compare the join columns pairwise; i and j always advance together.
      while (i in range(len(t1index))) and (j in range(len(t2index))):
          if tpl[t1index[i]] != t2tuples[tpl2][t2index[j]]:
            # Mismatch: push both counters past the end to leave the loop.
            # (t1index and t2index have equal length, so using
            # len(t1index) for j as well has the same effect.)
            i=len(t1index)
            j=len(t1index)
          else:
            count+=1
          i+=1
          j+=1
      # All join columns matched for this pair of rows.
      if count == len(t1index):
        # Values from the t2 row that are not join columns.
        extravals = [val for index, val in enumerate(t2tuples[tpl2]) if index not in t2index]
        # NOTE: `temp = tpl` does not copy the list; temp and tpl are the
        # same object. `tpl += extravals` extends that list in place, so
        # the row inside t1tuples grows, every earlier reference stored in
        # result grows with it, and `tpl = temp` restores nothing.
        temp = tpl
        tpl += extravals
        result.append(tpl)
        tpl = temp
      count=0
      tpl2+=1
  print result
Was it helpful?

Solution

Here's what I came up with. I'd do some more refactoring, etc., before calling it done.

import pprint

# Attribute (column) names of each table, in positional order.
t1atts = ('A', 'B', 'C', 'D')
t2atts = ('B', 'D', 'E')

# Rows of table 1; one value per name in t1atts.
t1tuples = [
    ['A', 1, 'A', 'a'],
    ['B', 2, 'Y', 'a'],
    ['Y', 4, 'B', 'b'],
    ['A', 1, 'Y', 'a'],
    ['S', 2, 'B', 'b']]

# Rows of table 2; one value per name in t2atts.
t2tuples = [
    [1, 'a', 'A'],
    [3, 'a', 'B'],
    [1, 'a', 'Y'],
    [2, 'b', 'S'],
    [3, 'b', 'E']]


t1columns = set(t1atts)
t2columns = set(t2atts)
# Column name -> position of that column within a row of the table.
t1map = {k: i for i, k in enumerate(t1atts)}
t2map = {k: i for i, k in enumerate(t2atts)}

# Columns present in both tables; rows join when they agree on all of them.
join_on = t1columns & t2columns
# Columns that only table 2 contributes. Kept as a list in t2atts order
# (rather than a set) so that, when there is more than one such column,
# the values appended to each joined row come out in a stable order.
diff = [name for name in t2atts if name not in join_on]

def match(row1, row2):
    """Return True when row1 (table 1) and row2 (table 2) agree on every join column.

    The join columns come from the module-level ``join_on``; ``t1map`` and
    ``t2map`` translate each column name into a row position. With no join
    columns at all the result is True.
    """
    for column in join_on:
        left_value = row1[t1map[column]]
        right_value = row2[t2map[column]]
        if not (left_value == right_value):
            return False
    return True

# Natural join: pair every table-1 row with every table-2 row, keep the
# pairs that agree on the join columns, and emit a fresh list made of the
# table-1 row followed by table 2's non-join values.
results = [
    left + [right[t2map[column]] for column in diff]
    for left in t1tuples
    for right in t2tuples
    if match(left, right)
]

pprint.pprint(results)

And I get the expected results:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]

OTHER TIPS

OK, here is a solution; please verify it and let me know if it works for you:

I changed the naming a little to make it easier for me to follow:

#!/usr/bin/python

# Attribute (column) names of each table, in positional order.
table1 = ('A', 'B', 'C', 'D')
table2 = ('B', 'D', 'E')

# Rows of table1; one value per attribute name in table1.
row1 = [['A', 1, 'A', 'a'],
        ['B', 2, 'Y', 'a'],
        ['Y', 4, 'B', 'b'],
        ['A', 1, 'Y', 'a'],
        ['S', 2, 'B', 'b']]

# Rows of table2; one value per attribute name in table2.
row2 = [[1, 'a', 'A'],
        [3, 'a', 'B'],
        [1, 'a', 'Y'],
        [2, 'b', 'S'],
        [3, 'b', 'E']]

def findindices(table1, table2):
    """Return the positions of the shared attributes in each table.

    table1 and table2 are sequences of attribute names (they must support
    ``.index``). The result is a pair of equally long lists
    ``(tup_index1, tup_index2)``: element k of each list is the position,
    in table1 and table2 respectively, of the k-th shared attribute.
    Both lists are empty when the tables share no attribute.
    """
    # Set iteration order is arbitrary, so order the shared names by where
    # they appear in table1 to make the result deterministic.
    inter = sorted(set(table1).intersection(table2), key=table1.index)
    tup_index1 = [table1.index(x) for x in inter]
    tup_index2 = [table2.index(x) for x in inter]
    return tup_index1, tup_index2

def main():
    """Compute the natural join of row1 (table1) and row2 (table2).

    Two rows are joined when they hold equal values in every attribute the
    tables share. Each joined row is a new list: the table1 row followed by
    the table2 values of the attributes that table1 does not have, in
    table2's column order.

    Returns the list of joined rows, ordered by row1 and then by row2.
    """
    final_lol = list()

    tup_index1, tup_index2 = findindices(table1, table2)

    # Materialise the index pairs. On Python 3 zip() returns a one-shot
    # iterator; reusing it for every row pair would leave it empty after
    # the first pair, and every later pair would then count as a match.
    merge_tup = list(zip(tup_index1, tup_index2))

    # Positions of table2's non-join columns, instead of assuming that the
    # only extra column is the last one.
    extra_index2 = [pos for pos in range(len(table2)) if pos not in tup_index2]

    for tup1 in row1:
        for tup2 in row2:
            if all(tup1[i1] == tup2[i2] for i1, i2 in merge_tup):
                ls = list(tup1)
                ls.extend(tup2[pos] for pos in extra_index2)
                final_lol.append(ls)
    return final_lol

# Script entry point: pretty-print the joined rows returned by main().
if __name__ == '__main__':
    from pprint import pprint
    pprint(main())

Output:

[['A', 1, 'A', 'a', 'A'],
 ['A', 1, 'A', 'a', 'Y'],
 ['A', 1, 'Y', 'a', 'A'],
 ['A', 1, 'Y', 'a', 'Y'],
 ['S', 2, 'B', 'b', 'S']]
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top