Iterating through a list where I have to grab data from the first item for use in the last item

Question 1

I ended up using a bunch of conditionals to solve this problem.

The code is inelegant, and involves a lot of repetition, but for a quick-and-dirty script that I'll use over and over, I think it suffices.

##here, i process all the gibson primers to get the final list of primers##
##=======================================================================##
    # Walk gibson_primer_temp_list once and append finished primers to
    # primer_list (both names are defined outside this fragment).
    #
    # Row layout, as used below and in the debug prints:
    #   row[0] sequence, row[1] construct number, row[2] part number,
    #   row[3] kind ('fp_anneal', 'tp_anneal', 'fp_overhang', 'tp_overhang'),
    #   row[4] sequence number within the construct.
    # NOTE(review): the loop assumes rows are grouped by construct and ordered
    # by part/sequence number -- confirm against whatever builds the list.
    construct_num = 1   # construct currently being processed
    temp = []           # rows already handled by one of the branches below
    part_num = 1        # part counter within the current construct
    temp_row_num = 1    # kept at len(temp) + 1: bumped after every temp.append
    max_seq_num = 0     # number of rows belonging to construct_num

    for row in gibson_primer_temp_list:

        # Recount, for every row, how many rows belong to the current
        # construct; that count is treated as the last sequence number.
        # NOTE(review): this runs before construct_num is advanced below, so on
        # the first row of a new construct it still holds the previous
        # construct's count.
        max_seq_num = 0

        for x in gibson_primer_temp_list:
            if int(x[1]) > construct_num:
                pass  # no-op; rows of later constructs are simply not counted
            if int(x[1]) == construct_num:
                max_seq_num += 1
##        print('Const. number counter is at ' + str(construct_num) + ' and current maximum known number of sequences is ' + str(max_seq_num))

##        print(row[1])

##        if int(row[1]) < construct_num:
##            while construct_num < int(row[1]):
##        print(max_seq_num)
##        for row in gibson_primer_temp_list:
##            if int(row[1]) == construct_num:
##                max_seq_num += 1
##            if int(row[1]) > construct_num:
##                break

        #print('Construct number is ' + str(row[1]) + ' and seq. number is ' + str(row[4]))
        #print('Const. number counter is ' + str(construct_num) + ' and max. seq. number is ' + str(max_seq_num) + '.')

        # The row belongs to a later construct: restart the part counter and
        # advance construct_num until it matches the row.
        if int(row[1]) > construct_num:
            part_num = 1
            while construct_num < int(row[1]):
                #print('Construct number is ' + str(construct_num))
                construct_num += 1
##                temp_row_num += 1 #do not uncomment
            #continue - not to be added back again!

        if int(row[1]) == construct_num:

            # Last sequence of the construct: build the two primers that join
            # the last part back to the first part.
            if int(row[4]) == max_seq_num:

                #print(row)
                temp.append(row)
                temp_row_num += 1
                #print('We are going to make primers that join the first and last part in construct ' + str(construct_num))
                #print('Grabbing overhang portion from part ' + str(part_num) + ', which is sequence ' + str(row[4]) + '. It has the sequence ' + str(row[0]))
                overhang = row
                #print('Grabbing the first sequence...')
                # Forward primer: this row's sequence followed by the sequence
                # of the construct's row whose sequence number is 1.
                # NOTE(review): x[4] is compared to the int 1 without int(),
                # unlike row[4] above; if the field is a string this never
                # matches and no forward primer is appended -- confirm type.
                for x in gibson_primer_temp_list:
                    #print(row[1] == x[1] and x[4] == 1)
                    if row[1] == x[1] and x[4] == 1:
                        #print(x[0])
                        anneal = x
                        #print('The first sequence is ' + str(anneal))
                        fw_primer = overhang[0] + anneal [0]
                        #print('The forward primer on the first part is: ' + str(fw_primer))
                        primer_list.append([fw_primer, construct_num, x[2], 'fw primer'])
                        break

                #print('Grabbing the third sequence...')
                # Reverse primer, first half: the construct's row with sequence
                # number 3 replaces `overhang` (it stays as `row` if none matches).
                for y in gibson_primer_temp_list:
                    #print(row[1] == y[1] and y[4] == 3)
                    if row[1] == y[1] and y[4] == 3:
                        #print(y[0])
                        overhang = y
                        #print('The third sequence is ' + str(overhang))
                        break

                #print('Grabbing the (n-2)th sequence...')
                # Reverse primer, second half: the row two sequence numbers
                # before the last one.
                steps_backward = 2
                target_seq_num = max_seq_num - steps_backward
                for z in gibson_primer_temp_list:
                    #print(row[1] == z[1] and z[4] == target_seq_num)
                    if row[1] == z[1] and z[4] == target_seq_num:
                        #print(z[0])
                        anneal = z
                        #print('The n-2th sequence is ' + str(anneal))
                        break

                # z is whichever row the loop above stopped on; its part number
                # labels the reverse primer.
                re_primer = overhang[0] + anneal[0]
                primer_list.append([re_primer, construct_num, z[2], 're primer'])
                continue

            # Rows of part 1 are only stored; their primers are produced by the
            # last-sequence branch above.
            if part_num == int(row[2]) and part_num == 1: #if the part number counter = part number
                #print(row)
                temp.append(row)
                temp_row_num += 1
                continue #do NOT delete this continue

            # The row belongs to a later part than the counter: step the counter
            # by one and fall through to the kind-specific branches below.
            if part_num < int(row[2]):
                #print('Current part is: ' + str(part_num) + '. Upping part number.' + '\n')
                part_num += 1
                #do NOT add in a "continue" here


            # fp_anneal: the forward primer of the current part is the sequence
            # of the row stored just before this one, followed by this row's
            # sequence.
            if part_num == int(row[2]) and row[3] == 'fp_anneal':
                #print(row)
                temp.append(row)
                temp_row_num += 1
                #print('Current part is: ' + str(part_num))
                #print('Grabbing tp_overhang from part ' + str(part_num - 1) + '...')
                # x counts positions in temp; with temp_row_num == len(temp) + 1
                # the match below selects temp[-2], the row appended before the
                # current one.  The loop rebinds `row`; it finishes on temp[-1],
                # which is the row appended just above.
                x = 1
                for row in temp:
                    x += 1
                    if x == temp_row_num - 1:
                        prev_tp_overhang = row
                #print('Sequence of tp_overhang from part ' + str(part_num - 1) + ' is: ' + prev_tp_overhang[0])
                fw_primer_current = prev_tp_overhang[0] + row[0]
                #print('Appending to master primer list...')
                primer_list.append([fw_primer_current, construct_num, part_num, 'fw primer'])
                #print('Forward primer is: ' + str(fw_primer_current) + '\n')
                continue

            # tp_anneal rows are only stored for later lookup.
            if part_num == int(row[2]) and row[3] == 'tp_anneal':
                #print(row)
                temp.append(row)
                temp_row_num += 1
                continue


            # fp_overhang: the reverse primer of the previous part is this
            # row's sequence followed by the sequence of an earlier stored row.
            if part_num == int(row[2]) and row[3] == 'fp_overhang':
                #print(row)
                temp.append(row)
                temp_row_num += 1
                #print('Current temp_row_num is ' + str(temp_row_num))
                #print('Current part is: ' + str(part_num))
                #print('Grabbing tp_anneal from part ' + str(part_num - 1) + '...')
                # Same positional lookup as in the fp_anneal branch, but the
                # offset of 5 selects temp[-6], five rows before the current one.
                # NOTE(review): the fixed offset presumably relies on a fixed
                # number of rows per part -- confirm against the input layout.
                x = 1
                for row in temp:
                    x += 1
                    if x == temp_row_num - 5:
                        prev_tp_anneal = row
                        #print(row)
                        pass
                #print('Sequence of tp_anneal from part ' + str(part_num - 1) + ' is: ' + prev_tp_anneal[0])
                # `row` is temp[-1] again here, i.e. the current fp_overhang row.
                re_primer_prev = row[0] + prev_tp_anneal[0]
                #print('Appending to master primer list...')
                primer_list.append([re_primer_prev, construct_num, part_num - 1, 're primer'])
                #print('Reverse primer for previous part is: ' + str(re_primer_prev) + '\n')
                part_num += 1
                continue

            # tp_overhang rows are only stored; the fp_anneal branch reads the
            # row stored immediately before it.
            if part_num == int(row[2]) and row[3] == 'tp_overhang':
                #print(row)
                temp.append(row)
                temp_row_num += 1
                continue

            # Rows matching none of the branches are skipped without being stored.
            continue

Thanks everybody for the help!

Question 2

Martineau probably has the right idea with a domain-specific language. I don't have any experience with that, but here's what I came up with in a half hour.

I haven't run, debugged, or tested this, so let me know if you have any questions. The code also assumes the fragments are short enough to hold in memory; if that assumption doesn't hold, this approach won't be very efficient.

I was also lazy in my design of the fragments dictionary. It really shouldn't be a global variable, and this entire code should be in a class.

def head(seq, count):
    """Return the first ``count`` items of ``seq``.

    A ``count`` larger than ``len(seq)`` returns the whole sequence; a
    ``count`` of zero or less returns an empty sequence of the same type.
    """
    if count <= 0:
        # Avoid negative-index slicing, which would return all but the last items.
        return seq[:0]
    return seq[:count]

def tail(seq, count):
    """Return the last ``count`` items of ``seq``.

    A ``count`` larger than ``len(seq)`` returns the whole sequence; a
    ``count`` of zero or less returns an empty sequence of the same type.
    """
    if count <= 0:
        # seq[-0:] would be the whole sequence, so handle zero explicitly.
        return seq[:0]
    return seq[-count:]

def reverse(nt):
    """Return the complementary base for a single nucleotide.

    Accepts upper- or lower-case 'G', 'C', 'A' or 'T' and always returns the
    upper-case partner (G<->C, A<->T).

    Raises:
        ValueError: if ``nt`` is not exactly one of those bases.  ValueError
            is a subclass of Exception, so callers catching Exception still
            work.
    """
    pairs = {'G': 'C', 'C': 'G', 'A': 'T', 'T': 'A'}
    # An exact dictionary lookup rejects '' and multi-character strings, which
    # a substring test such as ``nt in 'Gg'`` would wrongly accept.
    partner = pairs.get(nt.upper())
    if partner is None:
        raise ValueError('invalid nucleotide')
    return partner

def complement(seq):
    """Return the base-by-base complement of ``seq`` as a string.

    Each nucleotide is mapped through ``reverse``; the order of the bases is
    left unchanged (this is a complement, not a reverse complement).  A string
    is returned rather than a generator so that callers can pass the result to
    ``str()`` or concatenate it directly.
    """
    return ''.join(reverse(nt) for nt in seq)

def identity(seq):
    """Hand ``seq`` back untouched; the no-op choice where a transform is expected."""
    unchanged = seq
    return unchanged

# Registry of named fragments, filled by register().
fragments = {}


def register(fragment, name):
    """Store ``fragment`` in the registry under ``name``.

    Raises:
        Exception: if ``name`` has already been registered; the existing
            entry is left in place.
    """
    already_known = name in fragments
    if already_known:
        raise Exception('a fragment of that name is already registered!')
    fragments[name] = fragment

def make_combination(portions):
    """Build one sequence string from portions of registered fragments.

    Each entry in ``portions`` should be a tuple consisting of:
    (sequence name, count, accessor function, complement function)

    For every entry the named fragment is looked up in ``fragments``, the
    accessor is called as ``accessor(fragment, count)``, and the complement
    function is applied to the result.  The pieces are concatenated in the
    order given.

    Returns:
        The concatenated pieces as a single string ('' for no portions).

    Raises:
        KeyError: if a sequence name has not been registered.
    """
    pieces = []
    for name, count, select, order in portions:
        section = order(select(fragments[name], count))
        # ''.join handles both a plain string and any iterable of
        # one-character strings (e.g. a generator); str() on a generator
        # would produce its repr instead of the bases.
        pieces.append(''.join(section))
    # return a string, not a list
    return ''.join(pieces)

def example():
    """Register three sample fragments and describe two combinations.

    ``first`` and ``second`` are portion specs in the tuple format that
    ``make_combination`` documents: (name, count, accessor, complement).
    The sample fragments are only 7 bases long, so the counts of 20 and 40
    ask for more bases than the fragments contain.
    """
    a = 'gattaca'
    b = 'tagacat'
    c = 'gagatac'
    register(a, 'a')
    register(b, 'b')
    register(c, 'c')
    # the last 20 nucleotides (n.t.) of C with the first 40 n.t. of A
    first = (('c', 20, tail, identity),
             ('a', 40, head, identity))
    # complement of the first 20 n.t. of B with the last n.t. of A
    # NOTE(review): this was originally described as the reverse complement
    # ("RC") of both portions, but A is passed through `identity` here and
    # `complement` is not shown reversing the order -- confirm the intent.
    second = (('b', 20, head, complement),
              ('a', 1, tail, identity))