In the end I made my own Python function, thought I may as well share it.
It takes an aligned peptide sequence with gaps and the corresponding un-aligned nucleotide sequence and gives an aligned nucleotide sequence:
Function
def gapsFromPeptide( peptide_seq, nucleotide_seq ):
""" Transfers gaps from aligned peptide seq into codon partitioned nucleotide seq (codon alignment)
- peptide_seq is an aligned peptide sequence with gaps that need to be transferred to nucleotide seq
- nucleotide_seq is an un-aligned dna sequence whose codons translate to peptide seq"""
def chunks(l, n):
""" Yield successive n-sized chunks from l."""
for i in xrange(0, len(l), n):
yield l[i:i+n]
codons = [codon for codon in chunks(nucleotide_seq,3)] #splits nucleotides into codons (triplets)
gappedCodons = []
codonCount = 0
for aa in peptide_seq: #adds '---' gaps to nucleotide seq corresponding to peptide
if aa!='-':
gappedCodons.append(codons[codonCount])
codonCount += 1
else:
gappedCodons.append('---')
return(''.join(gappedCodons))
Usage
>>> unaligned_dna_seq = 'ATGATGATG'
>>> aligned_peptide_seq = 'M-MM'
>>> aligned_dna_seq = gapsFromPeptide(aligned_peptide_seq, unaligned_dna_seq)
>>> print(aligned_dna_seq)
ATG---ATGATG