Distance calculation between two atoms in pdb file

Question 1

You can start from here:

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)^2 + (y1-y2)^2 + (z1-z2)^2)}' file    # calculate the distance.

4.88067

Not sure why ^ is not supported in GAWK 3.1.5, how about this?

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' file    # calculate the distance.

Question 2

This is a complete script that incorporates the suggested awk command. Change the pdb file to your below and place this in the same directory as the pdb file.

#!/usr/bin/env bash

echo " "
echo "Use one letter abbreviations. Case doesn't matter." 
echo "Example: A 17 CA or n 162 cg"

echo " - - - - - - - - - - - - - - - - - -"
#------------First Selection------------

read -e -p "What first atom? " sel1
echo " "

# echo $sel1
sel1caps=${sel1^^}
# echo "sel1caps="$sel1caps

arr1=($sel1caps)
# echo "arr1[0]= "${arr1[0]}
# echo "arr1[1]= "${arr1[1]}
# echo "arr1[2]= "${arr1[2]}

#To convert one to three letters

if [ ${arr1[0]} = A ] ; then
    SF1=ALA
elif [ ${arr1[0]} = H ] ; then
    SF1=HIS
elif [ ${arr1[0]} = R ] ; then
    SF1=ARG
elif [ ${arr1[0]} = K ] ; then
    SF1=LYS
elif [ ${arr1[0]} = I ] ; then
    SF1=ILE
elif [ ${arr1[0]} = F ] ; then
    SF1=PHE 
elif [ ${arr1[0]} = L ] ; then
    SF1=LEU
elif [ ${arr1[0]} = W ] ; then
    SF1=TRP
elif [ ${arr1[0]} = M ] ; then
    SF1=MET
elif [ ${arr1[0]} = P ] ; then
    SF1=PRO 
elif [ ${arr1[0]} = C ] ; then
    SF1=CYS 
elif [ ${arr1[0]} = N ] ; then
    SF1=ASN
elif [ ${arr1[0]} = V ] ; then
    SF1=VAL
elif [ ${arr1[0]} = G ] ; then
    SF1=GLY 
elif [ ${arr1[0]} = S ] ; then
    SF1=SER 
elif [ ${arr1[0]} = Q ] ; then
    SF1=GLN 
elif [ ${arr1[0]} = Y ] ; then
    SF1=TYR 
elif [ ${arr1[0]} = D ] ; then
    SF1=ASP
elif [ ${arr1[0]} = E ] ; then
    SF1=GLU 
elif [ ${arr1[0]} = T ] ; then
    SF1=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF1 ="$SF1

#If there is nothing printing for line 1, check the expression for your pdb file. The spaces may need adjustment at the end.
line1=$(grep -E "${arr1[2]} *?${SF1}(.*?) ${arr1[1]}     " 1A23.pdb)
# echo $line1

ar_l1=($line1)
# echo "ar_l1[1]="${ar_l1[1]}

echo " - - - - - - - - - - - - - - - - - -"
#------------Second Selection------------

read -e -p "What second atom? " sel2
echo " "

# echo $sel2

sel2caps=${sel2^^}
# echo "sel2caps="$sel2caps

arr2=($sel2caps)
# echo "arr2[0]= "${arr2[0]}
# echo "arr2[1]= "${arr2[1]}
# echo "arr2[2]= "${arr2[2]}

#To convert one to three letters

if [ ${arr2[0]} = A ] ; then
    SF2=ALA
elif [ ${arr2[0]} = H ] ; then
    SF2=HIS
elif [ ${arr2[0]} = R ] ; then
    SF2=ARG
elif [ ${arr2[0]} = K ] ; then
    SF2=LYS
elif [ ${arr2[0]} = I ] ; then
    SF2=ILE
elif [ ${arr2[0]} = F ] ; then
    SF2=PHE 
elif [ ${arr2[0]} = L ] ; then
    SF2=LEU
elif [ ${arr2[0]} = W ] ; then
    SF2=TRP
elif [ ${arr2[0]} = M ] ; then
    SF2=MET
elif [ ${arr2[0]} = P ] ; then
    SF2=PRO 
elif [ ${arr2[0]} = C ] ; then
    SF2=CYS 
elif [ ${arr2[0]} = N ] ; then
    SF2=ASN
elif [ ${arr2[0]} = V ] ; then
    SF2=VAL
elif [ ${arr2[0]} = G ] ; then
    SF2=GLY 
elif [ ${arr2[0]} = S ] ; then
    SF2=SER 
elif [ ${arr2[0]} = Q ] ; then
    SF2=GLN 
elif [ ${arr2[0]} = Y ] ; then
    SF2=TYR 
elif [ ${arr2[0]} = D ] ; then
    SF2=ASP
elif [ ${arr2[0]} = E ] ; then
    SF2=GLU 
elif [ ${arr2[0]} = T ] ; then
    SF2=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF2 ="$SF2


line2=$(grep -E "${arr2[2]} *?${SF2}(.*?) ${arr2[1]}     " 1A23.pdb)
# echo $line2

ar_l2=($line2)
# echo "ar_l2[1]="${ar_l2[1]}
# echo "ar_l2[1]="${ar_l2[1]}

atom1=${ar_l1[1]}
atom2=${ar_l2[1]}
echo " "
echo "==========================="
# 6, 7, 8 are column numbers in the pdb file. 
# If there are multiple molecules it should be 7, 8, 9.
awk '$2=='$atom1'{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
     $2=='$atom2'{x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
     END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' 1A23.pdb    # calculate the distance.

echo "Angstroms"
echo "==========================="