Question

I have a pdb files such as like this below.

ATOM     76  N   ARG     6      -6.350  -3.503   0.748 -0.3479  1.5500
ATOM     77  H   ARG     6      -6.002  -3.513   1.699  0.2747  1.3000
ATOM     78  CA  ARG     6      -7.756  -3.896   0.534 -0.2637  1.7000
ATOM     79  HA  ARG     6      -8.024  -4.587   1.334  0.1560  1.3000
ATOM     80  CB  ARG     6      -8.589  -2.608   0.737 -0.0007  1.7000
ATOM     81  HB2 ARG     6      -8.208  -2.089   1.620  0.0327  1.3000
ATOM     82  HB3 ARG     6      -8.439  -1.940  -0.114  0.0327  1.3000
ATOM     83  CG  ARG     6     -10.086  -2.834   0.970  0.0390  1.7000
ATOM     84  HG2 ARG     6     -10.274  -3.897   1.124  0.0285  1.3000
ATOM     85  HG3 ARG     6     -10.378  -2.310   1.876  0.0285  1.3000
ATOM     86  CD  ARG     6     -10.953  -2.295  -0.177  0.0486  1.7000
ATOM     87  HD2 ARG     6     -10.985  -1.204  -0.114  0.0687  1.3000
ATOM     88  HD3 ARG     6     -10.524  -2.559  -1.143  0.0687  1.3000
ATOM     89  NE  ARG     6     -12.315  -2.833  -0.075 -0.5295  1.5500
ATOM     90  HE  ARG     6     -13.010  -2.260   0.390  0.3456  1.3000
ATOM     91  CZ  ARG     6     -12.694  -4.065  -0.346  0.8076  1.7000
ATOM     92  NH1 ARG     6     -13.877  -4.453   0.009 -0.8627  1.5500
ATOM     93 1HH1 ARG     6     -14.040  -5.443   0.120  0.4478  1.3000
ATOM     94 2HH1 ARG     6     -14.432  -3.807   0.560  0.4478  1.3000
ATOM     95  NH2 ARG     6     -11.920  -4.944  -0.916 -0.8627  1.5500
ATOM     96 1HH2 ARG     6     -12.283  -5.850  -1.134  0.4478  1.3000
ATOM     97 2HH2 ARG     6     -10.979  -4.672  -1.190  0.4478  1.3000
ATOM     98  C   ARG     6      -8.062  -4.673  -0.777  0.7341  1.7000
ATOM     99  O   ARG     6      -9.133  -4.539  -1.331 -0.5894  1.5000
......
ATOM    172  N   S1P    12     -14.038  -6.148   4.609 -0.4157  1.5500
ATOM    173  H   S1P    12     -13.159  -6.030   4.131  0.2719  1.3000
ATOM    174  CA  S1P    12     -14.531  -4.998   5.067  1.8687  1.7000
ATOM    175  HA  S1P    12     -15.438  -5.137   5.650 -0.3423  1.3000
ATOM    176  CB  S1P    12     -13.509  -4.203   5.988 -0.2136  1.7000
ATOM    177  2HB S1P    12     -14.138  -4.223   6.879  0.0612  1.3000
ATOM    178  3HB S1P    12     -12.848  -5.033   6.243  0.0612  1.3000
ATOM    179  OG  S1P    12     -12.482  -3.545   5.746 -0.5829  1.5000
ATOM    180  P   S1P    12     -11.815  -3.403   4.409  1.4551  1.8500
ATOM    181  O3P S1P    12     -10.346  -3.231   4.357 -0.8194  1.5000
ATOM    182  O1P S1P    12     -11.897  -4.712   3.815 -0.9362  1.5000
ATOM    183  H1P S1P    12     -10.980  -4.847   3.540  0.5898  0.8000
ATOM    184  O2P S1P    12     -12.533  -2.620   3.418 -0.7440  1.5000
ATOM    185  H2P S1P    12     -13.423  -2.977   3.402  0.5973  0.8000
ATOM    186  C   S1P    12     -14.940  -4.030   3.844 -0.6568  1.7000
ATOM    187  O   S1P    12     -14.809  -4.386   2.691 -0.1942  1.5000

What i want to do is I would like to find the distance between two atoms within a file.

the two atoms that i am interested is

ATOM     91  CZ  ARG     6     -12.694  -4.065  -0.346  0.8076  1.7000

and

ATOM    180  P   S1P    12     -11.815  -3.403   4.409  1.4551  1.8500

The x y z coordinate in the line for example is -11.815 -3.403 4.409

I looked around but only scripts that I could find were calculating distance for all the atoms or between two different files.

Thank you in advanced.

Was it helpful?

Solution

You can start from here:

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)^2 + (y1-y2)^2 + (z1-z2)^2)}' file    # calculate the distance.

4.88067

Not sure why ^ is not supported in GAWK 3.1.5, how about this?

 awk '$2=="91"{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
      $2=="180" {x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
      END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' file    # calculate the distance.

OTHER TIPS

This is a complete script that incorporates the suggested awk command. Change the pdb file to your below and place this in the same directory as the pdb file.

#!/usr/bin/env bash

echo " "
echo "Use one letter abbreviations. Case doesn't matter." 
echo "Example: A 17 CA or n 162 cg"

echo " - - - - - - - - - - - - - - - - - -"
#------------First Selection------------

read -e -p "What first atom? " sel1
echo " "

# echo $sel1
sel1caps=${sel1^^}
# echo "sel1caps="$sel1caps

arr1=($sel1caps)
# echo "arr1[0]= "${arr1[0]}
# echo "arr1[1]= "${arr1[1]}
# echo "arr1[2]= "${arr1[2]}

#To convert one to three letters

if [ ${arr1[0]} = A ] ; then
    SF1=ALA
elif [ ${arr1[0]} = H ] ; then
    SF1=HIS
elif [ ${arr1[0]} = R ] ; then
    SF1=ARG
elif [ ${arr1[0]} = K ] ; then
    SF1=LYS
elif [ ${arr1[0]} = I ] ; then
    SF1=ILE
elif [ ${arr1[0]} = F ] ; then
    SF1=PHE 
elif [ ${arr1[0]} = L ] ; then
    SF1=LEU
elif [ ${arr1[0]} = W ] ; then
    SF1=TRP
elif [ ${arr1[0]} = M ] ; then
    SF1=MET
elif [ ${arr1[0]} = P ] ; then
    SF1=PRO 
elif [ ${arr1[0]} = C ] ; then
    SF1=CYS 
elif [ ${arr1[0]} = N ] ; then
    SF1=ASN
elif [ ${arr1[0]} = V ] ; then
    SF1=VAL
elif [ ${arr1[0]} = G ] ; then
    SF1=GLY 
elif [ ${arr1[0]} = S ] ; then
    SF1=SER 
elif [ ${arr1[0]} = Q ] ; then
    SF1=GLN 
elif [ ${arr1[0]} = Y ] ; then
    SF1=TYR 
elif [ ${arr1[0]} = D ] ; then
    SF1=ASP
elif [ ${arr1[0]} = E ] ; then
    SF1=GLU 
elif [ ${arr1[0]} = T ] ; then
    SF1=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF1 ="$SF1

#If there is nothing printing for line 1, check the expression for your pdb file. The spaces may need adjustment at the end.
line1=$(grep -E "${arr1[2]} *?${SF1}(.*?) ${arr1[1]}     " 1A23.pdb)
# echo $line1

ar_l1=($line1)
# echo "ar_l1[1]="${ar_l1[1]}

echo " - - - - - - - - - - - - - - - - - -"
#------------Second Selection------------

read -e -p "What second atom? " sel2
echo " "

# echo $sel2

sel2caps=${sel2^^}
# echo "sel2caps="$sel2caps

arr2=($sel2caps)
# echo "arr2[0]= "${arr2[0]}
# echo "arr2[1]= "${arr2[1]}
# echo "arr2[2]= "${arr2[2]}

#To convert one to three letters

if [ ${arr2[0]} = A ] ; then
    SF2=ALA
elif [ ${arr2[0]} = H ] ; then
    SF2=HIS
elif [ ${arr2[0]} = R ] ; then
    SF2=ARG
elif [ ${arr2[0]} = K ] ; then
    SF2=LYS
elif [ ${arr2[0]} = I ] ; then
    SF2=ILE
elif [ ${arr2[0]} = F ] ; then
    SF2=PHE 
elif [ ${arr2[0]} = L ] ; then
    SF2=LEU
elif [ ${arr2[0]} = W ] ; then
    SF2=TRP
elif [ ${arr2[0]} = M ] ; then
    SF2=MET
elif [ ${arr2[0]} = P ] ; then
    SF2=PRO 
elif [ ${arr2[0]} = C ] ; then
    SF2=CYS 
elif [ ${arr2[0]} = N ] ; then
    SF2=ASN
elif [ ${arr2[0]} = V ] ; then
    SF2=VAL
elif [ ${arr2[0]} = G ] ; then
    SF2=GLY 
elif [ ${arr2[0]} = S ] ; then
    SF2=SER 
elif [ ${arr2[0]} = Q ] ; then
    SF2=GLN 
elif [ ${arr2[0]} = Y ] ; then
    SF2=TYR 
elif [ ${arr2[0]} = D ] ; then
    SF2=ASP
elif [ ${arr2[0]} = E ] ; then
    SF2=GLU 
elif [ ${arr2[0]} = T ] ; then
    SF2=THR 
else
    echo "use one letter codes"
    echo "exiting"
    exit
fi

# echo "SF2 ="$SF2


line2=$(grep -E "${arr2[2]} *?${SF2}(.*?) ${arr2[1]}     " 1A23.pdb)
# echo $line2

ar_l2=($line2)
# echo "ar_l2[1]="${ar_l2[1]}
# echo "ar_l2[1]="${ar_l2[1]}

atom1=${ar_l1[1]}
atom2=${ar_l2[1]}
echo " "
echo "==========================="
# 6, 7, 8 are column numbers in the pdb file. 
# If there are multiple molecules it should be 7, 8, 9.
awk '$2=='$atom1'{x1=$6;y1=$7;z1=$8}                                 # get the ATOM 1
     $2=='$atom2'{x2=$6;y2=$7;z2=$8}                               # get the ATOM 2
     END{print sqrt((x1-x2)*(x1-x2) + (y1-y2)*(y1-y2) + (z1-z2)*(z1-z2))}' 1A23.pdb    # calculate the distance.

echo "Angstroms"
echo "==========================="
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top