reading your comment: " Also, I am wondering if for example process 3 is faster than process 2, if i can use the same way but as soon rank 1 finishes with the routine to notify rank 3 to run the routine and then rank 3 to notify rank 2. Is there any automatic way of this? to know which rank waits before the post-processing step longer?"
This is exactly the situation you can handle by letting the process with irank == 0 perform all the I/O while the other processes hand over their results with buffered sends.
In this case you don't want the processes to wait for each other (no barriers here); instead, each process dispatches its result as soon as it is ready and then continues calculating. When process 0 reaches its own post-processing step, it receives all the buffered data that has arrived, writes it out, and then writes its own data. You can try the standard MPI_SEND (implementations typically buffer small messages up to an implementation-defined size), but the best way is to use MPI_BSEND and attach a correctly sized buffer with MPI_BUFFER_ATTACH(). Something like this:
!> Funnel one post-processing record (i, var1, var2) to the I/O rank.
!!
!! Ranks with irank > 0 pack their record and hand it to MPI with a buffered
!! send to rank 0, then return; they do not wait for rank 0 to post a receive.
!! Rank 0 drains every record that has already arrived, writes each one to
!! unit 111, and finally writes its own record.
!!
!! Notes:
!!  * MPI_BSEND needs a user buffer attached beforehand with
!!    MPI_BUFFER_ATTACH; this routine does not attach one, so the caller is
!!    expected to have done it (TODO confirm at the call site).
!!  * Rank 0 only receives messages that MPI_IPROBE reports as already
!!    available. Records that arrive later are picked up by a subsequent
!!    call of this routine on rank 0.
!!  * Unit 111 is assumed to be opened elsewhere; it is not opened here.
!!
!! @param i      integer label of the record
!! @param var1   first value of the record
!! @param var2   second value of the record
!! @param irank  rank of the calling process in MPI_COMM_WORLD
subroutine post_process(i, var1, var2, irank)
  use mpi
  use, intrinsic :: iso_fortran_env, only: real64
  implicit none

  integer, intent(in)      :: i, irank
  real(real64), intent(in) :: var1, var2

  integer, parameter :: buffer_size = 100   ! bytes available for one packed record
  integer, parameter :: result_tag  = 0     ! message tag used for result records
  integer, parameter :: io_rank     = 0     ! rank that performs all the writes
  integer, parameter :: out_unit    = 111   ! output unit (opened by the caller)

  integer      :: ir                  ! record label received from another rank
  real(real64) :: var1r, var2r        ! record values received from another rank
  character    :: buffer(buffer_size) ! pack/unpack scratch space
  integer      :: ipos                ! running position inside buffer
  integer      :: ierr                ! MPI return code (not inspected here)
  logical      :: flag                ! .true. when MPI_IPROBE found a pending message

  if (irank > 0) then
    ! Worker rank: serialise the record and dispatch it without blocking on
    ! the receiver.
    ipos = 0
    call MPI_PACK(i,    1, MPI_INTEGER, buffer, buffer_size, ipos, MPI_COMM_WORLD, ierr)
    call MPI_PACK(var1, 1, MPI_REAL8,   buffer, buffer_size, ipos, MPI_COMM_WORLD, ierr)
    call MPI_PACK(var2, 1, MPI_REAL8,   buffer, buffer_size, ipos, MPI_COMM_WORLD, ierr)
    call MPI_BSEND(buffer, ipos, MPI_PACKED, io_rank, result_tag, MPI_COMM_WORLD, ierr)
  else
    ! I/O rank: write out every record that is already waiting, then our own.
    drain: do
      call MPI_IPROBE(MPI_ANY_SOURCE, result_tag, MPI_COMM_WORLD, flag, &
                      MPI_STATUS_IGNORE, ierr)
      if (.not. flag) exit drain      ! nothing pending right now

      call MPI_RECV(buffer, buffer_size, MPI_PACKED, MPI_ANY_SOURCE, result_tag, &
                    MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr)

      ! Unpack in the same order the sender packed.
      ipos = 0
      call MPI_UNPACK(buffer, buffer_size, ipos, ir,    1, MPI_INTEGER, MPI_COMM_WORLD, ierr)
      call MPI_UNPACK(buffer, buffer_size, ipos, var1r, 1, MPI_REAL8,   MPI_COMM_WORLD, ierr)
      call MPI_UNPACK(buffer, buffer_size, ipos, var2r, 1, MPI_REAL8,   MPI_COMM_WORLD, ierr)

      write(out_unit, *) ir, var1r, var2r
    end do drain

    write(out_unit, *) i, var1, var2
  end if
end subroutine post_process