functor with nested calls to CUDA::thrust functors operating as zip_iterator

Question

There were a number of problems with your code. I'm not sure I will capture all of them here, but:

MASS was undefined.
Your p_dot and q_dot functors needed additional __device__ decorations
Your usage of the p and q variables inside the euler functor did not make any sense; they are not defined anywhere, nor is this the correct way to return values into P and Q vectors, if that was your intent.
We don't return data through the variable passed to the functor at instantiation. Therefore to return the er variable at each timestep, I create a separate vector (ERP and ERQ) to do so.

Here is a modified code which has the above issues and various other issues fixed. It seems to return sensible results, although I've not checked the arithmetic in detail.

#include <thrust/device_vector.h>
#include <thrust/transform.h>
#include <thrust/sequence.h>
#include <thrust/copy.h>
#include <thrust/fill.h>
#include <thrust/replace.h>
#include <thrust/functional.h>
#include <thrust/for_each.h>
#include <thrust/iterator/zip_iterator.h>

#include <iostream>
#include <math.h>

#define MASS 1.0f

__host__ __device__ float f1(float x)
{
  return sinf(x);
}

__host__ __device__ float f2(float x)
{
  return cosf(x);
}

__host__ __device__ float Vx(float x)
{
  return sinf(x);
}

struct q_dot
{
  float x;
  float delta;
  __host__ __device__
  q_dot(float _x,float _delta): x(_x),delta(_delta){};
  template <typename Tuple>
  __host__ __device__
  float operator()(Tuple t)
  {
    float p = thrust::get<1>(t) + delta;
    return  p/MASS;
  }
};


struct p_dot
{
  float x;
  float delta;
  __host__ __device__
  p_dot(float _x,float _delta): x(_x),delta(_delta){};
  template <typename Tuple>
  __host__ __device__
  float operator()(Tuple t)
  {
    float q = thrust::get<0>(t) +   delta;
    return  -Vx(q);
  }
};



struct euler_functor
{
  unsigned fn;
  float h;
  float x0;

  euler_functor(unsigned _fn,float _x0,float _h) : fn(_fn),h(_h),x0(_x0) {};
  template <typename Tuple>
  __host__ __device__
  void operator()(const Tuple &t) {
    // if (fn == 1) y = h*f1(y);
    //else if (fn == 2) y = h*f2(y); 
    float t0, t1, t2, t3;
    t0 =  h*p_dot(x0,h/2.0f)(t);
    t1 =  h*q_dot(x0,h/2.0f)(t);
    t2=0.5*h*p_dot(x0,h/2.0f)(t);
    t3=0.5*h*q_dot(x0,h/2.0f)(t);
    thrust::get<0>(t) = t0;
    thrust::get<1>(t) = t1;
    thrust::get<2>(t) = t2;
    thrust::get<3>(t) = t3;

  }
};


int main(void)
{
  float t=0;
  float t_step=0.1;


  const unsigned N = 8;
  // allocate three device_vectors with 10 elements
  thrust::device_vector<float> Q(N),P(N), ERP(N), ERQ(N);
  // initilaize to some values
  thrust::sequence(Q.begin(), Q.end(),  0.0f, (float)(6.283/(float)N));
  // initilaize to some values
  thrust::sequence(P.begin(), P.end(),  0.0f, (float)(10.283/(float)N));
  for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
  std::cout<< "*****" << std::endl;
  // apply euler for each element of Q and P
  //thrust::for_each(X.begin(),X.end(),euler_functor(1,t,t_step,error)); this becomes:
  thrust::for_each(thrust::make_zip_iterator(thrust::make_tuple(Q.begin(),P.begin(),ERP.begin(), ERQ.begin())),thrust::make_zip_iterator(thrust::make_tuple(Q.end(),P.end(),ERP.end(), ERQ.end())),euler_functor(1,t,t_step));
  // print the values
  for(int i = 0; i < N; i++) std::cout<< Q[i]<<" "<<P[i]<< " "<< ERP[i] << " " << ERQ[i] << std::endl;
}