Question

I run into a strange issue on Mac OS X 10.8 with XCode Version 4.6.3 (4H1503) and clang: Apple LLVM version 4.2 (clang-425.0.28) (based on LLVM 3.2svn) Target: x86_64-apple-darwin12.4.0 Thread model: posix

I created a very basic thread_pool class and wanted to stress test it.

    struct last_item_exception {};

    template<class Task>
    struct thread_reclaimer
    {

      explicit
        thread_reclaimer(impl::basic_task_queue<Task>& q)noexcept
          : q_(q)
      {}

      void operator()()
      {
        try
        {
          while(true)
          {
            using namespace boost;
            q_.wait_and_pop()();
          }
        }
        catch(last_item_exception)
        {
          TRACE_LOG << "caught last item exception";
        }
      }

    private:
      impl::basic_task_queue<Task>& q_;
    };



    template<class Task>
    thread_reclaimer<Task> make_thread_reclaimer
      (impl::basic_task_queue<Task>& queue)noexcept
    {
      return std::move(thread_reclaimer<Task>(queue));
    }



  class thread_pool : public boost::noncopyable
  {
    typedef std::function<void()> task_type;

  public:
    explicit thread_pool(size_t number_of_threads)
      : threads_created_()
      , pool_()
      , queue_()
      , done_()
    {
      try
      {
        TRACE_LOG << "thread_pool: creating " <<number_of_threads << " threads";

        while(threads_created_<number_of_threads)
        {
          ++threads_created_;
          pool_.create_thread(make_thread_reclaimer(queue_));
          TRACE_LOG << "thread_pool: created thread number: "
                    << threads_created_ 
          ;
        }
        TRACE_LOG << "thread_pool: all threads started";
      }
      catch(...)
      {
        TRACE_LOG << "thread_pool: exception occured";
        finish_and_join_all();
        throw;
      }
      TRACE_LOG << "thread_pool: constructor finished";
    }

    ~thread_pool()
    {
      finish_and_join_all();
    }

    bool enqueue(task_type t)
    {
      if(done_) return false;
      queue_.push(t, done_);
      return !done_; //if done was set inbetween, no push occured!
    }

    void finish_and_join_all()
    {
      TRACE_LOG << "entered: finish_and_join_all & done is: " << done_;
      if(done_) return;
      done_ = true;

      std::vector<task_type> cancellation_tasks
        ( threads_created_
        , []
          {
            TRACE_LOG << "throwing last item exception";
            throw impl::last_item_exception();
          }
        )
      ;

      TRACE_LOG << "pushing last item to the queue";
      // atomically pushes all cancellation tasks
      queue_.push(cancellation_tasks.begin(), cancellation_tasks.end());

      threads_created_ = 0;

      TRACE_LOG << "waiting for all threads to finish";
      pool_.join_all();
    }

  private:
    size_t threads_created_;
    boost::thread_group pool_;
    //thread safe producer consumer queue, which uses condition variables for sync
    impl::basic_task_queue<std::function<void()>> queue_;
    std::atomic<bool> done_;
  };

My test case creates and destroys a thread_pool object with 1 thread, i.e.

for(size_t i = 0; i<100; ++i)
    thread_pool pool(1);

On second loop iteration test case fails because done_ upon entering first time finish_and_join_all() is true. Changing std::atomic<bool> type to volatile bool fixes the problem.

The output of the test is below (which causes a condition_variable) failure, because destructor did not wait for all threads to finish:

Entering test case "ctor_stress_test"
Assertion failed: (!ret), function ~condition_variable, file /usr/local/include/boost/thread/pthread/condition_variable_fwd.hpp, line 86.
[2013-07-03 22:17:32.462265] [0x76f73180] [trace]   thread_pool: creating 1 threads
[2013-07-03 22:17:32.462303] [0x76f73180] [trace]   thread_pool: created thread number: 1
[2013-07-03 22:17:32.462312] [0x76f73180] [trace]   thread_pool: all threads started
[2013-07-03 22:17:32.462320] [0x76f73180] [trace]   thread_pool: constructor finished
[2013-07-03 22:17:32.462326] [0x76f73180] [trace]   entered: finish_and_join_all & done is: false
[2013-07-03 22:17:32.462332] [0x76f73180] [trace]   pushing last item to the queue
[2013-07-03 22:17:32.462357] [0x76f73180] [trace]   waiting for all threads to finish
[2013-07-03 22:17:32.462383] [0x10d9e000] [trace]   throwing last item exception
[2013-07-03 22:17:32.462404] [0x10d9e000] [trace]   caught last item exception
[2013-07-03 22:17:32.462434] [0x76f73180] [trace]   executing ~basic_task_queue
[2013-07-03 22:17:32.462447] [0x76f73180] [trace]   thread_pool: creating 1 threads
[2013-07-03 22:17:32.462474] [0x76f73180] [trace]   thread_pool: created thread number: 1
[2013-07-03 22:17:32.462480] [0x76f73180] [trace]   thread_pool: all threads started
[2013-07-03 22:17:32.462485] [0x76f73180] [trace]   thread_pool: constructor finished
[2013-07-03 22:17:32.462490] [0x76f73180] [trace]   entered: finish_and_join_all & done is: true
[2013-07-03 22:17:32.462495] [0x76f73180] [trace]   executing ~basic_task_queue
unknown location:0: fatal error in "ctor_stress_test": signal: SIGABRT (application abort requested)
Leaving test case "ctor_stress_test"; testing time: 448mks

Is this an intended behaviour of std::atomic?

Was it helpful?

Solution

Initialization of an atomic object is not an atomic operation and, moreover, default initialization leaves it in uninitialized state (per §29.6.5[atomics.types.operations.req]/4).

Set it to false using an atomic, or at least well-defined operation in the beginning of the constructor.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top