libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
thread_pool_workers.hpp
1 #ifndef LIBJMMCG_CORE_THREAD_POOL_WORKERS_HPP
2 #define LIBJMMCG_CORE_THREAD_POOL_WORKERS_HPP
3 /******************************************************************************
4 ** Copyright © 2004 by J.M.McGuiness, coder@hussar.me.uk
5 **
6 ** This library is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU Lesser General Public
8 ** License as published by the Free Software Foundation; either
9 ** version 2.1 of the License, or (at your option) any later version.
10 **
11 ** This library is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ** Lesser General Public License for more details.
15 **
16 ** You should have received a copy of the GNU Lesser General Public
17 ** License along with this library; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 
21 #include "private_/fixed_threads_container.hpp"
22 #include "private_/thread_pool_queue_model.hpp"
23 #include "private_/pool_thread.hpp"
24 
25 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd {
26 
27  namespace private_ {
28 
29  template<class S>
30  class wrkr_accumulate_across_threads {
31  public:
32  using statistics_type=S;
33  using result_type=typename statistics_type::ave_stats_type;
34 
35  private:
36  struct agg_vert_stats {
37  template<class V>
38  result_type __fastcall FORCE_INLINE
39  operator()(result_type acc, V const &v) const noexcept(true) {
40  return acc.update(v.statistics().total_vertical_work().total());
41  }
42  };
43  struct agg_hrz_stats {
44  template<class V>
45  result_type __fastcall FORCE_INLINE
46  operator()(result_type acc, V const &v) const noexcept(true) {
47  return acc.update(v.statistics().total_hrz_work().total());
48  }
49  };
50 
51  public:
52  template<class P>
53  static result_type __fastcall FORCE_INLINE
54  vertical_work(P const &pool) noexcept(true) {
55  return std::accumulate(
56  pool.colln().begin(),
57  pool.colln().end(),
58  result_type(),
59  agg_vert_stats()
60  );
61  }
62  template<class P>
63  static result_type __fastcall FORCE_INLINE
64  hrz_work(P const &pool) noexcept(true) {
65  return std::accumulate(
66  pool.colln().begin(),
67  pool.colln().end(),
68  result_type(),
69  agg_hrz_stats()
70  );
71  }
72  };
73  template<class T>
74  class wrkr_accumulate_across_threads<no_statistics<T>> {
75  public:
76  using statistics_type=no_statistics<T>;
77  using result_type=typename statistics_type::ave_stats_type;
78 
79  template<class P>
80  static constexpr result_type __fastcall FORCE_INLINE
81  vertical_work(P const &) noexcept(true) {
82  return result_type();
83  }
84  template<class P>
85  static constexpr result_type __fastcall FORCE_INLINE
86  hrz_work(P const &) noexcept(true) {
87  return result_type();
88  }
89  };
90 
91  }
92 
93  /// This pool has a specified size, and the worker pool_threads steal work from a centrally-held signalled_work_queue.
94  /**
 95  To reduce calls to the global operator new, and to enhance scalability, the parallel algorithms (for_each(), transform(), etc., except merge() and sort()) internally pre-allocate a contiguous buffer of memory for their internal operations. This buffer grows O(p.log(p)) for p processors, with a constant of ~200 bytes, so for millions of processors, megabytes could be allocated per parallel algorithm. This buffer is guaranteed to be released after all mutations on the elements within the collection are complete, which is usually, although not guaranteed to be, before any waiting execution_context constructed by the call to the parallel algorithm is released. For each parallel algorithm (except merge() and sort()) exactly 2 calls to the global operator new are made by the library. For merge() the internal buffer grows as O(p.log(p).log(n)) with O(log(n)) calls to the global operator new, and for sort(), O(p.log(p).log(n)^2) and O(log(n)^2) respectively.
96  */
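/*
 Illustrative arithmetic for the buffer-growth claim above (an editorial sketch, not part of
 the original header; the ~200-byte constant comes from the comment, and a base-2 logarithm
 is assumed since the base is not stated):

     #include <cmath>
     #include <cstddef>

     // Rough size, in bytes, of the contiguous buffer pre-allocated by one parallel
     // algorithm (excluding merge() and sort()) for p processors, per the documented
     // O(p.log(p)) growth with a ~200-byte constant.
     inline std::size_t approx_buffer_bytes(std::size_t const p) noexcept {
         return static_cast<std::size_t>(200.0 * static_cast<double>(p) * std::log2(static_cast<double>(p)));
     }

     // e.g. approx_buffer_bytes(1024) is roughly 200*1024*10, i.e. about 2 MB per parallel
     // algorithm, illustrating how the allocation reaches megabytes as p grows.
*/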
97  template<
98  class PTT
99  >
100  class thread_pool<pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>, pool_traits::size_mode_t::fixed_size, PTT>
101  : public private_::thread_pool_queue_model<
102  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>,
103  pool_traits::size_mode_t::fixed_size,
104  PTT,
105  private_::fixed_pool_of_threads<
106  PTT,
107  pool::private_::thread_types::steal<
108  PTT::result_traits_,
109  typename PTT::os_traits,
110  PTT,
111  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>::queue_model
112  >
113  >
114  > {
115  public:
116  using base_t=private_::thread_pool_queue_model<
117  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>,
118  pool_traits::size_mode_t::fixed_size,
119  PTT,
120  private_::fixed_pool_of_threads<
121  PTT,
122  pool::private_::thread_types::steal<
123  PTT::result_traits_,
124  typename PTT::os_traits,
125  PTT,
126  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>::queue_model
127  >
128  >
129  >;
130  using pool_traits_type=typename base_t::pool_traits_type;
131  using os_traits=typename base_t::os_traits;
132  using thread_traits=typename base_t::thread_traits;
133  using api_params_type=typename base_t::api_params_type;
134  using pool_type=typename base_t::pool_type;
135  using statistics_type=typename base_t::statistics_type;
136  using work_distribution_mode=typename base_t::work_distribution_mode;
137  using signalled_work_queue_type=typename base_t::signalled_work_queue_type;
138  using GSSk_batching_type=typename base_t::GSSk_batching_type;
139 
140  BOOST_MPL_ASSERT((std::is_same<typename std::is_same<typename PTT::os_traits::thread_traits::model_type, sequential_mode>::type, std::false_type>));
141 
142  enum class erase_states {
143  failed_to_erase,
144  erased_successfully,
145  ignoring_result
146  };
147 
148  /// Create the thread pool.
149  /**
150  \param num_threads The number of threads in the pool, which must be greater than zero.
151  */
152  explicit __stdcall thread_pool(const typename base_t::pool_type::size_type num_threads) noexcept(false) FORCE_INLINE
153  : base_t(num_threads) {
154  assert(this->max_num_threads_in_pool>0);
155  if (!this->max_num_threads_in_pool) {
156  throw typename base_t::exception_type(
157  _T("Cannot have an empty thread pool."),
158  info::function(
159  __LINE__,
160  __PRETTY_FUNCTION__,
161  typeid(*this),
162  info::function::argument(
163  _T("const typename pool_traits_type::pool_type::size_type max_num_threads"),
164  tostring(num_threads)
165  )
166  ),
167  JMMCG_REVISION_HDR(_T(LIBJMMCG_VERSION_NUMBER))
168  );
169  }
170  }
171 
172  /**
173  The destruction of the collection of threads is sequential, but the threads themselves can exit in parallel, thus speeding up the clean-up of the pool.
174  */
175  __stdcall ~thread_pool() noexcept(false) FORCE_INLINE {
176  exit();
177  }
178 
179  /// Erase the specified, queued work.
180  /**
181  Note that if the work has started processing, it will not be erased.
182 
183  \param ec The execution context of the work to erase.
184  \return The outcome of the erase request, which may be successful, or may fail because the work is already being processed.
185 
186  \see erase_states
187  */
188  template<typename ExecT>
189  erase_states __fastcall FORCE_INLINE
190  erase(ExecT &ec) noexcept(false) {
191  erase_states ret=erase_states::failed_to_erase;
192  if (!ec.erase()) {
193  // i.e. we won't wait forever for a result from work that has been erased. (Although we may discard a calculated result. If we can't erase the work from the execution context, then wherever that work is, allow it to be processed to avoid deadlocking that waiting client.)
194  ret=(this->signalled_work_queue.erase(ec.wk_queue_item()) ? erase_states::erased_successfully : erase_states::ignoring_result);
195  }
196  return ret;
197  }
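/*
 A minimal caller-side sketch of erase() (editorial illustration only: `pool_t` stands for this
 thread_pool instantiation, and `pool`/`context` are a hypothetical pool instance and the
 execution_context returned when the joinable work was submitted):

     switch (pool.erase(context)) {
     case pool_t::erase_states::erased_successfully:
         // The work was still queued and has been removed; it will never run.
         break;
     case pool_t::erase_states::ignoring_result:
         // The work could not be removed from the pool's queue, so it will be processed,
         // but the client will not block on (and may discard) its result.
         break;
     case pool_t::erase_states::failed_to_erase:
         // The work could not be erased from the execution context, most likely because
         // it is already being processed.
         break;
     }
*/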
198 
199  /// Obtain access to any statistics data collected by the operation of the thread_pool.
200  /**
201  Algorithmic complexity when specialised with no_statistics: constant time, otherwise O(pool_size()).
 202  Note that the value computed for the statistics_type::total_vertical_work() is guaranteed to be accurate. The value computed for the statistics_type::total_hrz_work() is guaranteed not to be more than the value that would be obtained if it were computed atomically. Therefore the following holds:
203  statistics_type::total_work_added()>=statistics_type::total_vertical_work()+statistics_type::total_hrz_work()
204  */
205  statistics_type const __fastcall statistics() const noexcept(true) override FORCE_INLINE {
206  using acc_t=private_::wrkr_accumulate_across_threads<statistics_type>;
207 
208  statistics_type stats(batch_details.statistics());
209  stats.add_vertical_work(acc_t::vertical_work(this->pool));
210  stats.add_hrz_work(acc_t::hrz_work(this->pool));
211  return stats;
212  }
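/*
 Editorial sketch of the invariant documented above, using the accessor names from the comment
 (only meaningful when PTT supplies real statistics rather than no_statistics):

     auto const stats = pool.statistics();
     assert(stats.total_work_added() >= stats.total_vertical_work() + stats.total_hrz_work());
*/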
213 
214  void exit() noexcept(false) {
215  this->exit_requested().set(pool_traits_type::template exit_requested_type<typename work_distribution_mode::queue_model>::states::exit_requested);
216  // The destruction of the collection of threads is sequential, but the threads themselves can exit in parallel, thus speeding up the clean-up of the pool.
217  // The natural object-destruction order causes the threads in the pool to be destroyed too late, so the pool must be emptied now.
218  this->pool.clear();
219  // We must empty the queue after deleting the threads: the tricky way in which the pool_threads steal work from the signalled_work_queue can cause them to crash if the queue is emptied whilst they are stealing work. Basically, the pool_threads steal work atomically with respect to each other, but not with respect to this clear() method.
220  this->signalled_work_queue.clear();
221  }
222 
223  private:
224  /// This is the batch that the main thread will process.
225  GSSk_batching_type batch_details;
226 
227  statistics_type &__fastcall set_statistics() noexcept(true) override FORCE_INLINE {
228  return batch_details.statistics();
229  }
230 
231  bool __fastcall add_work_to_batch(const typename thread_traits::api_params_type::tid_type tid, typename signalled_work_queue_type::value_type &&wk) noexcept(true) override FORCE_INLINE {
232  return batch_details.add_work_to_batch(this->pool, tid, std::forward<typename signalled_work_queue_type::value_type>(wk));
233  }
234 
235  /**
236  Try to add the new work to this thread's batch, if empty, to avoid locking the main queue in the pool. This is very important: it helps maintain throughput of work, by avoiding having to place work on the shared signalled_work_queue in the thread_pool, which involves locks and signals, as placing the work directly in the pool_thread's batch can be done lock-free.
237  */
238  void __fastcall add_nonjoinable_work(typename signalled_work_queue_type::value_type &&wk) noexcept(false) override FORCE_INLINE {
239 // TODO Stops "overlapped_write_held_by_reads" working: if (!this->add_work_to_batch(os_traits::thread_traits::get_current_thread(), wk)) {
240  thread_traits::sleep(0); // This sleep seems to be vital to ensure that all threads manage to remove work from the queue, ensuring load distribution.
241  this->signalled_work_queue.push_back(std::forward<typename signalled_work_queue_type::value_type>(wk));
242  // Try to allow the pool_threads a chance to process the work before this horizontal thread is created that might compete with the pool_threads for the work.
243  os_traits::thread_traits::sleep(0);
244  batch_details.statistics().update_max_queue_len(this->queue_size());
245 // }
246  batch_details.statistics().added_work();
247  }
248 
249  /**
250  Try to add the new work to this thread's batch, if empty, to avoid locking the main queue in the pool. This is very important: it helps maintain throughput of work, by avoiding having to place work on the shared signalled_work_queue in the thread_pool, which involves locks and signals, as placing the work directly in the pool_thread's batch can be done lock-free.
251  */
252  typename signalled_work_queue_type::value_type __fastcall add_joinable_work(typename signalled_work_queue_type::value_type &&wk) noexcept(false) override FORCE_INLINE {
253 // TODO Stops "overlapped_write_held_by_reads" working: if (!this->add_work_to_batch(os_traits::thread_traits::get_current_thread(), wk)) {
254  thread_traits::sleep(0); // This sleep seems to be vital to ensure that all threads manage to remove work from the queue, ensuring load distribution.
255  this->signalled_work_queue.push_back(std::forward<typename signalled_work_queue_type::value_type>(wk));
256  // Try to allow the pool_threads a chance to process the work before this horizontal thread is created that might compete with the pool_threads for the work.
257  os_traits::thread_traits::sleep(0);
258  batch_details.statistics().update_max_queue_len(this->queue_size());
259 // }
260  batch_details.statistics().added_work();
261  return std::move(wk);
262  }
263 
264  bool __fastcall process_a_batch_item(const typename thread_traits::api_params_type::tid_type tid, typename os_traits::thread_exception const &ex) noexcept(false) override FORCE_INLINE {
265  return batch_details.process_a_batch_item(this->pool, tid, ex);
266  }
267  };
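/*
 A minimal construction sketch for this fixed-size, pool-owns-queue specialisation (editorial
 illustration: `pool_traits_t` stands for whatever thread_pool_traits-style PTT the application
 defines, and the specialisation arguments are assumed to mirror the base-class arguments above):

     using pool_t = thread_pool<
         pool_traits::work_distribution_mode_t::worker_threads_get_work<
             pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue
         >,
         pool_traits::size_mode_t::fixed_size,
         pool_traits_t
     >;

     pool_t pool(4);   // Four worker pool_threads; a size of zero throws base_t::exception_type.
     // ... submit work via the pool's transfer operations; the destructor then calls exit(),
     // which clears the pool of threads and finally the shared signalled_work_queue.
*/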
268 
 269  /// This pool has a specified maximum size, up to which it will grow and from which it will shrink, and the worker pool_threads steal work from a centrally-held signalled_work_queue.
270  /**
271  The internal signalled_work_queue is reasonably efficiently implemented: if there is enough work in the signalled_work_queue, the addition of the input_work to it can occur independently of the removal of the input_work by
272  the worker pool_threads.
273  */
274  template<
275  class PTT
276  >
277  class thread_pool<pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>, pool_traits::size_mode_t::tracks_to_max, PTT>
278  : public private_::thread_pool_queue_model<
279  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>,
280  pool_traits::size_mode_t::tracks_to_max,
281  PTT,
282  private_::fixed_pool_of_threads<
283  PTT,
284  pool::private_::thread_types::steal<
285  PTT::result_traits_,
286  typename PTT::os_traits,
287  PTT,
288  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>::queue_model
289  >
290  >
291  > {
292  public:
293  using base_t=private_::thread_pool_queue_model<
294  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>,
295  pool_traits::size_mode_t::tracks_to_max,
296  PTT,
297  private_::fixed_pool_of_threads<
298  PTT,
299  pool::private_::thread_types::steal<
300  PTT::result_traits_,
301  typename PTT::os_traits,
302  PTT,
303  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>::queue_model
304  >
305  >
306  >;
307  using pool_traits_type=typename base_t::pool_traits_type;
308  using os_traits=typename base_t::os_traits;
309  using thread_traits=typename base_t::thread_traits;
310  using api_params_type=typename base_t::api_params_type;
311  using pool_type=typename base_t::pool_type;
312  using GSSk_batching_type=typename base_t::GSSk_batching_type;
313  using statistics_type=typename base_t::statistics_type;
314  using work_distribution_mode=typename base_t::work_distribution_mode;
315  using signalled_work_queue_type=typename base_t::signalled_work_queue_type;
316 
317  BOOST_MPL_ASSERT((std::is_same<typename std::is_same<typename PTT::os_traits::thread_traits::model_type, sequential_mode>::type, std::false_type>));
318 
319  enum class erase_states {
320  failed_to_erase,
321  erased_successfully,
322  ignoring_result
323  };
324 
325  /// Create the thread pool.
326  /**
327  \todo JMG: Need to complete this... What's the thread creation policy????
328 
329  \param num_threads The number of threads in the pool, which must be greater than zero.
330  */
331 /*
332  explicit __stdcall thread_pool(const typename pool_traits_type::pool_type::size_type num_threads) noexcept(false) FORCE_INLINE
333  : base_t(num_threads,num_threads) {
334  assert(this->max_num_threads_in_pool>0);
335  if (!this->max_num_threads_in_pool) {
336  throw typename base_t::exception(
337  _T("Cannot have an empty thread pool."),
338  info::function(
339  __LINE__,
340  __PRETTY_FUNCTION__,
341  typeid(*this),
342  info::function::argument(
343  _T("const typename pool_traits_type::pool_type::size_type max_num_threads"),
344  tostring(num_threads)
345  )
346  ),
347  JMMCG_REVISION_HDR(_T(LIBJMMCG_VERSION_NUMBER))
348  );
349  }
350  }
351 */
352  thread_pool(thread_pool const &)=delete;
353 
354  /**
355  The destruction of the collection of threads is sequential, but the threads themselves can exit in parallel, thus speeding up the clean-up of the pool.
356  */
357  __stdcall ~thread_pool() noexcept(false) FORCE_INLINE {
358  exit();
359  }
360 
361  /// Erase the specified, queued work.
362  /**
363  Note that if the work has started processing, it will not be erased.
364 
365  \param ec The execution context of the work to erase.
366  \return The outcome of the erase request, which may be successful, or may fail because the work is already being processed.
367 
368  \see erase_states
369  */
370  template<typename ExecT_>
371  erase_states __fastcall FORCE_INLINE
372  erase(ExecT_ &ec) {
373  erase_states ret=erase_states::failed_to_erase;
374  if (!ec.erase()) {
375  // i.e. we won't wait forever for a result from work that has been erased. (Although we may discard a calculated result. If we can't erase the work from the execution context, then wherever that work is, allow it to be processed to avoid deadlocking that waiting client.)
376  ret=(this->signalled_work_queue.erase(ec.wk_queue_item()) ? erase_states::erased_successfully : erase_states::ignoring_result);
377  }
378  return ret;
379  }
380 
381  /// Obtain access to any statistics data collected by the operation of the thread_pool.
382  /**
383  Algorithmic complexity: O(pool_size())
384  Note that the value computed for the statistics_type::total_vertical_work() is guaranteed to be accurate. The value computed for the statistics_type::total_hrz_work() is guaranteed not to be more than the value that would be obtained if it were computed atomically. Therefore the following holds:
385  statistics_type::total_work_added()>=statistics_type::total_vertical_work()+statistics_type::total_hrz_work()
386  */
387  statistics_type const __fastcall statistics() const noexcept(true) FORCE_INLINE {
388  using acc_t=private_::wrkr_accumulate_across_threads<statistics_type>;
389 
390  statistics_type stats(batch_details.statistics());
391  stats.processed_vertical_work(acc_t::vertical_work(this->pool));
392  stats.processed_hrz_work(acc_t::hrz_work(this->pool));
393  return stats;
394  }
395 
396  void exit() noexcept(false) {
397  this->signalled_work_queue.clear();
398  this->exit_requested().set(pool_traits_type::template exit_requested_type<typename work_distribution_mode::queue_model>::states::exit_requested);
399  // The natural object-destruction order causes the threads in the pool to be destroyed too late, so the pool must be emptied now.
400  this->pool.clear();
401  }
402 
403  private:
404  /// This is the batch that the main thread will process.
405  GSSk_batching_type batch_details;
406 
407  statistics_type &__fastcall set_statistics() noexcept(true) FORCE_INLINE {
408  return batch_details.statistics();
409  }
410 /*
411  void __fastcall add_nonjoinable_work(typename signalled_work_queue_type::value_type &&wk) FORCE_INLINE {
412  thread_traits::sleep(0); // This sleep seems to be vital to ensure that all threads manage to remove work from the queue, ensuring load distribution.
413  this->signalled_work_queue.push_front(std::forward<typename signalled_work_queue_type::value_type>(wk));
414  // Try to allow the pool_threads a chance to process the work before this horizontal thread is created that might compete with the pool_threads for the work.
415  os_traits::thread_traits::sleep(0);
416  batch_details.statistics().added_work();
417 // TODO JMG: Need to make the threads get the work according to our type...
418  }
419 
420  typename signalled_work_queue_type::value_type __fastcall add_joinable_work(typename signalled_work_queue_type::value_type &&wk) FORCE_INLINE {
421  thread_traits::sleep(0); // This sleep seems to be vital to ensure that all threads manage to remove work from the queue, ensuring load distribution.
422  this->signalled_work_queue.push_front(std::forward<typename signalled_work_queue_type::value_type>(wk));
423  // Try to allow the pool_threads a chance to process the work before this horizontal thread is created that might compete with the pool_threads for the work.
424  os_traits::thread_traits::sleep(0);
425  batch_details.statistics().added_work();
426 // TODO JMG: Need to make the threads get the work according to our type...
427  return this->signalled_work_queue.front();
428  }
429 */
430 
431  bool __fastcall process_a_batch_item(const typename thread_traits::api_params_type::tid_type tid, typename os_traits::thread_exception const &exception_thrown_in_thread) FORCE_INLINE {
432  return batch_details.process_a_batch_item(this->pool, tid, exception_thrown_in_thread);
433  }
434  };
435 
436  /// This pool has a specified size, and each worker pool_thread has its own signalled_work_queue, from which the other pool_threads may steal work.
437  /**
438  To reduce calls to the global operator new, and to enhance scalability, the parallel algorithms (for_each(), transform(), etc., except merge() and sort()) internally pre-allocate a contiguous buffer of memory for their internal operations. This buffer grows O(p.log(p)) for p processors, with a constant of ~200 bytes, so for millions of processors, megabytes could be allocated per parallel algorithm. This buffer is guaranteed to be released after all mutations on the elements within the collection are complete, which is usually, although not guaranteed to be, before any waiting execution_context constructed by the call to the parallel algorithm is released. For each parallel algorithm (except merge() and sort()) exactly 2 calls to the global operator new are made by the library. For merge() the internal buffer grows as O(p.log(p).log(n)) with O(log(n)) calls to the global operator new, and for sort(), O(p.log(p).log(n)^2) and O(log(n)^2) respectively.
439  */
440  template<
441  class PTT
442  >
443  class thread_pool<pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<pool_traits::work_distribution_mode_t::queue_model_t::stealing_mode_t::random>>, pool_traits::size_mode_t::fixed_size, PTT>
444  : public private_::thread_pool_queue_model<
445  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<pool_traits::work_distribution_mode_t::queue_model_t::stealing_mode_t::random>>,
446  pool_traits::size_mode_t::fixed_size,
447  PTT,
448  private_::fixed_pool_of_threads<
449  PTT,
450  pool::private_::thread_types::steal<
451  PTT::result_traits_,
452  typename PTT::os_traits,
453  PTT,
454  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<pool_traits::work_distribution_mode_t::queue_model_t::stealing_mode_t::random>>::queue_model
455  >
456  >
457  > {
458  public:
459  using base_t=private_::thread_pool_queue_model<
460  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<pool_traits::work_distribution_mode_t::queue_model_t::stealing_mode_t::random>>,
461  pool_traits::size_mode_t::fixed_size,
462  PTT,
463  private_::fixed_pool_of_threads<
464  PTT,
465  pool::private_::thread_types::steal<
466  PTT::result_traits_,
467  typename PTT::os_traits,
468  PTT,
469  pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<pool_traits::work_distribution_mode_t::queue_model_t::stealing_mode_t::random>>::queue_model
470  >
471  >
472  >;
473  using pool_traits_type=typename base_t::pool_traits_type;
474  using os_traits=typename base_t::os_traits;
475  using thread_traits=typename base_t::thread_traits;
476  using api_params_type=typename base_t::api_params_type;
477  using pool_type=typename base_t::pool_type;
478  using statistics_type=typename base_t::statistics_type;
479  using work_distribution_mode=typename base_t::work_distribution_mode;
480  using signalled_work_queue_type=typename base_t::signalled_work_queue_type;
481 
482  BOOST_MPL_ASSERT((std::is_same<typename std::is_same<typename PTT::os_traits::thread_traits::model_type, sequential_mode>::type, std::false_type>));
483 
484  enum class erase_states {
485  failed_to_erase,
486  erased_successfully,
487  ignoring_result
488  };
489 
490  /// Create the thread pool.
491  /**
492  \param num_threads The number of threads in the pool, which must be greater than zero.
493  */
494  explicit __stdcall thread_pool(const typename base_t::pool_type::size_type num_threads) noexcept(false) FORCE_INLINE
495  : base_t(num_threads) {
496  assert(this->max_num_threads_in_pool>0);
497  if (!this->max_num_threads_in_pool) {
498  throw typename base_t::exception_type(
499  _T("Cannot have an empty thread pool."),
500  info::function(
501  __LINE__,
502  __PRETTY_FUNCTION__,
503  typeid(*this),
504  info::function::argument(
505  _T("const typename pool_traits_type::pool_type::size_type max_num_threads"),
506  tostring(num_threads)
507  )
508  ),
509  JMMCG_REVISION_HDR(_T(LIBJMMCG_VERSION_NUMBER))
510  );
511  }
512  }
513  thread_pool(thread_pool const &)=delete;
514 
515  /**
516  The destruction of the collection of threads is sequential, but the threads themselves can exit in parallel, thus speeding up the clean-up of the pool.
517  */
518  __stdcall ~thread_pool() noexcept(false) FORCE_INLINE {
519  exit();
520  }
521 
522  /// Erase the specified, queued work.
523  /**
524  \see erase_states
525  */
526  template<typename ExecT>
527  erase_states __fastcall erase(ExecT &ec) noexcept(false)=delete;
528 
529  /// Obtain access to any statistics data collected by the operation of the thread_pool.
530  /**
531  Algorithmic complexity when specialised with no_statistics: constant time, otherwise O(pool_size()).
532  Note that the value computed for the statistics_type::total_vertical_work() is guaranteed to be accurate. The value computed for the statistics_type::total_hrz_work() is guaranteed not to be more than the value that would be obtained if it were computed atomically. Therefore the following holds:
533  statistics_type::total_work_added()>=statistics_type::total_vertical_work()+statistics_type::total_hrz_work()
534  */
535  statistics_type const __fastcall statistics() const noexcept(true) override FORCE_INLINE {
536  using acc_t=private_::wrkr_accumulate_across_threads<statistics_type>;
537 
538  statistics_type stats(statistics_);
539  stats.add_vertical_work(acc_t::vertical_work(this->pool));
540  stats.add_hrz_work(acc_t::hrz_work(this->pool));
541  return stats;
542  }
543 
544  void exit() noexcept(false) {
545  this->exit_requested().set(pool_traits_type::template exit_requested_type<typename work_distribution_mode::queue_model>::states::exit_requested);
546  // The destruction of the collection of threads is sequential, but the threads themselves can exit in parallel, thus speeding up the clean-up of the pool.
547  // The natural object-destruction order causes the threads in the pool to be destroyed too late, so the pool must be emptied now.
548  this->pool.clear();
549  // We must empty the queue after deleting the threads: the tricky way in which the pool_threads steal work from the signalled_work_queue can cause them to crash if the queue is emptied whilst they are stealing work. Basically, the pool_threads steal work atomically with respect to each other, but not with respect to this clear() method.
550  this->main_queue.clear();
551  }
552 
553  private:
554  /// This is the queue that the main thread will process.
555  signalled_work_queue_type main_queue;
556  statistics_type statistics_;
557 
558  statistics_type &__fastcall set_statistics() noexcept(true) override FORCE_INLINE {
559  return statistics_;
560  }
561 
562  bool __fastcall add_work_to_batch(const typename thread_traits::api_params_type::tid_type, typename signalled_work_queue_type::value_type &&wk) noexcept(true) override FORCE_INLINE {
563  main_queue.push_front(std::forward<typename signalled_work_queue_type::value_type>(wk));
564  return true;
565  }
566 
567  /**
568  */
569  void __fastcall add_nonjoinable_work(typename signalled_work_queue_type::value_type &&wk) noexcept(false) override FORCE_INLINE {
570  this->pool.first_thread().push_front(std::forward<typename signalled_work_queue_type::value_type>(wk));
571  statistics_.added_work();
572  }
573 
574  /**
575  Try to add the new work to this thread's batch, if empty, to avoid locking the main queue in the pool. This is very important: it helps maintain throughput of work, by avoiding having to place work on the shared signalled_work_queue in the thread_pool, which involves locks and signals, as placing the work directly in the pool_thread's batch can be done lock-free.
576  */
577  typename signalled_work_queue_type::value_type __fastcall add_joinable_work(typename signalled_work_queue_type::value_type &&wk) noexcept(false) override FORCE_INLINE {
578  this->pool.first_thread().push_front(std::forward<typename signalled_work_queue_type::value_type>(wk));
579  statistics_.added_work();
580  return std::move(wk);
581  }
582 
583  bool __fastcall process_a_batch_item(const typename thread_traits::api_params_type::tid_type tid) noexcept(false) FORCE_INLINE {
584  return main_queue.process_a_batch_item(this->pool, tid);
585  }
586  };
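/*
 Editorial note for this thread-owns-queue, work-stealing specialisation: erase() is explicitly
 deleted above, so queued work cannot be cancelled through the pool once submitted (with
 hypothetical `pool` and `context` as in the earlier sketch):

     // auto outcome = pool.erase(context);   // ill-formed: erase() is "=delete" for this pool type.

 statistics() aggregation across the per-thread queues otherwise follows the same
 wrkr_accumulate_across_threads pattern as the other specialisations.
*/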
587 
588 } } }
589 
590 #endif