libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
thread_pool_queue_model.hpp
#ifndef LIBJMMCG_CORE_PRIVATE_THREAD_POOL_QUEUE_MODEL_HPP
#define LIBJMMCG_CORE_PRIVATE_THREAD_POOL_QUEUE_MODEL_HPP

/******************************************************************************
** Copyright © 2010 by J.M.McGuiness, coder@hussar.me.uk
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public
** License as published by the Free Software Foundation; either
** version 2.1 of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd { namespace private_ {

/// This is the batch that the main thread will process.
template<
	unsigned long GSSkSz,
	class PTT,
	class Pt,
	class QM
>
class GSSk_batching {
public:
	typedef PTT pool_traits_type;
	typedef typename pool_traits_type::os_traits os_traits;
	typedef Pt pool_type;
	typedef typename os_traits::thread_traits thread_traits;
	using signalled_work_queue_type=typename pool_traits_type::template signalled_work_queue_type<QM>;
	/// Return a container of GSSkSz items from the front of the queue to implement the GSS(k) or bakers' scheduling algorithm.
	using batch_details_type=batch_details<pool_traits_type::GSSk, signalled_work_queue_type, typename remove_shared_ptr<typename pool_type::value_type, api_lock_traits<platform_api, sequential_mode>>::value_type::statistics_type>;
	typedef typename batch_details_type::statistics_type statistics_type;
	static constexpr unsigned long GSSk=batch_details_type::GSSk;

	explicit GSSk_batching(const typename thread_traits::api_params_type::tid_type mtid) noexcept(true) FORCE_INLINE
	: main_tid(mtid) {
	}

	/// Process a closure_base-derived closure item from the batch of a pool_thread.
	/**
		\param pool The thread pool.
		\param tid The thread_id of the pool_thread whose batch should be queried for more work.
		\return true if there is more closure_base-derived closure to process() in the pool_thread's batch, otherwise false.

		\see batch_details::process_a_batch_item()
	*/
	bool __fastcall process_a_batch_item(pool_type &pool, const typename thread_traits::api_params_type::tid_type tid, typename os_traits::thread_exception const &exception_thrown_in_thread) noexcept(false) FORCE_INLINE {
		if (tid==main_tid) {
			return batch.process_a_batch_item();
		} else {
			const typename pool_type::container_type::iterator thread=pool.find(tid);
			if (thread!=pool.end()) {
				assert(dynamic_cast<typename pool_type::container_type::mapped_type::value_type *>(&*thread->second));
				return thread->second->process_a_batch_item(exception_thrown_in_thread);
			} else {
				// We might have a horizontal thread spawned by a horizontal thread, so the ancestor_thread_id will no longer be that of a pool_thread in the thread_pool. This feature is used to flush the batch of a pool_thread of any remaining work, but horizontal threads only have one item in their batch, the active closure_base-derived closure, i.e. no backed-up work, so just report that all work has been done.
				return false;
			}
		}
	}

	/// Put the closure_base-derived closure in the batch, if it is empty.
	/**
		Note that this function runs with no locks, as it presumes that the caller is the same pool_thread that consumes the work from the batch.

		\param pool The thread_pool_base-derived thread pool to which the wk will be transferred.
		\param tid The thread_id of the pool_thread to which the closure_base-derived closure should be added, if possible.
		\param wk The closure_base-derived closure to attempt to add.
		\return true if the closure_base-derived closure was added, false otherwise.

		\see batch_details::add_work_to_batch()
	*/
	bool __fastcall add_work_to_batch(pool_type &pool, const typename thread_traits::api_params_type::tid_type tid, typename signalled_work_queue_type::value_type &&wk) noexcept(true) FORCE_INLINE {
		if (tid==main_tid) {
			return batch.add_work_to_batch(std::forward<typename signalled_work_queue_type::value_type>(wk));
		} else {
			const auto thread=pool.find(tid);
			if (thread!=pool.end()) {
				assert(dynamic_cast<typename pool_type::container_type::mapped_type::value_type *>(&*thread->second));
				return thread->second->add_work_to_batch(std::forward<typename signalled_work_queue_type::value_type>(wk));
			}
		}
		// We might have a horizontal thread spawned by a horizontal thread, so the ancestor_thread_id will no longer be that of a pool_thread in the thread_pool. But horizontal threads only have one item in their batch, the active closure_base-derived closure, i.e. no backed-up work, so just report that the work couldn't be added.
		return false;
	}

	statistics_type const &__fastcall statistics() const noexcept(true) FORCE_INLINE {
		return batch.statistics();
	}
	statistics_type &__fastcall statistics() noexcept(true) FORCE_INLINE {
		return batch.statistics();
	}

private:
	batch_details_type batch;
	const typename thread_traits::api_params_type::tid_type main_tid;
};
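
/* Illustrative sketch only, not part of the library: the GSS(k) or bakers'-scheduling idea that
   GSSk_batching wraps is to dequeue up to k closures per lock acquisition, amortising the locking
   cost of the shared queue over k items. The names below (simple_queue, take_batch) are
   hypothetical and use only standard-library types, not the library's own queue.

	#include <array>
	#include <deque>
	#include <mutex>
	#include <optional>

	template<unsigned long k, class V>
	class simple_queue {
	public:
		void push(V v) {
			const std::lock_guard<std::mutex> lck(mtx);
			q.push_back(std::move(v));
		}
		// Dequeue up to k items under a single lock acquisition; unused slots stay empty.
		std::array<std::optional<V>, k> take_batch() {
			std::array<std::optional<V>, k> batch;
			const std::lock_guard<std::mutex> lck(mtx);
			for (unsigned long i=0; i<k && !q.empty(); ++i) {
				batch[i]=std::move(q.front());
				q.pop_front();
			}
			return batch;
		}

	private:
		std::mutex mtx;
		std::deque<V> q;
	};
*/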

/// Specifies how the queue(s) within the thread_pool or the pool_threads are implemented.
/**
	\see thread_pool_base
*/
template<
	class DM,
	pool_traits::size_mode_t Ps,
	typename PTT,
	class Pt
>
class thread_pool_queue_model;

/// Implements the case when the signalled_work_queue is contained within the thread_pool & shared by the pool_threads.
/**
	This implies that the cost of executing the input_work is larger than the locking & serialisation cost of having a single queue from which all of the pool_threads compete to take work.

	\see thread_pool_base, signalled_work_queue
*/
template<
	template<class> class QM,
	pool_traits::size_mode_t Ps,
	typename PTT,
	class Pt
>
class thread_pool_queue_model<QM<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>, Ps, PTT, Pt> : public thread_pool_base<QM<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>, Ps, PTT, Pt>, protected PTT::template thread_pool_queue_details<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue> {
public:
	using base_t=typename PTT::template thread_pool_queue_details<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>;
	using base1_t=thread_pool_base<QM<pool_traits::work_distribution_mode_t::queue_model_t::pool_owns_queue>, Ps, PTT, Pt>;
	using pool_traits_type=typename base1_t::pool_traits_type;
	using os_traits=typename base1_t::os_traits;
	using pool_type=typename base1_t::pool_type;
	using queue_size_type=typename base1_t::queue_size_type;
	using pool_size_type=typename base1_t::pool_size_type;
	using pool_thread_type=typename base1_t::pool_thread_type;
	using exception_type=typename base1_t::exception_type;
	using thread_traits=typename base1_t::thread_traits;
	using api_params_type=typename base1_t::api_params_type;
	using priority_type=typename base1_t::priority_type;
	using work_distribution_mode=typename base1_t::work_distribution_mode;
	using signalled_work_queue_type=typename base1_t::signalled_work_queue_type;
	using queue_model=typename base_t::queue_model;

	/**
		To assist in allowing compile-time computation of the algorithmic order of the threading model.
	*/
	static constexpr generic_traits::memory_access_modes memory_access_mode=base1_t::memory_access_mode;

	using GSSk_batching_type=GSSk_batching<pool_traits_type::GSSk, pool_traits_type, pool_type, typename work_distribution_mode::queue_model>;
	/// The type of statistics collected related to the operation of the thread_pool.
	/**
		The general concept behind this type is that the cost of gathering the statistics should be as small as possible, even to the extent of the statistics being inaccurate under-estimations, to ensure that the cost is minimised.

		\see no_statistics
		\see basic_statistics
	*/
	using statistics_type=typename base1_t::statistics_type;
	using cfg_type=typename base1_t::cfg_type;

	/// Returns true if there are no threads in the thread_pool.
	/**
		\return true if there are no threads in the thread_pool.
	*/
	bool __fastcall pool_empty() const noexcept(true) override FORCE_INLINE {
		return pool.empty();
	}
	/// Returns the current number of threads in the thread_pool.
	/**
		\return The current number of threads in the thread_pool.
	*/
	const pool_size_type __fastcall pool_size() const noexcept(true) override FORCE_INLINE final {
		return pool.size();
	}
	/**
		\return true if there is no input_work for the thread_pool to process.
	*/
	bool __fastcall queue_empty() const noexcept(true) override FORCE_INLINE {
		return this->signalled_work_queue.empty();
	}
	/**
		\return The current number of outstanding, unscheduled input_work items to be processed by the thread_pool.
	*/
	const queue_size_type __fastcall queue_size() const noexcept(true) override FORCE_INLINE {
		return this->signalled_work_queue.size();
	}

	void __fastcall queue_clear() noexcept(false) override FORCE_INLINE {
		this->signalled_work_queue.clear();
	}

	/// Return the theoretical minimum time in computations according to section 3.3 & Theorem 3.3 in [1] required to complete the current work with the current number of threads in the pool using a CREW-PRAM and according to section 1.3.2, Theorem 1.2 in [2] for an EREW-PRAM.
	/**
		This allows the user to determine the current computational efficiency of their thread_pool with the supplied thread-safe adapted container, safe_colln, as they can use this to profile their code and adjust the size of the thread_pool for the target architecture.

		[1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.
		[2] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.

		\return The minimum number of computations.

		\todo It would be nice if there were some result for returning this with respect to the memory-access models of the work within the queue (which may be a mix of CREW & EREW memory models) for the current thread_pool.

		\see safe_colln
	*/
	unsigned long __fastcall
	min_time(generic_traits::memory_access_modes mode) const noexcept(true) override FORCE_INLINE;
	template<class T>
	unsigned long __fastcall FORCE_INLINE
	min_time(T const &) const noexcept(true);

	/// Return the theoretical minimum number of processors required to achieve the minimum computation time according to section 3.3 & Theorem 3.3 in [1] required to complete the current work using a CREW-PRAM.
	/**
		This allows the user to determine the current computational efficiency of their thread_pool with the supplied thread-safe adapted container, safe_colln, as they can use this to profile their code and adjust the size of the thread_pool for the target architecture.

		[1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.

		\return The minimum number of processors.

		\todo It would be nice if there were some result for returning this with respect to the memory-access models of the work within the queue (which may be a mix of CREW & EREW memory models) for the current thread_pool.

		\see safe_colln
	*/
	unsigned long __fastcall
	min_processors(generic_traits::memory_access_modes mode) const noexcept(true) override FORCE_INLINE;
	template<class T>
	unsigned long __fastcall FORCE_INLINE
	min_processors(T const &) const noexcept(true);
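
/* Illustrative sketch only, not part of the library: under the simplifying assumption of n
   independent, unit-cost work items and p identical pool_threads, the CREW-PRAM style bounds
   referenced above reduce to the expressions below. The real min_time() and min_processors()
   may weight the queued work differently; these helper names are hypothetical.

	// Minimum number of parallel steps achievable with p threads: ceil(n / p).
	inline unsigned long min_time_estimate(unsigned long n, unsigned long p) noexcept {
		return p==0UL ? n : (n+p-1UL)/p;
	}
	// Fewest processors that still achieve the one-step optimum for n independent,
	// unit-cost items: one processor per item.
	inline unsigned long min_processors_estimate(unsigned long n) noexcept {
		return n;
	}
*/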

protected:
	pool_type pool;

// TODO BOOST_MPL_ASSERT((std::is_same<typename base_t::exit_requested_type, typename pool_type::have_work_type::atomic_t>));

	__stdcall thread_pool_queue_model(const pool_size_type max_num_threads, const pool_size_type num_threads) noexcept(false) FORCE_INLINE
	: base1_t(max_num_threads), base_t(), pool(num_threads, this->exit_requested_, this->signalled_work_queue) {
	}

	queue_size_type __fastcall
	batch_size(queue_size_type const sz) const noexcept(true) FORCE_INLINE;

	signalled_work_queue_type & __fastcall queue() noexcept(true) override FORCE_INLINE {
		return this->signalled_work_queue;
	}
	signalled_work_queue_type const & __fastcall queue() const noexcept(true) override FORCE_INLINE final {
		return this->signalled_work_queue;
	}
	/**
		\param wk closure_base-derived closure to be process()ed by a pool_thread.
		\return True if the closure_base-derived closure was added to the internal batch_details of the specified pool_thread.
	*/
	virtual bool __fastcall add_work_to_batch(const typename thread_traits::api_params_type::tid_type, typename signalled_work_queue_type::value_type &&wk) noexcept(true) FORCE_INLINE {
		return false;
	}

	typename base_t::exit_requested_type &exit_requested() noexcept(true) override FORCE_INLINE {
		return this->exit_requested_;
	}

private:
	template<class TPB> friend class joinable_t;
	template<class TPB> friend class nonjoinable_t;
	template<class TPB> friend class nonjoinable_buff_t;
	template<template<class> class Joinability, class TPB, typename TPB::priority_type Pri> friend class priority_t;
	template<class DM1, generic_traits::return_data RD, class TPB, class Wk> friend class execution_context_stack_type;
	template<class DM1, generic_traits::return_data RD, class TPB, template<class, class, template<class> class, template<class> class> class CoreWk, class AlgoWrapT, class Wk> friend class execution_context_algo_stack_type;
	template<generic_traits::return_data RD, class TPB, template<class> class Del, template<class> class AtCtr> friend class horizontal_execution;
};
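
/* Illustrative sketch only, not part of the library: in this "pool owns the queue" model every
   pool_thread competes to pop work from one shared queue, which pays off when executing a closure
   costs more than the contention on that queue. A hypothetical worker loop, using standard-library
   types rather than the library's signalled_work_queue:

	#include <condition_variable>
	#include <deque>
	#include <functional>
	#include <mutex>

	struct shared_queue_worker {
		std::mutex &mtx;
		std::condition_variable &cv;
		std::deque<std::function<void()>> &work;
		bool &exit_requested;

		void operator()() const {
			for (;;) {
				std::function<void()> item;
				{
					std::unique_lock<std::mutex> lck(mtx);
					cv.wait(lck, [this]{return exit_requested || !work.empty();});
					if (work.empty()) {
						return;	// Exit was requested and no work remains.
					}
					item=std::move(work.front());
					work.pop_front();
				}
				item();	// Execute the closure outside the lock.
			}
		}
	};
*/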

/// Implements the case when there is a signalled_work_queue contained within each pool_thread, and an algorithm is used by the other pool_threads to steal work from a pool_thread's queue.
/**
	This implies that the cost of executing the input_work is similar to the locking & serialisation costs of distributing that input_work.

	\see thread_pool_base, signalled_work_queue
*/
template<
	class SM,
	pool_traits::size_mode_t Ps,
	typename PTT,
	class Pt
>
class thread_pool_queue_model<pool_traits::work_distribution_mode_t::worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::thread_owns_queue<SM>>, Ps, PTT, Pt> : public thread_pool_base<pool_traits::work_distribution_mode_t::template worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::template thread_owns_queue<SM>>, Ps, PTT, Pt>, protected PTT::template thread_pool_queue_details<pool_traits::work_distribution_mode_t::queue_model_t::template thread_owns_queue<SM>> {
public:
	using base_t=typename PTT::template thread_pool_queue_details<pool_traits::work_distribution_mode_t::queue_model_t::template thread_owns_queue<SM>>;
	using base1_t=thread_pool_base<pool_traits::work_distribution_mode_t::template worker_threads_get_work<pool_traits::work_distribution_mode_t::queue_model_t::template thread_owns_queue<SM>>, Ps, PTT, Pt>;
	using pool_traits_type=typename base1_t::pool_traits_type;
	using os_traits=typename base1_t::os_traits;
	using pool_type=typename base1_t::pool_type;
	using queue_size_type=typename base1_t::queue_size_type;
	using pool_size_type=typename base1_t::pool_size_type;
	using pool_thread_type=typename base1_t::pool_thread_type;
	using exception_type=typename base1_t::exception_type;
	using thread_traits=typename base1_t::thread_traits;
	using api_params_type=typename base1_t::api_params_type;
	using priority_type=typename base1_t::priority_type;
	using work_distribution_mode=typename base1_t::work_distribution_mode;
	using signalled_work_queue_type=typename base1_t::signalled_work_queue_type;
	using queue_model=typename base_t::queue_model;

	/**
		To assist in allowing compile-time computation of the algorithmic order of the threading model.
	*/
	static constexpr generic_traits::memory_access_modes memory_access_mode=base1_t::memory_access_mode;

	/// GSS(k) batching is not supported.
	BOOST_MPL_ASSERT((std::is_same<std::integral_constant<unsigned long, pool_traits_type::GSSk>, std::integral_constant<unsigned long, 1UL>>));

	using GSSk_batching_type=GSSk_batching<pool_traits_type::GSSk, pool_traits_type, pool_type, typename work_distribution_mode::queue_model>;
	/// The type of statistics collected related to the operation of the thread_pool.
	/**
		The general concept behind this type is that the cost of gathering the statistics should be as small as possible, even to the extent of the statistics being inaccurate under-estimations, to ensure that the cost is minimised.

		\see no_statistics
		\see basic_statistics
	*/
	using statistics_type=typename base1_t::statistics_type;
	using cfg_type=typename base1_t::cfg_type;

	/// Returns true if there are no threads in the thread_pool.
	/**
		\return true if there are no threads in the thread_pool.
	*/
	bool __fastcall pool_empty() const noexcept(true) override FORCE_INLINE {
		return pool.empty();
	}
	/// Returns the current number of threads in the thread_pool.
	/**
		\return The current number of threads in the thread_pool.
	*/
	const pool_size_type __fastcall pool_size() const noexcept(true) override FORCE_INLINE {
		return pool.size();
	}

	/// Return the theoretical minimum time in computations according to section 3.3 & Theorem 3.3 in [1] required to complete the current work with the current number of threads in the pool using a CREW-PRAM and according to section 1.3.2, Theorem 1.2 in [2] for an EREW-PRAM.
	/**
		This allows the user to determine the current computational efficiency of their thread_pool with the supplied thread-safe adapted container, safe_colln, as they can use this to profile their code and adjust the size of the thread_pool for the target architecture.

		[1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.
		[2] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.

		\return The minimum number of computations.

		\todo It would be nice if there were some result for returning this with respect to the memory-access models of the work within the queue (which may be a mix of CREW & EREW memory models) for the current thread_pool.

		\see safe_colln
	*/
	unsigned long __fastcall
	min_time(generic_traits::memory_access_modes mode) const noexcept(true) override FORCE_INLINE;
	template<class T>
	unsigned long __fastcall FORCE_INLINE
	min_time(T const &) const noexcept(true);

	/// Return the theoretical minimum number of processors required to achieve the minimum computation time according to section 3.3 & Theorem 3.3 in [1] required to complete the current work using a CREW-PRAM.
	/**
		This allows the user to determine the current computational efficiency of their thread_pool with the supplied thread-safe adapted container, safe_colln, as they can use this to profile their code and adjust the size of the thread_pool for the target architecture.

		[1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.

		\return The minimum number of processors.

		\todo It would be nice if there were some result for returning this with respect to the memory-access models of the work within the queue (which may be a mix of CREW & EREW memory models) for the current thread_pool.

		\see safe_colln
	*/
	unsigned long __fastcall
	min_processors(generic_traits::memory_access_modes mode) const noexcept(true) override FORCE_INLINE;
	template<class T>
	unsigned long __fastcall FORCE_INLINE
	min_processors(T const &) const noexcept(true);

protected:
	pool_type pool;

	BOOST_MPL_ASSERT((std::is_same<typename base_t::exit_requested_type, typename pool_type::exit_requested_type>));

	__stdcall thread_pool_queue_model(const pool_size_type max_num_threads, const pool_size_type num_threads) noexcept(false) FORCE_INLINE
	: base1_t(max_num_threads), base_t(), pool(num_threads, this->exit_requested_) {
	}

	queue_size_type __fastcall
	batch_size(queue_size_type const sz) const noexcept(true) FORCE_INLINE;

	/**
		\param wk closure_base-derived closure to be process()ed by a pool_thread.
		\return True if the closure_base-derived closure was added to the internal batch_details of the specified pool_thread.
	*/
	virtual bool __fastcall add_work_to_batch(const typename thread_traits::api_params_type::tid_type, typename signalled_work_queue_type::value_type &&wk) noexcept(true) FORCE_INLINE {
		return false;
	}

	typename base_t::exit_requested_type &exit_requested() noexcept(true) override FORCE_INLINE {
		return this->exit_requested_;
	}

private:
	template<class TPB> friend class joinable_t;
	template<class TPB> friend class nonjoinable_t;
	template<class TPB> friend class nonjoinable_buff_t;
	template<template<class> class Joinability, class TPB, typename TPB::priority_type Pri> friend class priority_t;
	template<class DM1, generic_traits::return_data RD, class TPB, class Wk> friend class execution_context_stack_type;
	template<class DM1, generic_traits::return_data RD, class TPB, template<class, class, template<class> class, template<class> class> class CoreWk, class AlgoWrapT, class Wk> friend class execution_context_algo_stack_type;
	template<generic_traits::return_data RD, class TPB, template<class> class Del, template<class> class AtCtr> friend class horizontal_execution;

	signalled_work_queue_type & __fastcall queue() noexcept(true) override FORCE_INLINE {}
	signalled_work_queue_type const & __fastcall queue() const noexcept(true) override FORCE_INLINE {}
	/**
		\return true if there is no input_work for the pool_threads to process.
	*/
	bool __fastcall queue_empty() const noexcept(true) override FORCE_INLINE {}
	/**
		\return The approximate number of outstanding, unscheduled input_work items to be processed by the pool_threads.
	*/
	const queue_size_type __fastcall queue_size() const noexcept(true) override FORCE_INLINE {}
	void __fastcall queue_clear() noexcept(true) override FORCE_INLINE {}
};
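
/* Illustrative sketch only, not part of the library: in this "thread owns the queue" model each
   pool_thread has its own queue and idle pool_threads steal from the others, which pays off when
   closures are cheap relative to contending on a single shared queue. A hypothetical stealing
   step, not the library's algorithm:

	#include <cstddef>
	#include <deque>
	#include <functional>
	#include <mutex>
	#include <optional>
	#include <vector>

	struct owned_queue {
		std::mutex mtx;
		std::deque<std::function<void()>> items;
	};

	// Try this thread's own queue first, then scan the other queues for work to steal.
	inline std::optional<std::function<void()>>
	next_item(std::vector<owned_queue> &queues, std::size_t self) {
		for (std::size_t i=0; i<queues.size(); ++i) {
			owned_queue &q=queues[(self+i)%queues.size()];
			const std::lock_guard<std::mutex> lck(q.mtx);
			if (!q.items.empty()) {
				std::function<void()> item=std::move(q.items.front());
				q.items.pop_front();
				return item;
			}
		}
		return std::nullopt;
	}
*/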

} } } }

#endif