libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
thread_pool.hpp
1 #ifndef LIBJMMCG_CORE_PRIVATE_THREAD_POOL_HPP
2 #define LIBJMMCG_CORE_PRIVATE_THREAD_POOL_HPP
3 
4 /******************************************************************************
5 ** Copyright © 2004 by J.M.McGuiness, coder@hussar.me.uk
6 **
7 ** This library is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU Lesser General Public
9 ** License as published by the Free Software Foundation; either
10 ** version 2.1 of the License, or (at your option) any later version.
11 **
12 ** This library is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ** Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public
18 ** License along with this library; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
23 #include "../../core/thread_pool_aspects.hpp"
24 
25 #include <numeric>
26 
27 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd { namespace private_ {
28 
29  /// A hack to avoid using the full execution_context: a cheaper, simpler type for sequential use.
30  namespace void_chooser {
31  template<class Elem>
32  class type {
33  public:
34  typedef Elem element_type;
35 
36  constexpr type() noexcept(true) FORCE_INLINE {}
37  type(element_type const &v) noexcept(true) FORCE_INLINE
38  : value(v) {}
39  element_type const & val() const noexcept(true) FORCE_INLINE {
40  return value;
41  }
42  element_type & val() noexcept(true) FORCE_INLINE {
43  return value;
44  }
45 
46  template<class InpWk>
47  void FORCE_INLINE
48  process(InpWk &wk) {
49  wk.process(value);
50  }
51 
52  private:
53  element_type value;
54  };
55  template<>
56  struct type<void> {
57  typedef void element_type;
58 
59  constexpr type() noexcept(true) FORCE_INLINE {}
60  static element_type
61  val() noexcept(true) FORCE_INLINE {
62  }
63 
64  template<class InpWk>
65  static void FORCE_INLINE
66  process(InpWk &wk) {
67  wk.process();
68  }
69  };
70  }
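 /*
  Illustration of the dispatch that void_chooser performs (a sketch only: the
  two work types below are hypothetical and not part of the library):

    struct int_work { void process(int &v) {v=42;} };  // non-void result: receives the stored value
    struct no_result_work { void process() {} };       // void result: called with no argument

    void_chooser::type<int> with_result;
    int_work iw;
    with_result.process(iw);      // calls iw.process(with_result.val())
    int const r=with_result.val();

    void_chooser::type<void> without_result;
    no_result_work nw;
    without_result.process(nw);   // calls nw.process()
 */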
71 
72  /// A specialisation for sequential "threading", i.e. everything runs on the main thread.
73  /**
74  So that the user can make use of the thread library and automatically switch between real threading and sequential mode without changing their code, apart from some simple typedefs.
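 
 A minimal sketch of that switch (the alias names below are illustrative only; the real template arguments are supplied by the library's pool-traits machinery):
 \code
 #ifdef RUN_SEQUENTIALLY
 typedef sequential_pool_t pool_t;   // this specialisation: all work runs on the caller's thread
 #else
 typedef real_thread_pool_t pool_t;  // a genuine thread_pool built from equivalent traits
 #endif
 
 pool_t pool(2);
 // The calling code is identical in either case, e.g.
 //   auto const &context=pool<<joinable()<<some_closure;
 //   *context;  // in sequential mode this simply reads the already-computed result
 \endcode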
75  */
76  template<
77  class DM,
79  typename P
80  >
82  private:
83  /// An opaque hack for the thread-type in the "pool".
84  class fake_thread;
85  public:
86  using pool_traits_type=P;
87  /// A hack to give us a size_type member-typedef.
89  using work_distribution_mode=DM;
90  using signalled_work_queue_type=typename pool_traits_type::template signalled_work_queue_type<typename work_distribution_mode::queue_model>;
91  using queue_size_type=typename signalled_work_queue_type::size_type;
92  typedef typename pool_type::size_type pool_size_type;
93  /// Just stub statistics - to support the interface. No actual statistics are collected.
94  using statistics_type=typename pool_traits_type::template statistics_type<typename work_distribution_mode::queue_model>;
95  /// The type of the control-flow graph that will be generated at run-time, if supported.
96  /**
97  \see cfg(), dummy_control_flow_graph, control_flow_graph
98  */
99  typedef typename pool_traits_type::cfg_type cfg_type;
100  /// A useful typedef to easily get to the various OS traits.
101  typedef typename pool_traits_type::os_traits os_traits;
103  /// A useful typedef to easily get to the various OS specific thread-traits.
105  /// A useful typedef to easily get to the various API details.
107  /// A useful typedef to easily get to the various priorities.
109 
111 
112  /// A useful typedef to easily get to the nonjoinable grammar element.
113  /**
114  \see nonjoinable_t
115  */
117  /// A useful typedef to easily get to the joinable grammar element.
118  /**
119  \see joinable_t
120  */
122  /// A useful typedef to easily get to the nonjoinable_buff grammar element.
123  /**
124  \see nonjoinable_buff_t
125  */
127  template<priority_type Pri> struct priority {};
128  /// Used by the library to implicitly generate a closure from the InpWk type.
129  template<
130  typename InpWk, ///< The closure_base-derived closure type. The result_type is inferred from the process(result_type) or process() member-functions declared in the Wk type. Note that the process() member-function must not be overloaded, or this will not work; also, it must use the __fastcall calling-convention on those platforms that support it.
131  class FnType=decltype(&std::remove_reference<InpWk>::type::process) ///< The default mutator function is called process, but you could provide an alternative member-function name if desired, as long as the signature is correct.
132 // TODO FnType FnPtr=&InpWk::process
133  >
137  using closure_t=typename base_t::closure_t;
139  /// This is a useful typedef to get at the execution_context.
140  /**
141  The execution_context is created by joinably transferring work into the pool. It has various uses, but is primarily used to atomically and synchronously wait on the result of the work in the closure_base-derived closure object, as specified by the thread_wk_t object transferred into the pool. It can also pass back specified exceptions that may be thrown by the work, asynchronously test whether the work has completed, and remove the work from the pool if it has not yet been started.
142 
143  \see execution_context_type_stack
144  \see joinable
145  \see closure_base
146  */
148  };
149  /// This is a useful typedef to get at the execution_context.
150  /**
151  The execution_context is created by joinably transferring work into the pool. It has various uses, but is primarily used to atomically and synchronously wait on the result of the work in the closure_base-derived closure object, as specified by the thread_wk_t object transferred into the pool. It can also pass back specified exceptions that may be thrown by the work, asynchronously test whether the work has completed, and remove the work from the pool if it has not yet been started.
152 
153  \see create_direct
154  \see execution_context_stack_type
155  \see joinable
156  \see closure_base
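 
 A usage sketch (some_closure stands for any suitable closure_base-derived work item; the transfer idiom is the operator<<(joinable_t) member defined further down):
 \code
 auto const &context=pool<<joinable()<<some_closure;
 // Dereferencing waits for completion (trivially so in this sequential
 // specialisation) and yields the closure's result; an exception registered
 // by the work would be re-thrown here.
 auto const &result=*context;
 \endcode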
157  */
158  template<class InpWk>
160  public:
162 
163  private:
164  typedef void_chooser::type<result_type> element_type;
165 
166  public:
167  constexpr execution_context_stack() noexcept(true) FORCE_INLINE {}
168  template<class Wk> FORCE_INLINE
169  execution_context_stack(sequential_pool &pool, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &, Wk &&wk)
170  : res() {
171  pool.statistics_.added_work();
172  Wk work(wk);
173  res.process(work);
174  pool.statistics_.processed_vertical_work();
175  }
176 
177  typename add_ref_if_not_void<result_type const>::type operator*() const noexcept(true) FORCE_INLINE {
178  return res.val();
179  }
180  typename add_ref_if_not_void<result_type const>::type operator*() noexcept(true) FORCE_INLINE {
181  return res.val();
182  }
183  result_type const * operator->() const noexcept(true) FORCE_INLINE {
184  return &res.val();
185  }
186  result_type * operator->() noexcept(true) FORCE_INLINE {
187  return &res.val();
188  }
189 
190  execution_context_stack const &operator&() const noexcept(true) FORCE_INLINE {
191  return *this;
192  }
193 
194  void operator&()=delete;
195 
196  private:
197  element_type res;
198  };
199  struct void_work {
200  typedef void result_type;
201  constexpr void process() noexcept(true) FORCE_INLINE {}
202  };
204  template<class Res>
206  public:
207  typedef Res result_type;
208 
209  private:
210  typedef void_chooser::type<result_type> element_type;
211 
212  public:
213  execution_context_algo_stack(element_type const &wk) noexcept(true) FORCE_INLINE
214  : res(wk) {
215  }
216 
217  typename add_ref_if_not_void<result_type const>::type operator*() const noexcept(true) FORCE_INLINE {
218  return res.val();
219  }
220  typename add_ref_if_not_void<result_type const>::type operator*() noexcept(true) FORCE_INLINE {
221  return res.val();
222  }
223  result_type const * operator->() const noexcept(true) FORCE_INLINE {
224  return &res.val();
225  }
226  result_type * operator->() noexcept(true) FORCE_INLINE {
227  return &res.val();
228  }
229 
230  execution_context_algo_stack const &operator&() const noexcept(true) FORCE_INLINE {
231  return *this;
232  }
233 
234  void operator&()=delete;
235 
236  private:
237  element_type res;
238  };
240 
241  /// A modifier to allow joinably transferring the work to the pool.
242  struct algo_hack_t {
244 
245  static constexpr execution_context
246  process(const typename sequential_pool::pool_type::size_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) noexcept(true) FORCE_INLINE {
247  return execution_context();
248  }
249  };
250  /// A modifier to allow joinably transferring the work to the pool.
253 
254  static constexpr execution_context
255  process(const typename sequential_pool::pool_type::size_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) noexcept(true) FORCE_INLINE {
256  return execution_context();
257  }
258  };
259 
260  /// A modifier to allow joinably transferring the work to the pool.
261  template<
262  class Colln,
263  typename Pred ///< The predicate to be used to find the value to be counted.
264  >
265  class count_if_t {
266  public:
267  typedef Pred operation_type;
268  typedef long num_elems_ct_t;
270 
271  /**
272  \param c The adapted collection to iterate over.
273  \param pr The predicate to compare against in the collection.
274  */
275  __stdcall count_if_t(Colln const &c, operation_type const &pr) noexcept(true) FORCE_INLINE
276  : pred(pr), colln(c) {
277  }
278 
279  /// Joinably transfer the predicate to the pool.
280  /**
281  \return An execution_context for obtaining the number of matching items in the collection.
282 
283  \see execution_context_stack
284  */
285  execution_context __fastcall
286  process(cliques::element_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) const FORCE_INLINE {
287  return execution_context(num_elems_ct_t(std::count_if(colln.colln().begin(), colln.colln().end(), pred)));
288  }
289 
290  private:
291  operation_type const pred;
292  Colln const &colln;
293  };
294 
295  /// A modifier to allow joinably transferring the work to the pool.
296  template<
297  class Colln
298  >
299  class count_t : public count_if_t<
300  Colln,
301  decltype(
302  std::bind(std::equal_to<typename Colln::value_type>(), typename Colln::value_type(), std::placeholders::_1)
303  )
304  > {
305  private:
306  typedef count_if_t<
307  Colln,
308  decltype(
309  std::bind(std::equal_to<typename Colln::value_type>(), typename Colln::value_type(), std::placeholders::_1)
310  )
311  > base_t;
312 
313  public:
314  typedef typename base_t::execution_context execution_context;
315 
316  __stdcall count_t(Colln const &c, typename Colln::value_type const &v) noexcept(true) FORCE_INLINE
317  : base_t(c, typename base_t::operation_type(std::equal_to<typename Colln::value_type>(), v, std::placeholders::_1)) {
318  }
319  };
320 
321  /// A modifier to allow joinably transferring the work to the pool.
322  template<
323  class Colln, ///< The collection to search.
324  typename Pred ///< The predicate to be used to find the value to be counted.
325  >
326  class find_if_t {
327  public:
328  typedef Pred operation_type;
329  typedef bool found_t;
331 
332  /**
333  \param c The adapted collection to search.
334  \param pr The predicate to compare against in the collection.
335  */
336  __stdcall find_if_t(Colln const &c, operation_type const &pr) noexcept(true) FORCE_INLINE
337  : pred(pr), colln(c) {
338  }
339 
340  /// Joinably transfer the predicate to the pool.
341  /**
342  \return An execution_context for obtaining whether a matching item was found in the collection.
343 
344  \see execution_context_stack
345  */
346  execution_context __fastcall
347  process(cliques::element_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) const FORCE_INLINE {
348  return execution_context(found_t(std::find_if(colln.colln().begin(), colln.colln().end(), pred)!=colln.colln().end()));
349  }
350 
351  private:
352  operation_type const pred;
353  Colln const &colln;
354  };
355 
356  /// A modifier to allow joinably transferring the work to the pool.
357  template<
358  class Colln
359  >
360  struct find_t : public find_if_t<
361  Colln,
362  decltype(
363  std::bind(std::equal_to<typename Colln::value_type>(), typename Colln::value_type(), std::placeholders::_1)
364  )
365  > {
366  typedef find_if_t<
367  Colln,
368  decltype(
369  std::bind(std::equal_to<typename Colln::value_type>(), typename Colln::value_type(), std::placeholders::_1)
370  )
371  > base_t;
373 
374  __stdcall find_t(Colln const &c, typename Colln::value_type const &v) noexcept(true) FORCE_INLINE
375  : base_t(c, typename base_t::operation_type(std::equal_to<typename Colln::value_type>(), v, std::placeholders::_1)) {
376  }
377  };
378 
379  /// A modifier to allow joinably transferring the work to the pool.
380  template<
381  class Colln, ///< The collection to accumulate over.
382  typename BinOp ///< The BinOp to be used to accumulate the result.
383  >
385  public:
386  typedef BinOp operation_type;
389 
390  /**
391  \param c The adapted collection to iterate over.
392  \param v The value with which the accumulate operation should be initialised.
393  \param op The binary operation to use to accumulate the result.
394  */
395  accumulate_op_processor(Colln const &c, accumulated_res_t const &v, operation_type const &op) noexcept(true) FORCE_INLINE
396  : init_val(v), binop(op), colln(c) {
397  }
398  /// Joinably transfer the predicate to the pool.
399  /**
400  \return An execution_context for obtaining the accumulated result over the collection.
401 
402  \see execution_context_stack
403  */
404  execution_context __fastcall
405  process(cliques::element_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) const FORCE_INLINE {
406  return execution_context(accumulated_res_t(std::accumulate(colln.colln().begin(), colln.colln().end(), init_val, binop)));
407  }
408 
409  private:
410  accumulated_res_t const init_val;
411  operation_type const binop;
412  Colln const &colln;
413  };
414  /// A modifier to allow joinably transferring the work to the pool.
415  template<
416  class Colln, ///< The collection to accumulate over.
417  class V
418  >
419  struct accumulate_processor : public accumulate_op_processor<Colln, std::plus<V>> {
420  typedef accumulate_op_processor<Colln, std::plus<V>> base_t;
421 
422  accumulate_processor(Colln const &colln, V const &v) noexcept(true) FORCE_INLINE
423  : base_t(colln, v, typename base_t::operation_type()) {
424  }
425  };
426  /// A modifier to allow joinably transferring the work to the pool.
427  template<
428  class Colln, ///< The collection to search.
429  class Comp=std::less<typename Colln::value_type> ///< The comparator to use to compare the items.
430  >
432  public:
433  typedef Comp operation_type;
434  typedef typename Colln::value_type result_type;
436 
437  __stdcall max_element_t(Colln const &c, Comp const &comp) noexcept(true) FORCE_INLINE
438  : colln(c), compare(comp) {
439  }
440  max_element_t(max_element_t const &m) noexcept(true) FORCE_INLINE
441  : colln(m.colln), compare(m.compare) {
442  }
443 
444  /// Joinably transfer the predicate to the pool.
445  /**
446  \return An execution_context for obtaining the largest value in the collection.
447 
448  \see execution_context
449  */
450  execution_context __fastcall
451  process(cliques::element_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) const FORCE_INLINE {
452  const typename Colln::container_type::const_iterator i(std::max_element(colln.colln().begin(), colln.colln().end(), compare));
453  return execution_context(result_type(i!=colln.colln().end() ? *i : std::numeric_limits<typename Colln::value_type>::min()));
454  }
455 
456  private:
457  Colln const &colln;
458  Comp const &compare;
459  };
460  /// A modifier to allow joinably transferring the work to the pool.
461  template<
462  class Colln, ///< The collection to search.
463  class Comp=std::less<typename Colln::value_type> ///< The comparator to use to compare the items.
464  >
466  public:
467  typedef Comp operation_type;
468  typedef typename Colln::value_type result_type;
470 
471  __stdcall min_element_t(Colln const &c, Comp const &comp) noexcept(true) FORCE_INLINE
472  : colln(c), compare(comp) {
473  }
474  min_element_t(min_element_t const &m) noexcept(true) FORCE_INLINE
475  : colln(m.colln), compare(m.compare) {
476  }
477 
478  /// Joinably transfer the predicate to the pool.
479  /**
480  \return An execution_context for obtaining the smallest value in the collection.
481 
482  \see execution_context
483  */
484  execution_context __fastcall
485  process(cliques::element_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) const FORCE_INLINE {
486  const typename Colln::container_type::const_iterator i(std::min_element(colln.colln().begin(), colln.colln().end(), compare));
487  return execution_context(result_type(i!=colln.colln().end() ? *i : std::numeric_limits<typename Colln::value_type>::max()));
488  }
489 
490  private:
491  Colln const &colln;
492  Comp const &compare;
493  };
494 
495  /// A modifier to allow joinably transferring the work to the pool.
496  template<
497  typename CollnIn1,
498  typename CollnIn2,
499  typename CollnOut,
500  typename Compare
501  >
502  struct merge_t {
504 
505  static constexpr execution_context __fastcall
506  process(const typename sequential_pool::pool_type::size_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) noexcept(true) FORCE_INLINE {
507  return execution_context();
508  }
509  };
510  /// A modifier to allow joinably transferring the work to the pool.
511  template<
512  class Colln,
513  typename Compare
514  >
515  struct sort_t {
517 
518  static constexpr execution_context __fastcall
519  process(const typename sequential_pool::pool_type::size_type, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params const &) noexcept(true) FORCE_INLINE {
520  return execution_context();
521  }
522  };
523 
524  constexpr __stdcall sequential_pool() noexcept(true) FORCE_INLINE
525  : pool_size_() {
526  }
527  explicit __stdcall sequential_pool(pool_size_type sz) noexcept(true) FORCE_INLINE
528  : pool_size_(sz), statistics_() {
529  }
531 
533  }
534 
535  /// A stub just for compatibility. Always returns "true".
536  static constexpr bool __fastcall pool_empty() noexcept(true) FORCE_INLINE {
537  return true;
538  }
539  /// A stub just for compatibility. Returns the size with which the pool was constructed.
540  constexpr pool_size_type __fastcall pool_size() const noexcept(true) FORCE_INLINE {
541  return pool_size_;
542  }
543  /// A stub just for compatibility. Always returns "true".
544  static constexpr bool __fastcall queue_empty() noexcept(true) FORCE_INLINE {
545  return true;
546  }
547  /// A stub just for compatibility. Always returns "0".
548  static constexpr pool_size_type __fastcall queue_size() noexcept(true) FORCE_INLINE {
549  return 0;
550  }
551 
552  /// A stub just for compatibility. Does nothing.
553  static void __fastcall queue_clear() noexcept(true) FORCE_INLINE {
554  }
555 
556  /// A stub just for compatibility.
557  statistics_type const &__fastcall statistics() const noexcept(true) FORCE_INLINE {
558  return statistics_;
559  }
560 
561  /// Return the theoretical minimum time in computations required to complete the current work.
562  static constexpr unsigned long __fastcall
 563  min_time() noexcept(true) FORCE_INLINE {
 564  return 0;
565  }
566  template<class T>
567  static constexpr unsigned long __fastcall FORCE_INLINE
568  min_time(T) noexcept(true) {
569  return 0;
570  }
571 
572  /// Return the theoretical minimum number of processors required to achieve the minimum computation time required to complete the current work.
573  static constexpr unsigned long __fastcall
 574  min_processors() noexcept(true) FORCE_INLINE {
 575  return 0;
576  }
577  template<class T>
578  static constexpr unsigned long __fastcall FORCE_INLINE
579  min_processors(T) noexcept(true) {
580  return 0;
581  }
582 
583  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
584  /**
585  \param c A collection.
586  \param fn A unary_function-type that need not be thread-safe nor support reentrancy, and should not have side-effects; in other respects it is the same as for std::for_each().
 587  \return An atomic object that may be waited upon to determine when all of the applications of fn are complete.
588 
589  \see std::for_each
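 
 A sketch of a call (vec_t stands for an assumed, adapted collection exposing the colln() interface these algorithms expect; the joinable-transfer idiom comes from the wider library, not this file):
 \code
 vec_t data;
 // ... populate data ...
 long sum=0;
 auto const &context=pool<<joinable()<<pool.for_each(data, [&sum](int const &i){sum+=i;});
 *context;   // wait for completion (immediate in this sequential specialisation)
 \endcode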
590  */
591  template<
592  class Colln,
593  class Fn
594  > parallel_algorithm<algo_hack_stack_t> __fastcall FORCE_INLINE
595  for_each(Colln const &c, Fn const &fn) const {
596  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
597  statistics_.update_colln_stats(c.colln().size());
598  std::for_each(c.colln().begin(), c.colln().end(), fn);
599  return reduction_t(typename reduction_t::operation_type());
600  }
601 
602  /**
603  \param c A collection.
604  \param p The predicate to use to count the matching values.
605  \return An execution_context that may be waited upon to determine when all of the applications of p are complete, and obtain the count.
606 
607  \see std::count_if
608  \see execution_context
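 
 A sketch of a call (vec_t is an assumed, adapted collection; the joinable-transfer idiom comes from the wider library):
 \code
 vec_t data;
 // ... populate data ...
 auto const &context=pool<<joinable()<<pool.count_if(data, [](int i){return i>0;});
 auto const num_positive=*context;   // waits (trivially here) and yields the count
 \endcode
 count() below follows the same pattern for counting occurrences of a specific value.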
609  */
610  template<
611  class Colln,
612  typename Pred
613  > parallel_algorithm<count_if_t<Colln, Pred> > __fastcall FORCE_INLINE
614  count_if(Colln &c, Pred const &p) const {
615  typedef parallel_algorithm<count_if_t<Colln, Pred> > reduction_t;
616  statistics_.update_colln_stats(c.colln().size());
617  return reduction_t(typename reduction_t::operation_type(c, p));
618  }
619 
620  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
621  /**
622  \param c A collection.
623  \param v The value to find and be counted.
624  \return An execution_context that may be waited upon, and obtain the count.
625 
626  \see std::count
627  \see execution_context
628  */
629  template<
630  class Colln
631  > parallel_algorithm<count_t<Colln> > __fastcall FORCE_INLINE
632  count(Colln &c, typename Colln::value_type const &v) const {
633  typedef parallel_algorithm<count_t<Colln> > reduction_t;
634  statistics_.update_colln_stats(c.colln().size());
635  return reduction_t(typename reduction_t::operation_type(c, v));
636  }
637 
638  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
639  /**
640  \param c A collection.
641  \param p The predicate to use to find the matching value.
642  \return An execution_context that may be waited upon to determine when all of the applications of p are complete, and obtain a boolean indicating if the item was found.
643 
644  \see std::find_if
645  \see execution_context
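 
 A sketch of a call (vec_t is an assumed, adapted collection; the joinable-transfer idiom comes from the wider library):
 \code
 vec_t data;
 // ... populate data ...
 auto const &context=pool<<joinable()<<pool.find_if(data, [](int i){return i==42;});
 bool const found=*context;   // true if any element matched the predicate
 \endcode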
646  */
647  template<
648  class Colln,
649  class Pred
650  > parallel_algorithm<find_if_t<Colln, Pred> > __fastcall FORCE_INLINE
651  find_if(Colln const &c, Pred const &p) const {
652  typedef parallel_algorithm<find_if_t<Colln, Pred> > reduction_t;
653  statistics_.update_colln_stats(c.colln().size());
654  return reduction_t(typename reduction_t::operation_type(c, p));
655  }
656 
657  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
658  /**
659  \param c A collection.
660  \param v The value to be found.
661  \return An execution_context that may be waited upon, and obtain a boolean indicating if the item was found.
662 
663  \see std::find
664  \see execution_context
665  */
666  template<
667  class Colln
668  > parallel_algorithm<find_t<Colln> > __fastcall FORCE_INLINE
669  find(Colln const &c, typename Colln::value_type const &v) const {
670  typedef parallel_algorithm<find_t<Colln> > reduction_t;
671  statistics_.update_colln_stats(c.colln().size());
672  return reduction_t(typename reduction_t::operation_type(c, v));
673  }
674 
675  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
676  /**
677  \param in A collection.
678  \param out Another collection.
679  \param op The unary operator to apply to each element of in, with the output placed into out.
680  \return An execution_context that may be waited upon to determine when all of the applications of op are complete.
681 
682  \see std::transform
683  \see execution_context
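 
 A sketch of a call (in_t and out_t are assumed, adapted collections; out is resized to match in before the element-wise application):
 \code
 in_t input;
 out_t output;
 // ... populate input ...
 auto const &context=pool<<joinable()<<pool.transform(input, output, [](int i){return i*2;});
 *context;   // wait for completion (immediate in this sequential specialisation)
 \endcode
 The binary overload below follows the same pattern with two input collections and a binary operator.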
684  */
685  template<
686  typename CollnIn,
687  typename CollnOut,
688  class UniOp
689  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
690  transform(CollnIn const &in, CollnOut &out, UniOp const &op) const {
691  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
692 
693  statistics_.update_colln_stats(in.colln().size());
694  out.resize_noinit_nolk(in.size());
695  std::transform(
696  in.colln().begin(),
697  in.colln().end(),
698  out.colln().begin(),
699  op
700  );
701  return reduction_t(typename reduction_t::operation_type());
702  }
703 
704  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
705  /**
706  \param in1 The first collection.
707  \param in2 The second collection.
708  \param out Another collection.
709  \param op The binary operator to apply to each pair of elements from in1 and in2, with the output placed into out.
710  \return An execution_context that may be waited upon to determine when all of the applications of op are complete.
711 
712  \see std::transform
713  \see execution_context
714  */
715  template<
716  typename CollnIn1,
717  typename CollnIn2,
718  typename CollnOut,
719  class BinOp
720  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
721  transform(CollnIn1 const &in1, CollnIn2 const &in2, CollnOut &out, BinOp const &op) const {
722  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
723 
724  statistics_.update_colln_stats(in1.colln().size());
725  out.resize_noinit_nolk(in1.size());
726  std::transform(
727  in1.colln().begin(),
728  in1.colln().end(),
729  in2.colln().begin(),
730  out.colln().begin(),
731  op
732  );
733  return reduction_t(typename reduction_t::operation_type());
734  }
735 
736  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
737  /**
738  \param in A collection.
739  \param out Another collection.
740  \return An execution_context that may be waited upon to determine when the copy is complete.
741 
742  \see std::copy
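 
 A sketch of a call (in_t and out_t are assumed, adapted collections):
 \code
 in_t input;
 out_t output;
 // ... populate input ...
 auto const &context=pool<<joinable()<<pool.copy(input, output);
 *context;   // output now holds a copy of input
 \endcode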
743  */
744  template<
745  typename CollnIn,
746  typename CollnOut
747  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
748  copy(CollnIn const &in, CollnOut &out) const {
749  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
750 
751  statistics_.update_colln_stats(in.colln().size());
752  out.resize_noinit_nolk(in.size());
753  std::copy(
754  in.colln().begin(),
755  in.colln().end(),
756  out.colln().begin()
757  );
758  return reduction_t(typename reduction_t::operation_type());
759  }
760 
761  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
762  /**
763  \param c A collection.
764  \param v The initial value.
765  \param binop The binary operation to use.
766  \return An execution_context that may be waited upon to determine when all of the applications of op are complete, and obtain the result.
767 
768  \see std::accumulate
769  \see execution_context
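 
 A sketch of a call (vec_t is an assumed, adapted collection; the initial value's type follows the binary operation's result_type):
 \code
 vec_t data;
 // ... populate data ...
 auto const &context=pool<<joinable()<<pool.accumulate(data, 0, std::plus<int>());
 int const total=*context;   // waits (trivially here) and yields the sum
 \endcode
 The two-argument overload below defaults the operation to std::plus.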
770  */
771  template<
772  class Colln,
773  typename BinOp
774  > parallel_algorithm<accumulate_op_processor<Colln, BinOp> > __fastcall FORCE_INLINE
775  accumulate(Colln const &c, typename BinOp::result_type const &v, BinOp const &binop) const {
776  typedef parallel_algorithm<accumulate_op_processor<Colln, BinOp> > reduction_t;
777  statistics_.update_colln_stats(c.colln().size());
778  return reduction_t(typename reduction_t::operation_type(c, v, binop));
779  }
780 
781  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
782  /**
783  \param c A collection.
784  \param v The initial value.
785  \return An execution_context that may be waited upon to determine when all of the applications of op are complete, and obtain the result.
786 
787  \see std::accumulate
788  \see execution_context
789  */
790  template<
791  class Colln,
792  class V
793  > parallel_algorithm<accumulate_processor<Colln, V>> __fastcall FORCE_INLINE
794  accumulate(Colln const &c, V const &v) const {
795  typedef parallel_algorithm<accumulate_processor<Colln, V>> reduction_t;
796  statistics_.update_colln_stats(c.colln().size());
797  return reduction_t(typename reduction_t::operation_type(c, v));
798  }
799 
800  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
801  /**
802  \param c A collection to be filled.
803  \param sz The number of items to place into the collection.
804  \param v The value used to copy into the collection.
805 
806  \see std::fill_n
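 
 A sketch of a call (vec_t is an assumed, adapted collection that supports the resizing this member performs):
 \code
 vec_t data;
 auto const &context=pool<<joinable()<<pool.fill_n(data, 1024, 42);
 *context;   // data now holds 1024 copies of 42
 \endcode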
807  */
808  template<
809  class Colln
810  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
811  fill_n(Colln &c, typename Colln::size_type sz, typename Colln::value_type const &v) const {
812  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
813 
814  c.resize_noinit_nolk(sz);
815  statistics_.update_colln_stats(c.colln().size());
816  std::fill(c.colln().begin(), c.colln().end(), v);
817  return reduction_t(typename reduction_t::operation_type());
818  }
819 
820  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
821  /**
822  \param c A collection to be filled.
823  \param v The value used to copy into the collection.
824 
825  \see std::fill
826  */
827  template<
828  class Colln
829  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
830  fill(Colln &c, typename Colln::value_type const &v) const {
831  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
832 
833  statistics_.update_colln_stats(c.colln().size());
834  std::fill(c.colln().begin(), c.colln().end(), v);
835  return reduction_t(typename reduction_t::operation_type());
836  }
837 
838  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
839  /**
840  \param c The collection to be reversed.
841 
842  \see std::reverse
843  */
844  template<
845  typename Colln
846  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
847  reverse(Colln &c) const {
848  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
849 
850  statistics_.update_colln_stats(c.colln().size());
851  std::reverse(c.colln().begin(), c.colln().end());
852  return reduction_t(typename reduction_t::operation_type());
853  }
854 
855  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
856  /**
857  \param c A collection.
858  \param comp The comparator to use to compare the items.
859  \return An execution_context that may be waited upon to obtain the result, which is the largest value in the collection, not an iterator to it.
860 
861  \see std::max_element
862  \see execution_context
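 
 A sketch of a call (vec_t is an assumed, adapted collection of int; note that the result is the value itself, or std::numeric_limits<int>::min() if the collection is empty):
 \code
 vec_t data;
 // ... populate data ...
 auto const &context=pool<<joinable()<<pool.max_element(data, std::less<int>());
 int const largest=*context;
 \endcode
 min_element() below is the mirror image, yielding the smallest value (or std::numeric_limits<int>::max() when empty).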
863  */
864  template<
865  class Colln,
866  class Comp
867  > parallel_algorithm<max_element_t<Colln, Comp>> __fastcall FORCE_INLINE
868  max_element(Colln const &c, Comp const &comp) const {
869  typedef parallel_algorithm<max_element_t<Colln, Comp>> reduction_t;
870  statistics_.update_colln_stats(c.colln().size());
871  return reduction_t(typename reduction_t::operation_type(c, comp));
872  }
873 
874  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
875  /**
876  \param c A collection.
877  \return An execution_context that may be waited upon to obtain the result, which is the largest value in the collection, not an iterator to it.
878 
879  \see std::max_element
880  \see execution_context
881  */
882  template<
883  typename Colln
884  > parallel_algorithm<max_element_t<Colln, std::less<typename Colln::value_type>>> __fastcall FORCE_INLINE
885  max_element(Colln const &c) const {
886  return max_element(c, std::less<typename Colln::value_type>());
887  }
888 
889  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
890  /**
891  \param c A collection.
892  \param comp The comparator to use to compare the items.
893  \return An execution_context that may be waited upon to obtain the result, which is the smallest value in the collection, not an iterator to it.
894 
895  \see std::min_element
896  \see execution_context
897  */
898  template<
899  class Colln,
900  class Comp
901  > parallel_algorithm<min_element_t<Colln, Comp>> __fastcall FORCE_INLINE
902  min_element(Colln const &c, Comp const &comp) const {
903  typedef parallel_algorithm<min_element_t<Colln, Comp>> reduction_t;
904  statistics_.update_colln_stats(c.colln().size());
905  return reduction_t(typename reduction_t::operation_type(c, comp));
906  }
907 
908  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
909  /**
910  \param c A collection.
911  \return An execution_context that may be waited upon to obtain the result, which is the smallest value in the collection, not an iterator to it.
912 
913  \see std::min_element
914  \see execution_context
915  */
916  template<
917  typename Colln
918  > parallel_algorithm<min_element_t<Colln, std::less<typename Colln::value_type>>> __fastcall FORCE_INLINE
919  min_element(Colln const &c) const {
920  return min_element(c, std::less<typename Colln::value_type>());
921  }
922 
923  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
924  /**
925  \param c The collection to be sorted.
926  \param comp The comparison operator.
927  \return An execution_context that may be waited upon to determine when the sort is complete.
928 
929  \see std::sort()
930  \see execution_context
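 
 A sketch of a call (vec_t is an assumed, adapted collection):
 \code
 vec_t data;
 // ... populate data ...
 auto const &context=pool<<joinable()<<pool.sort(data, std::greater<int>());
 *context;   // data is now sorted in descending order
 \endcode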
931  */
932  template<
933  typename Colln,
934  class Compare
935  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
936  sort(Colln &c, Compare const &comp) const {
937  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
938 
939  statistics_.update_colln_stats(c.colln().size());
940  std::sort(
941  c.colln().begin(),
942  c.colln().end(),
943  comp
944  );
945  return reduction_t(typename reduction_t::operation_type());
946  }
947 
948  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
949  /**
950  \param c The collection to be sorted.
951  \return An execution_context that may be waited upon to determine when the sort is complete.
952 
953  \see std::sort()
954  \see execution_context
955  */
956  template<
957  typename Colln
958  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
959  sort(Colln &c) const {
960  return sort(c, std::less<typename Colln::value_type>());
961  }
962 
963  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
964  /**
965  \param in1 The first collection.
966  \param in2 The second collection.
967  \param out Another collection.
968  \param comp The comparison operator.
969  \return An execution_context that may be waited upon to determine when the merge is complete.
970 
971  \see std::merge()
972  \see execution_context
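 
 A sketch of a call (the collection types are assumed, adapted collections; both inputs must already be sorted with respect to comp):
 \code
 in_t first, second;   // both already sorted ascending
 out_t merged;
 auto const &context=pool<<joinable()<<pool.merge(first, second, merged, std::less<int>());
 *context;   // merged now contains every element of both inputs, in order
 \endcode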
973  */
974  template<
975  typename CollnIn1,
976  typename CollnIn2,
977  typename CollnOut,
978  class Compare
979  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
980  merge(CollnIn1 const &in1, CollnIn2 const &in2, CollnOut &out, Compare const &comp) const {
981  typedef parallel_algorithm<algo_hack_stack_t> reduction_t;
982 
983  statistics_.update_colln_stats(in1.colln().size()+in2.colln().size());
984  out.colln().reserve(in1.colln().size()+in2.colln().size());
985  std::merge(
986  in1.colln().begin(),
987  in1.colln().end(),
988  in2.colln().begin(),
989  in2.colln().end(),
990  std::back_inserter(out.colln()),
991  comp
992  );
993  out.sync_size();
994  assert((in1.colln().size()+in2.colln().size())==out.colln().size());
995  assert((in1.size()+in2.size())==out.size());
996  return reduction_t(typename reduction_t::operation_type());
997  }
998 
999  /// This just forwards to the STL algorithm of the same name, for compatibility with the parallel version.
1000  /**
1001  \param in1 The first collection.
1002  \param in2 The second collection.
1003  \param out Another collection.
1004  \return An execution_context that may be waited upon to determine when the merge is complete.
1005 
1006  \see std::merge()
1007  \see execution_context
1008  */
1009  template<
1010  typename CollnIn1,
1011  typename CollnIn2,
1012  typename CollnOut
1013  > parallel_algorithm<algo_hack_stack_t> FORCE_INLINE
1014  merge(CollnIn1 const &in1, CollnIn2 const &in2, CollnOut &out) const {
1015  return merge(in1, in2, out, std::less<typename CollnIn1::value_type>());
1016  }
1017 
1018  /// Apply a unary function object to the argument via the pool interface, for compatibility with the parallel version.
1019  template<
1020  class ArgT,
1021  class UniFn
1022  >
1024  unary_fun(ArgT &&a, UniFn const &op=UniFn()) {
1025  typedef private_::unary_fun_work_type<ArgT, UniFn, sequential_pool> work_type;
1026 
1027  return execution_context_stack<work_type>(*this, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params(), work_type(std::forward<ArgT>(a), op, *this));
1028  }
1029 
1030  /// Apply a binary function object to the arguments via the pool interface, for compatibility with the parallel version.
1031  template<
1032  class LHSArg,
1033  class RHSArg,
1034  class BinFn
1035  >
1037  binary_fun(LHSArg &&lhs, RHSArg &&rhs, BinFn const &op=BinFn()) {
1038  typedef private_::binary_fun_work_type<LHSArg, RHSArg, BinFn, sequential_pool> work_type;
1039 
1040  return execution_context_stack<work_type>(*this, typename pool_traits_type::thread_wk_elem_type::cfg_details_type::params(), work_type(std::forward<LHSArg>(lhs), std::forward<RHSArg>(rhs), op, *this));
1041  }
1042 
1043  /// Apply std::logical_and to the arguments via the pool interface, for compatibility with the parallel version.
1044  template<
1045  class T
1046  >
1048  logical_and(T &&lhs, T &&rhs) {
1049  return this->binary_fun<T, T, std::logical_and<bool>>(std::forward<T>(lhs), std::forward<T>(rhs));
1050  }
1051 
1052  /// Apply std::logical_or to the arguments via the pool interface, for compatibility with the parallel version.
1053  template<
1054  class T
1055  >
1057  logical_or(T &&lhs, T &&rhs) {
1058  return this->binary_fun<T, T, std::logical_or<bool>>(std::forward<T>(lhs), std::forward<T>(rhs));
1059  }
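 /*
  A usage sketch for the functional helpers above (illustrative only; the
  literal arguments and the assumption that the result types follow the
  supplied function objects are editorial, not the library's documentation):

    auto const &and_ctx=pool.logical_and(true, false);
    bool const both=*and_ctx;    // std::logical_and<bool> applied to the arguments

    auto const &sum_ctx=pool.binary_fun<int, int, std::plus<int>>(1, 2);
    int const three=*sum_ctx;    // any binary function object may be supplied
 */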
1060 
1061  /// Non-joinably transfer the closure_base-derived closure into the thread_pool.
1062  /**
1063  \todo JMG: Hubert Matthews suggested that potentially expression templates could be used here to concatenate the thread_wk_t's that are transferred into the pool; also as an implementation of back_batching, i.e. GSS(k) scheduling.
1064 
1065  \see nonjoinable
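 
 A usage sketch (some_closure stands for any suitable closure_base-derived work item):
 \code
 pool<<nonjoinable()<<some_closure;   // fire-and-forget: no execution_context is returned
 \endcode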
1066  */
1067  nonjoinable __fastcall
1068  operator<<(nonjoinable &&nj) noexcept(true) FORCE_INLINE {
1069  return nonjoinable(std::forward<nonjoinable>(nj), *this);
1070  }
1071 
1072  /// Non-joinably transfer the closure_base-derived closure into the thread_pool.
1073  /**
1074  \param njb Initialised with a suitable buffer for allocating the internal work items into.
1075 
1076  \todo JMG: Hubert Matthews suggested that potentially expression templates could be used here to concatenate the thread_wk_t's that are transferred into the pool; also as an implementation of back_batching, i.e. GSS(k) scheduling.
1077 
1078  \see nonjoinable_buff, algo_thread_wk_buffered
1079  */
1080  nonjoinable_buff __fastcall
1081  operator<<(nonjoinable_buff &&njb) noexcept(true) FORCE_INLINE {
1082  return nonjoinable_buff(std::forward<nonjoinable_buff>(njb), *this);
1083  }
1084 
1085  /// Joinably transfer the closure_base-derived closure into the thread_pool.
1086  /**
1087  \todo JMG: Hubert Matthews suggested that potentially expression templates could be used here to concatenate the thread_wk_t's that are transferred into the pool; also as an implementation of back_batching, i.e. GSS(k) scheduling.
1088 
1089  \see joinable_t
1090  */
1092  operator<<(joinable_t<sequential_pool> &&j) noexcept(true) FORCE_INLINE {
1093  return joinable_t<sequential_pool>(std::forward<joinable_t<sequential_pool>>(j), *this);
1094  }
1095 
1096  /**
1097  \todo Implement using the advice given in "Standard C++ IOStreams and Locales" by A.Langer & K.Kreft, page 170.
1098  */
1099  friend inline tostream &__fastcall
1100  operator<<(tostream &os, sequential_pool const &t) FORCE_INLINE {
1101  os
1102  <<_T("Pool=0x")<<&t
1103  <<_T(", type: ")<<sequential_pool::thread_traits::demangle_name(typeid(t));
1104  return os;
1105  }
1106 
1107  /// Access the control-flow graph, if supported.
1108  cfg_type & cfg() noexcept(true) FORCE_INLINE {
1109  return cfg_;
1110  }
1111  /// Access the control-flow graph, if supported.
1112  cfg_type const & cfg() const noexcept(true) FORCE_INLINE {
1113  return cfg_;
1114  }
1115 
1116  private:
1117  template<class TPB> friend class joinable_t;
1118  template<class TPB> friend class nonjoinable_t;
1119  template<class TPB> friend class nonjoinable_buff_t;
1120  template<template<class> class Joinability, class TPB, typename TPB::priority_type Pri> friend class priority_t;
1121  template<class DM1, generic_traits::return_data RD, class TPB, class Wk> friend class execution_context_stack_type;
1122  template<class DM1, generic_traits::return_data RD, class TPB, template<class, class, template<class> class, template<class> class> class CoreWk, class AlgoWrapT, class Wk> friend class execution_context_algo_stack_type;
1123  template<generic_traits::return_data RD, class TPB, template<class> class Del, template<class> class AtCtr> friend class horizontal_execution;
1124 
1125  template<class ExecCtx>
1126  typename ExecCtx::chk_argument_type __fastcall FORCE_INLINE
1127  make_arg(typename signalled_work_queue_type::value_type &&async_wk) {
1128  return ExecCtx::template make_arg<typename ExecCtx::result_type>(
1129  std::forward<typename signalled_work_queue_type::value_type>(async_wk),
1130  this
1131  );
1132  }
1133 
1134  const pool_size_type pool_size_;
1135  mutable statistics_type statistics_;
1136  cfg_type cfg_;
1137 
1138  void __fastcall add_nonjoinable_work(typename signalled_work_queue_type::value_type &&wk) FORCE_INLINE {
1139  statistics_.added_work();
1140  wk->process_nonjoinable(cfg_type::sequential_edge_annotation);
1141  statistics_.processed_vertical_work();
1142  }
1143  typename signalled_work_queue_type::value_type __fastcall add_joinable_work(typename signalled_work_queue_type::value_type &&wk) FORCE_INLINE {
1144  statistics_.added_work();
1145  wk->process_nonjoinable(cfg_type::sequential_edge_annotation);
1146  statistics_.processed_vertical_work();
1147  return wk;
1148  }
1149  };
1150 
1151 }
1152 
1153 } } }
1154 
1155 #endif