libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
subdivide_n_gen_wk.hpp
Go to the documentation of this file.
1 #ifndef LIBJMMCG_CORE_PRIVATE_SUBDIVIDE_N_GEN_WK_HPP
2 #define LIBJMMCG_CORE_PRIVATE_SUBDIVIDE_N_GEN_WK_HPP
3 /******************************************************************************
4 ** Copyright © 2010 by J.M.McGuiness, coder@hussar.me.uk
5 **
6 ** This library is free software; you can redistribute it and/or
7 ** modify it under the terms of the GNU Lesser General Public
8 ** License as published by the Free Software Foundation; either
9 ** version 2.1 of the License, or (at your option) any later version.
10 **
11 ** This library is distributed in the hope that it will be useful,
12 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ** Lesser General Public License for more details.
15 **
16 ** You should have received a copy of the GNU Lesser General Public
17 ** License along with this library; if not, write to the Free Software
18 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
20 
22 
23 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd { namespace private_ {
24 
/// Bookkeeping base-class: holds a count of spawned tasks plus the (locked)
/// collections, exposing accessors, lock/resize helpers, and a virtual set().
/// NOTE(review): Doxygen extract with gaps - the class-declaration line
/// (orig. l.26) and some typedefs (orig. l.30-32, e.g. os_traits,
/// atomic_state_type) are missing; verify against the full header.
25  template<class P, class Collns>
 27  public:
 28  typedef Collns containers_type;
 29  typedef P pool_traits_type;
 33  typedef typename os_traits::lock_traits::template atomic_counter_type<unsigned long> num_tasks_spawned_t;
 34 
 /// Record the expected task count and take a copy/reference of the collections.
 35  __stdcall unlock_collections(typename num_tasks_spawned_t::value_type const i, containers_type const &c) noexcept(true) FORCE_INLINE
 36  : num_tasks_spawned(i), containers_(c) {
 37  }
 39 
 40  containers_type const &__fastcall containers() const noexcept(true) FORCE_INLINE {
 41  return containers_;
 42  }
 43  containers_type &__fastcall containers() noexcept(true) FORCE_INLINE {
 44  return containers_;
 45  }
 46  void resize_output(typename containers_type::size_type const out_colln_size) noexcept(false) FORCE_INLINE {
 47  containers_.resize_output(out_colln_size);
 48  }
 49  void __fastcall lock_containers() noexcept(true) FORCE_INLINE {
 50  containers_.lock();
 51  }
 52 
 /// Atomically bump the outstanding-task counter.
 53  void __fastcall add_a_task() noexcept(true) FORCE_INLINE {
 54  ++num_tasks_spawned;
 55  }
 56 
 /// Defined out-of-line (not in this extract); presumably decrements the
 /// counter and unlocks the collections when it reaches zero - TODO confirm.
 57  virtual atomic_state_type __fastcall set() noexcept(true);
 58 
 59  private:
 60  num_tasks_spawned_t num_tasks_spawned;
 61  containers_type containers_;
 62  };
63 
/// Countdown event: combines the task-counting/unlocking behaviour of
/// unlock_collections with an all-work-done lock that the private lockable
/// interface (try_lock/lock/unlock/reset) forwards to.
/// NOTE(review): extract gaps - typedefs at orig. l.68, 70, 72-74 (e.g.
/// num_tasks_spawned_t, atomic_state_type, all_wk_done_lk_t) are missing.
 64  template<class P, class Collns>
 65  class counted_event final : public P::async_thread_wk_elem_type::work_complete_t, public unlock_collections<P, Collns> {
 66  public:
 67  typedef unlock_collections<P, Collns> base_t;
 69  typedef typename base_t::os_traits os_traits;
 71  typedef typename base_t::timeout_type timeout_type;
 75 
 /// The event starts in the unset state.
 76  __stdcall counted_event(typename num_tasks_spawned_t::value_type const i, containers_type const &c) noexcept(true) FORCE_INLINE
 77  : base_t(i, c), all_wk_done(os_traits::lock_traits::atom_unset) {
 78  }
 79 
 /// Forward to base_t::set(); only when the base reports a state other than
 /// atom_unset is the all-work-done event signalled.
 80  atomic_state_type __fastcall set() noexcept(true) override FORCE_INLINE {
 81  const atomic_state_type state=base_t::set();
 82  if (state==pool_traits_type::os_traits::lock_traits::atom_unset) {
 83  return state;
 84  } else {
 85  return all_wk_done.set();
 86  }
 87  }
 88 
 89  private:
 90  all_wk_done_lk_t all_wk_done;
 91 
 /// Lockable interface: all operations delegate to the internal event.
 92  atomic_state_type __fastcall try_lock() noexcept(true) override FORCE_INLINE {
 93  return all_wk_done.try_lock();
 94  }
 95  atomic_state_type __fastcall lock() noexcept(false) override FORCE_INLINE {
 96  return all_wk_done.lock();
 97  }
 98  atomic_state_type __fastcall lock(const timeout_type t) noexcept(false) override FORCE_INLINE {
 99  return all_wk_done.lock(t);
 100  }
 101  atomic_state_type __fastcall unlock() noexcept(true) override FORCE_INLINE {
 102  return all_wk_done.unlock();
 103  }
 104  atomic_state_type __fastcall reset() noexcept(true) override FORCE_INLINE {
 105  return all_wk_done.reset();
 106  }
 107  };
108 
109  /**
110  Make sure that the work is marked as complete, even in the face of exceptions.
111  */
/// Scope guard: signals the work-complete object from its destructor, so the
/// work is marked done even if the guarded scope exits via an exception.
/// NOTE(review): extract gaps - class-declaration line (orig. l.113) and the
/// memory_access_mode member (orig. l.120) are missing from this listing.
 112  template<class WkC>
 114  public:
 115  typedef WkC work_complete_t;
 116 
 117  /**
 118  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 119  */
 121 
 /// Bind (do not signal) the work-complete object.
 122  explicit __stdcall ensure_wk_complete(work_complete_t &w) noexcept(true) FORCE_INLINE
 123  : all_done(w) {
 124  }
 /// Signal completion exactly once, on scope exit (RAII-style).
 126  __stdcall ~ensure_wk_complete() noexcept(true) FORCE_INLINE {
 127  all_done.set();
 128  }
 129 
 130  private:
 131  work_complete_t &all_done;
 132  };
133 
134  /// Assist with implementing the parallel versions of the standard algorithms.
135  /**
136  \see for_each
137  \see alg_work_wrap
138  */
/// Wraps a unary-range work item: registers itself with the counted_event on
/// construction and guarantees the event is signalled after process(), even on
/// exception, via ensure_wk_complete.
/// NOTE(review): extract gaps - typedefs (orig. l.143, 146, 148), the
/// memory_access_mode declaration body (orig. l.153-158) and the constructor
/// signature line (orig. l.161) are missing from this listing.
 139  template<class P, class Wk, generic_traits::return_data RD_>
 140  class alg_wrapper1 : public Wk {
 141  public:
 142  typedef P pool_traits_type;
 144  typedef Wk work_wrap;
 145  typedef typename work_wrap::result_type result_type;
 147  typedef counted_event<P, typename work_wrap::containers_type> work_complete_t;
 149 
 150  /**
 151  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 152  */
 159  );
 160 
 /// Constructor head missing from this extract; registers one task.
 162  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 163  all_done.add_a_task();
 164  }
 165 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 166  void __fastcall process() noexcept(false) FORCE_INLINE {
 167  const ensure_wk_complete_t e(all_done);
 168  work_wrap::process();
 169  }
 170 
 /// Trivial ordering: all instances compare equal-priority.
 171  constexpr bool __fastcall operator<(alg_wrapper1 const &) const noexcept(true) FORCE_INLINE {
 172  return true;
 173  }
 174 
 175  private:
 176  work_complete_t &all_done;
 177  };
/// Partial specialization of alg_wrapper1 (the specialized return_data value
/// is on the missing class-declaration line, orig. l.179). Same contract as
/// the primary template: count a task on construction, guarantee completion
/// signalling around process().
 178  template<class P, class Wk>
 180  public:
 181  typedef P pool_traits_type;
 183  typedef Wk work_wrap;
 184  typedef typename work_wrap::result_type result_type;
 188 
 189  /**
 190  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 191  */
 198  );
 199 
 200  __stdcall alg_wrapper1(work_wrap &&wk, work_complete_t &w) noexcept(true) FORCE_INLINE
 201  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 202  all_done.add_a_task();
 203  }
 204 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 205  void __fastcall process() noexcept(false) FORCE_INLINE {
 206  const ensure_wk_complete_t e(all_done);
 207  work_wrap::process();
 208  }
 209 
 210  constexpr bool __fastcall operator<(alg_wrapper1 const &) const noexcept(true) FORCE_INLINE {
 211  return true;
 212  }
 213 
 214  private:
 215  work_complete_t &all_done;
 216  };
217 
218  /// Assist with implementing the parallel versions of the standard algorithms.
219  /**
220  \see transform
221  \see alg_work_wrap
222  */
/// Wraps a unary-operation (transform-style) work item; identical completion
/// bookkeeping to alg_wrapper1: register a task on construction, signal the
/// counted_event after process() via the ensure_wk_complete guard.
/// NOTE(review): extract gaps - typedefs (orig. l.227, 230, 232) and the
/// memory_access_mode body (orig. l.237-242) are missing from this listing.
 223  template<class P, class Wk, generic_traits::return_data RD_>
 224  class alg_wrapper2 : public Wk {
 225  public:
 226  typedef P pool_traits_type;
 228  typedef Wk work_wrap;
 229  typedef typename work_wrap::result_type result_type;
 231  typedef counted_event<P, typename work_wrap::containers_type> work_complete_t;
 233 
 234  /**
 235  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 236  */
 243  );
 244 
 245  __stdcall alg_wrapper2(work_wrap &&wk, work_complete_t &w) noexcept(true) FORCE_INLINE
 246  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 247  all_done.add_a_task();
 248  }
 249 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 250  void __fastcall process() noexcept(false) FORCE_INLINE {
 251  const ensure_wk_complete_t e(all_done);
 252  work_wrap::process();
 253  }
 254 
 255  constexpr bool __fastcall operator<(alg_wrapper2 const &) const noexcept(true) FORCE_INLINE {
 256  return true;
 257  }
 258 
 259  private:
 260  work_complete_t &all_done;
 261  };
/// Partial specialization of alg_wrapper2 (specialized return_data value is on
/// the missing class-declaration line, orig. l.263). Same completion contract
/// as the primary template.
 262  template<class P, class Wk>
 264  public:
 265  typedef P pool_traits_type;
 267  typedef Wk work_wrap;
 268  typedef typename work_wrap::result_type result_type;
 272 
 273  /**
 274  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 275  */
 282  );
 283 
 284  __stdcall alg_wrapper2(work_wrap &&wk, work_complete_t &w) noexcept(true) FORCE_INLINE
 285  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 286  all_done.add_a_task();
 287  }
 288 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 289  void __fastcall process() noexcept(false) FORCE_INLINE {
 290  const ensure_wk_complete_t e(all_done);
 291  work_wrap::process();
 292  }
 293 
 294  constexpr bool __fastcall operator<(alg_wrapper2 const &) const noexcept(true) FORCE_INLINE {
 295  return true;
 296  }
 297 
 298  private:
 299  work_complete_t &all_done;
 300  };
301 
302  /// Assist with implementing the parallel versions of the standard algorithms.
303  /**
304  \see transform
305  \see alg_work_wrap
306  */
/// Wraps a binary-operation work item; identical completion bookkeeping to
/// alg_wrapper1/2: register a task on construction, guarantee signalling of
/// the counted_event after process().
/// NOTE(review): extract gaps - typedefs (orig. l.311, 314, 316), the
/// memory_access_mode body (orig. l.321-326) and the constructor signature
/// line (orig. l.329) are missing from this listing.
 307  template<class P, class Wk, generic_traits::return_data RD_>
 308  class alg_wrapper3 : public Wk {
 309  public:
 310  typedef P pool_traits_type;
 312  typedef Wk work_wrap;
 313  typedef typename work_wrap::result_type result_type;
 315  typedef counted_event<P, typename work_wrap::containers_type> work_complete_t;
 317 
 318  /**
 319  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 320  */
 327  );
 328 
 /// Constructor head missing from this extract; registers one task.
 330  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 331  all_done.add_a_task();
 332  }
 333 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 334  void __fastcall process() noexcept(false) FORCE_INLINE {
 335  const ensure_wk_complete_t e(all_done);
 336  work_wrap::process();
 337  }
 338 
 339  constexpr bool __fastcall operator<(alg_wrapper3 const &) const noexcept(true) FORCE_INLINE {
 340  return true;
 341  }
 342 
 343  private:
 344  work_complete_t &all_done;
 345  };
/// Partial specialization of alg_wrapper3 (specialized return_data value is on
/// the missing class-declaration line, orig. l.347). Same completion contract
/// as the primary template.
 346  template<class P, class Wk>
 348  public:
 349  typedef P pool_traits_type;
 351  typedef Wk work_wrap;
 352  typedef typename work_wrap::result_type result_type;
 356 
 357  /**
 358  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 359  */
 366  );
 367 
 368  __stdcall alg_wrapper3(work_wrap &&wk, work_complete_t &w) noexcept(true) FORCE_INLINE
 369  : work_wrap(std::forward<work_wrap>(wk)), all_done(w) {
 370  all_done.add_a_task();
 371  }
 372 
 /// Run the wrapped work; the guard signals all_done on every exit path.
 373  void __fastcall process() noexcept(false) FORCE_INLINE {
 374  const ensure_wk_complete_t e(all_done);
 375  work_wrap::process();
 376  }
 377 
 378  constexpr bool __fastcall operator<(alg_wrapper3 const &) const noexcept(true) FORCE_INLINE {
 379  return true;
 380  }
 381 
 382  private:
 383  work_complete_t &all_done;
 384  };
385 
386  /// Distribute the input range [begin, end) across the thread_pool_type recursively as a collection of tasks.
387  /**
388  This algorithm recursively creates tasks non-joinably until it terminates, when it reaches the leaves which contain contiguous sub-ranges [begin, end) of the initial range and the functor, fn. i.e. it distributes the initial range across the threads_per_clique within the thread_pool_type. The algorithm contains all_done, a counter, that records the number of outstanding tasks, and when that counter reaches zero, the execution_context is released, as all of the sub-tasks have completed, the counter is required because the tasks are transferred non-joinably.
389  */
/// Primary template: recursive work-subdivision base. Carries the work-heap
/// descriptor, buffer-partitioning helpers, and the constructors used by the
/// arity-specific subclasses (subdivide_n_gen_wk1/2/3).
/// NOTE(review): extract gaps - the class-declaration line (orig. l.395), the
/// first template parameter (orig. l.391, presumably Ps given the subclasses),
/// several typedefs (e.g. operation_type, in_iterator, container_type) and
/// protected members (orig. l.439-443) are missing; verify against the full
/// header before relying on this listing.
 390  template<
 392  class TPB, ///< The thread_pool type.
 393  class Alg ///< Housekeeping to ensure that the read_lock_type on the input- & [read_lock_type|write_lock_type] output-container_type is released once all work has been completed, also that once the work has been completed the execution_context is correctly signalled. Also includes the function to be applied to each element in the container_type in some unspecified order. Including the container_type on which to apply the function, of size n.
 394  >
 396  public:
 397  typedef TPB thread_pool_type;
 399  typedef void result_type;
 400  typedef Alg alg_wrap_t;
 402  typedef typename alg_wrap_t::os_traits os_traits;
 405  /// An object that signals to the execution_context when all of the closure_base-derived closures has been process()ed, including being distributed across the threads_per_clique in the thread_pool via the tasks spawned by subdivide_n_gen_wk::process().
 406  /**
 407  This is used so that we don't have to generate an execution_context at each branch, and wait upon it, thus causing vertical pool_threads to be held, and horizontal_execution would have to occur instead, i.e. reducing resources consumed.
 408  */
 410  /**
 411  This type is needed because closure::algo_thread_wk_buffered is dependent upon the exact type of subdivide_n_gen_wk, so is declared after it, but algo_thread_wk_buffered contains the custom memory-buffer. So we have to use a char * to the buffer, and the stride, rather than a more accurate type.
 412  */
 413  struct algo_work_heap_type;
 414 
 415  /**
 416  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 417  */
 418  static constexpr generic_traits::memory_access_modes memory_access_mode=alg_wrap_t::memory_access_mode;
 419 
 421 
 422  static typename thread_pool_type::pool_type::size_type
 423  compute_threads_per_clique(typename thread_pool_type::pool_type::size_type num_threads, typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) FORCE_INLINE;
 424 
 425  /**
 426  This computation is intimately related to the way subdivide_n_gen_wk::process() spawns sub-tasks, and the two must operate in a similar manner, otherwise we might get memory-allocation errors. Note that it over-allocates memory, because it doesn't allow for memory re-use: children could re-use memory of parents.
 427 
 428  \todo This is an O(n) operation, and we might want a faster algorithm, it doesn't have to be perfect, as long as the result is >= the true value.
 429 
 430  \return The number of items allocated in the tree that subdivide_n_gen_wk::process() will generate. Not in bytes, but items.
 431 
 432  \see subdivide_n_gen_wk::process()
 433  */
 434  static typename thread_pool_type::pool_type::size_type
 435  compute_buffer_items(typename thread_pool_type::pool_type::size_type const num_threads_per_clique) noexcept(true) FORCE_INLINE;
 436 
 437  protected:
 438  algo_work_heap_type const work_heap;
 444 
 /// Return type of compute_end is on a missing line (orig. l.445).
 446  compute_end(typename std::iterator_traits<in_iterator>::difference_type const number_subranges) const noexcept(true) FORCE_INLINE;
 447  typename container_type::size_type
 448  num_wk_items_spawned() const noexcept(true) FORCE_INLINE;
 449 
 450  /**
 451  \return This is units of items, not bytes.
 452  */
 453  std::ptrdiff_t odd_third_buff_range() const noexcept(true) FORCE_INLINE;
 454  /**
 455  \return This is units of items, not bytes.
 456  */
 457  std::ptrdiff_t even_half_buff_range() const noexcept(true) FORCE_INLINE;
 /// Buffer-partition accessors used when recursively splitting the work-heap.
 458  typename algo_work_heap_type::buffer_type first_buff_part() const noexcept(true) FORCE_INLINE;
 459  typename algo_work_heap_type::buffer_type even_second_buff_part() const noexcept(true) FORCE_INLINE;
 460  typename algo_work_heap_type::buffer_type odd_second_buff_part() const noexcept(true) FORCE_INLINE;
 461  typename algo_work_heap_type::buffer_type odd_third_buff_part() const noexcept(true) FORCE_INLINE;
 462 
 463  __stdcall subdivide_n_gen_wk(
 464  thread_pool_type &p,
 465  operation_type &f,
 466  typename alg_wrap_t::work_complete_t &w,
 467  algo_work_heap_type const &wh) noexcept(true) FORCE_INLINE;
 468 
 469  /**
 470  \param number_subranges Reduce the size of the range over which this algorithm operates by the amount specified by this number.
 471  */
 472  __stdcall subdivide_n_gen_wk(
 473  thread_pool_type &p,
 474  operation_type &f,
 475  typename alg_wrap_t::work_complete_t &w,
 476  algo_work_heap_type const &wh,
 477  typename std::iterator_traits<in_iterator>::difference_type const number_subranges,
 478  typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) FORCE_INLINE;
 479 
 480  __stdcall subdivide_n_gen_wk(
 481  thread_pool_type &p,
 482  operation_type &f,
 483  typename alg_wrap_t::work_complete_t &w,
 484  algo_work_heap_type const &wh,
 485  in_iterator const &b,
 486  in_iterator const &e) noexcept(true) FORCE_INLINE;
 487 
 488  __stdcall subdivide_n_gen_wk(
 489  thread_pool_type &p,
 490  operation_type &f,
 491  typename alg_wrap_t::work_complete_t &w,
 492  algo_work_heap_type const &wh,
 493  in_iterator const &b,
 494  in_iterator const &e,
 495  typename thread_pool_type::pool_type::size_type const t_per_c) noexcept(true) FORCE_INLINE;
 496 
 498  };
499  /// Distribute the input range [begin, end) across the thread_pool_type recursively as a collection of tasks.
/// Specialization of subdivide_n_gen_wk (the specialized argument is on the
/// missing class-declaration line, orig. l.504): no work-heap buffer here, and
/// the clique/buffer computations are constexpr.
/// NOTE(review): extract gaps - typedefs (e.g. operation_type, in_iterator)
/// and protected members (orig. l.535-539) are missing from this listing.
 500  template<
 501  class TPB, ///< The thread_pool type.
 502  class Alg ///< Housekeeping to ensure that the read_lock_type on the input- & [read_lock_type|write_lock_type] output-container_type is released once all work has been completed, also that once the work has been completed the execution_context is correctly signalled. Also includes the function to be applied to each element in the container_type in some unspecified order. Including the container_type on which to apply the function, of size n.
 503  >
 505  public:
 506  typedef TPB thread_pool_type;
 508  typedef void result_type;
 509  typedef Alg alg_wrap_t;
 511  typedef typename alg_wrap_t::os_traits os_traits;
 514  /// An object that signals to the execution_context when all of the closure_base-derived closures has been process()ed, including being distributed across the threads_per_clique in the thread_pool via the tasks spawned by subdivide_n_gen_wk::process().
 515  /**
 516  This is used so that we don't have to generate an execution_context at each branch, and wait upon it, thus causing vertical pool_threads to be held, i.e. increasing resources consumed.
 517  */
 519  struct algo_work_heap_type;
 520 
 521  /**
 522  To assist in allowing compile-time computation of the algorithmic order of the threading model.
 523  */
 524  static constexpr generic_traits::memory_access_modes memory_access_mode=alg_wrap_t::memory_access_mode;
 525 
 527 
 528  static constexpr typename thread_pool_type::pool_type::size_type
 529  compute_threads_per_clique(typename thread_pool_type::pool_type::size_type, typename thread_pool_type::pool_type::size_type const) noexcept(true) FORCE_INLINE;
 530 
 531  static constexpr typename thread_pool_type::pool_type::size_type
 532  compute_buffer_items(typename thread_pool_type::pool_type::size_type const) noexcept(true) FORCE_INLINE;
 533 
 534  protected:
 540 
 541  __stdcall subdivide_n_gen_wk(
 542  thread_pool_type &p,
 543  operation_type &f,
 544  typename alg_wrap_t::work_complete_t &w) noexcept(true) FORCE_INLINE;
 545 
 546  __stdcall subdivide_n_gen_wk(
 547  thread_pool_type &p,
 548  operation_type &f,
 549  typename alg_wrap_t::work_complete_t &w,
 550  in_iterator const &b,
 551  in_iterator const &e) noexcept(true) FORCE_INLINE;
 552 
 554  };
555 
556  /**
557  This recursive process ensures that it takes O(log(n)) time to submit the work to the pool.
558 
559  Algorithm derived from [1].
560  Note that if the wrapped collection implements CREW or EREW semantics, as safe_colln does, then this algorithm is an implementation CREW/EREW P-RAM model, therefore if the thread_pool is large enough, it implements an optimal schedule according to section 3.3 & Theorem 3.3 in [1].
561 
562  [1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.
563 
564  \see safe_colln
565  */
/// One-range (for_each-style) subdivision work item, built on
/// subdivide_n_gen_wk with alg_wrapper1 as the completion-bookkeeping wrapper.
/// NOTE(review): extract gaps - the class-declaration line (orig. l.573) and
/// several typedefs (orig. l.591-601) are missing from this listing.
 566  template<
 568  class TPB, ///< The thread_pool type.
 569  class Fn, ///< The functor to be applied to each element in the collection in some unspecified order.
 570  class Conts, ///< The collection on which to apply the function, of size n.
 571  template<class, class> class Alg ///< The algorithm to apply.
 572  >
 574  Ps,
 575  TPB,
 576  alg_wrapper1<
 577  typename TPB::pool_traits_type,
 578  Alg<Conts, Fn>,
 579  TPB::pool_traits_type::result_traits_
 580  >
 581  > {
 582  public:
 583  typedef subdivide_n_gen_wk<
 584  Ps,
 585  TPB,
 586  alg_wrapper1<
 587  typename TPB::pool_traits_type,
 588  Alg<Conts, Fn>,
 589  TPB::pool_traits_type::result_traits_
 590  >
 593  typedef typename base_t::in_iterator in_iterator;
 595  typedef typename base_t::result_type result_type;
 596  typedef typename base_t::alg_wrap_t alg_wrap_t;
 599  typedef typename base_t::os_traits os_traits;
 602  using base_t::compute_threads_per_clique;
 603  using base_t::compute_buffer_items;
 604  using base_t::memory_access_mode;
 605 
 606  private:
 /// Sub-range constructor used by the recursive splitting in process().
 607  __stdcall subdivide_n_gen_wk1(
 608  thread_pool_type &p,
 609  operation_type &f,
 610  typename alg_wrap_t::work_complete_t &w,
 611  algo_work_heap_type const &wh,
 612  in_iterator const &b,
 613  in_iterator const &e,
 614  typename thread_pool_type::pool_type::size_type const threads_per_clique) noexcept(true) FORCE_INLINE;
 615 
 616  public:
 617  __stdcall subdivide_n_gen_wk1(thread_pool_type &p, operation_type &f, typename alg_wrap_t::work_complete_t &w, algo_work_heap_type const &wh, typename std::iterator_traits<in_iterator>::difference_type const number_subranges, typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) FORCE_INLINE;
 618 
 619  /// Recursively call subdivide_n_gen_wk1::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 620  /**
 621  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 622 
 623  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 624 
 625  \see nonjoinable, nonjoinable_buff
 626  */
 627  void __fastcall
 628  process() noexcept(false);
 629 
 630  constexpr bool __fastcall operator<(subdivide_n_gen_wk1 const &) const noexcept(true) FORCE_INLINE {
 631  return true;
 632  }
 633  };
/// Specialization of subdivide_n_gen_wk1 (specialized argument is on the
/// missing class-declaration line, orig. l.640-641); no work-heap parameter in
/// the constructors.
/// NOTE(review): extract gaps - typedefs at orig. l.646, 651, 656, 658-659,
/// 661, 664-670, the public constructor (orig. l.682) and the first ctor
/// parameter line (orig. l.675) are missing from this listing.
 634  template<
 635  class TPB, ///< The thread_pool type.
 636  class Fn, ///< The functor to be applied to each element in the collection in some unspecified order.
 637  class Conts, ///< The collection on which to apply the function, of size n.
 638  template<class, class> class Alg ///< The algorithm to apply.
 639  >
 642  TPB,
 643  alg_wrapper1<
 644  typename TPB::pool_traits_type,
 645  Alg<Conts, Fn>,
 647  >
 648  > {
 649  public:
 650  typedef subdivide_n_gen_wk<
 652  TPB,
 653  alg_wrapper1<
 654  typename TPB::pool_traits_type,
 655  Alg<Conts, Fn>,
 657  >
 660  typedef typename base_t::in_iterator in_iterator;
 662  typedef typename base_t::result_type result_type;
 663  typedef typename base_t::alg_wrap_t alg_wrap_t;
 666  typedef typename base_t::os_traits os_traits;
 671  using base_t::memory_access_mode;
 672 
 673  private:
 /// Sub-range constructor used by the recursive splitting in process().
 674  __stdcall subdivide_n_gen_wk1(
 676  operation_type &f,
 677  typename alg_wrap_t::work_complete_t &w,
 678  in_iterator const &b,
 679  in_iterator const &e) noexcept(true) FORCE_INLINE;
 680 
 681  public:
 683 
 684  /// Recursively call subdivide_n_gen_wk1::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 685  /**
 686  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 687 
 688  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 689 
 690  \see nonjoinable_buff
 691  */
 692  void __fastcall
 693  process() noexcept(false);
 694 
 695  constexpr bool __fastcall operator<(subdivide_n_gen_wk1 const &) const noexcept(true) FORCE_INLINE {
 696  return true;
 697  }
 698  };
699 
700  /**
701  This recursive process ensures that it takes O(log(n)) time to submit the work to the pool.
702 
703  Algorithm derived from section 3.3 & Theorem 3.3 in [1].
704 
705  Note that if the wrapped collection implements CREW or EREW semantics, as safe_colln does, then this algorithm is an implementation CREW/EREW P-RAM model, therefore if the thread_pool is large enough, it implements an optimal schedule according to [1].
706 
707  [1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.
708 
709  \see safe_colln
710  */
/// Two-range (transform-style: one input, one output) subdivision work item,
/// built on subdivide_n_gen_wk with alg_wrapper2 as the completion wrapper.
/// Tracks the output range via out_begin/out_end members.
/// NOTE(review): extract gaps - the class-declaration line (orig. l.718) and
/// several typedefs (orig. l.736-747, e.g. out_iterator, operation_type) are
/// missing from this listing.
 711  template<
 713  class TPB, ///< The thread_pool type.
 714  class UniOp, ///< The unary operation to be applied to each element in the input collection in some unspecified order.
 715  class Conts, ///< The collections on which to apply the function, of size n.
 716  template<class, class> class Alg ///< The algorithm to apply.
 717  >
 719  Ps,
 720  TPB,
 721  alg_wrapper2<
 722  typename TPB::pool_traits_type,
 723  Alg<Conts, UniOp>,
 724  TPB::pool_traits_type::result_traits_
 725  >
 726  > {
 727  public:
 728  typedef subdivide_n_gen_wk<
 729  Ps,
 730  TPB,
 731  alg_wrapper2<
 732  typename TPB::pool_traits_type,
 733  Alg<Conts, UniOp>,
 734  TPB::pool_traits_type::result_traits_
 735  >
 738  typedef typename base_t::in_iterator in_iterator;
 740  typedef typename base_t::result_type result_type;
 741  typedef typename base_t::alg_wrap_t alg_wrap_t;
 745  typedef typename base_t::os_traits os_traits;
 748  using base_t::compute_threads_per_clique;
 749  using base_t::compute_buffer_items;
 750  using base_t::memory_access_mode;
 751 
 752  private:
 /// Sub-range constructor used by the recursive splitting in process().
 753  __stdcall subdivide_n_gen_wk2(
 754  thread_pool_type &p,
 755  operation_type &o,
 756  typename alg_wrap_t::work_complete_t &w,
 757  algo_work_heap_type const &wh,
 758  in_iterator const &ib,
 759  in_iterator const &ie,
 760  out_iterator const &ob,
 761  out_iterator const &oe,
 762  const typename thread_pool_type::pool_type::size_type threads_per_clique) noexcept(true) FORCE_INLINE;
 763 
 764  public:
 765  __stdcall subdivide_n_gen_wk2(thread_pool_type &p, operation_type &o, typename alg_wrap_t::work_complete_t &w, algo_work_heap_type const &wh, typename std::iterator_traits<in_iterator>::difference_type const number_subranges, typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) FORCE_INLINE;
 766 
 767  /// Recursively call subdivide_n_gen_wk2::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 768  /**
 769  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 770 
 771  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 772 
 773  \see nonjoinable_buff
 774  */
 775  void __fastcall
 776  process() noexcept(false);
 777 
 778  constexpr bool __fastcall operator<(subdivide_n_gen_wk2 const &) const noexcept(true) FORCE_INLINE {
 779  return true;
 780  }
 781 
 782  private:
 783  out_iterator out_begin;
 784  out_iterator const out_end;
 785  };
/// Specialization of subdivide_n_gen_wk2 (specialized argument is on the
/// missing class-declaration lines, orig. l.792-793, 798); constructors take
/// no work-heap parameter.
/// NOTE(review): extract gaps - typedefs at orig. l.803, 808, 810-811, 813,
/// 816-823, the public constructor (orig. l.837), the first ctor parameter
/// (orig. l.828) and the out_begin member (orig. l.855) are missing.
 786  template<
 787  class TPB, ///< The thread_pool type.
 788  class UniOp, ///< The unary operation to be applied to each element in the input collection in some unspecified order.
 789  class Conts, ///< The collections on which to apply the function, of size n.
 790  template<class, class> class Alg ///< The algorithm to apply.
 791  >
 794  TPB,
 795  alg_wrapper2<
 796  typename TPB::pool_traits_type,
 797  Alg<Conts, UniOp>,
 799  >
 800  > {
 801  public:
 802  typedef subdivide_n_gen_wk<
 804  TPB,
 805  alg_wrapper2<
 806  typename TPB::pool_traits_type,
 807  Alg<Conts, UniOp>,
 809  >
 812  typedef typename base_t::in_iterator in_iterator;
 814  typedef typename base_t::result_type result_type;
 815  typedef typename base_t::alg_wrap_t alg_wrap_t;
 819  typedef typename base_t::os_traits os_traits;
 824  using base_t::memory_access_mode;
 825 
 826  private:
 /// Sub-range constructor used by the recursive splitting in process().
 827  __stdcall subdivide_n_gen_wk2(
 829  operation_type &o,
 830  typename alg_wrap_t::work_complete_t &w,
 831  in_iterator const &ib,
 832  in_iterator const &ie,
 833  out_iterator const &ob,
 834  out_iterator const &oe) noexcept(true) FORCE_INLINE;
 835 
 836  public:
 838 
 839  /// Recursively call subdivide_n_gen_wk2::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 840  /**
 841  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 842 
 843  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 844 
 845  \see nonjoinable_buff
 846  */
 847  void __fastcall
 848  process() noexcept(false);
 849 
 850  constexpr bool __fastcall operator<(subdivide_n_gen_wk2 const &) const noexcept(true) FORCE_INLINE {
 851  return true;
 852  }
 853 
 854  private:
 856  out_iterator const out_end;
 857  };
858 
859  /**
860  This recursive process ensures that it takes O(log(n)) time to submit the work to the pool.
861 
862  Algorithm derived from [1].
863 
864  Note that if the wrapped collection implements CREW or EREW semantics, as safe_colln does, then this algorithm is an implementation CREW/EREW P-RAM model, therefore if the thread_pool is large enough, it implements an optimal schedule according to section 3.3 & Theorem 3.3 in [1].
865 
866  [1] Alan Gibbons, Wojciech Rytter, "Efficient Parallel Algorithms", Cambridge University Press, 1989.
867 
868  \see safe_colln
869  */
/// Three-range (binary transform-style: two inputs, one output) subdivision
/// work item, built on subdivide_n_gen_wk with alg_wrapper3 as the completion
/// wrapper. Tracks the second input and the output ranges as members.
/// NOTE(review): extract gaps - the class-declaration line (orig. l.877) and
/// several typedefs (orig. l.895-907, e.g. in2_iterator, out_iterator,
/// operation_type) are missing from this listing.
 870  template<
 872  class TPB, ///< The thread_pool type.
 873  class BinOp, ///< The binary operation to be applied to each element in the input collection in some unspecified order.
 874  class Conts, ///< The collections on which to apply the function, of size n.
 875  template<class, class> class Alg ///< The algorithm to apply.
 876  >
 878  Ps,
 879  TPB,
 880  alg_wrapper3<
 881  typename TPB::pool_traits_type,
 882  Alg<Conts, BinOp>,
 883  TPB::pool_traits_type::result_traits_
 884  >
 885  > {
 886  public:
 887  typedef subdivide_n_gen_wk<
 888  Ps,
 889  TPB,
 890  alg_wrapper3<
 891  typename TPB::pool_traits_type,
 892  Alg<Conts, BinOp>,
 893  TPB::pool_traits_type::result_traits_
 894  >
 897  typedef typename base_t::in_iterator in_iterator;
 899  typedef typename base_t::result_type result_type;
 900  typedef typename base_t::alg_wrap_t alg_wrap_t;
 905  typedef typename base_t::os_traits os_traits;
 908  using base_t::compute_threads_per_clique;
 909  using base_t::compute_buffer_items;
 910  using base_t::memory_access_mode;
 911 
 912  private:
 /// Sub-range constructor used by the recursive splitting in process().
 913  __stdcall subdivide_n_gen_wk3(
 914  thread_pool_type &p,
 915  operation_type &o,
 916  typename alg_wrap_t::work_complete_t &w,
 917  algo_work_heap_type const &wh,
 918  in_iterator const &ib1,
 919  in_iterator const &ie1,
 920  in2_iterator const &ib2,
 921  in2_iterator const &ie2,
 922  out_iterator const &ob,
 923  out_iterator const &oe,
 924  typename thread_pool_type::pool_type::size_type const threads_per_clique) noexcept(true) FORCE_INLINE;
 925 
 926  public:
 927  __stdcall subdivide_n_gen_wk3(thread_pool_type &p, operation_type &o, typename alg_wrap_t::work_complete_t &w, algo_work_heap_type const &wh, typename std::iterator_traits<in_iterator>::difference_type const number_subranges, typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) FORCE_INLINE;
 928 
 929  /// Recursively call subdivide_n_gen_wk3::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 930  /**
 931  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 932 
 933  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 934 
 935  \see nonjoinable_buff
 936  */
 937  void __fastcall
 938  process() noexcept(false);
 939 
 940  constexpr bool __fastcall operator<(subdivide_n_gen_wk3 const &) const noexcept(true) FORCE_INLINE {
 941  return true;
 942  }
 943 
 944  private:
 945  in2_iterator in_begin2;
 946  in2_iterator const in_end2;
 947  out_iterator out_begin;
 948  out_iterator const out_end;
 949  };
/// Specialization of subdivide_n_gen_wk3 (specialized argument is on the
/// missing class-declaration lines, orig. l.956-957, 962); constructors take
/// no work-heap parameter.
/// NOTE(review): extract gaps - typedefs at orig. l.967, 972, 974-975, 977,
/// 980-988, the public constructor (orig. l.1004), the first ctor parameter
/// (orig. l.993), and the mutable in_begin2/out_begin members (orig. l.1022,
/// 1024) are missing from this listing.
 950  template<
 951  class TPB, ///< The thread_pool type.
 952  class BinOp, ///< The binary operation to be applied to each element in the input collection in some unspecified order.
 953  class Conts, ///< The collections on which to apply the function, of size n.
 954  template<class, class> class Alg ///< The algorithm to apply.
 955  >
 958  TPB,
 959  alg_wrapper3<
 960  typename TPB::pool_traits_type,
 961  Alg<Conts, BinOp>,
 963  >
 964  > {
 965  public:
 966  typedef subdivide_n_gen_wk<
 968  TPB,
 969  alg_wrapper3<
 970  typename TPB::pool_traits_type,
 971  Alg<Conts, BinOp>,
 973  >
 976  typedef typename base_t::in_iterator in_iterator;
 978  typedef typename base_t::result_type result_type;
 979  typedef typename base_t::alg_wrap_t alg_wrap_t;
 984  typedef typename base_t::os_traits os_traits;
 989  using base_t::memory_access_mode;
 990 
 991  private:
 /// Sub-range constructor used by the recursive splitting in process().
 992  __stdcall subdivide_n_gen_wk3(
 994  operation_type &o,
 995  typename alg_wrap_t::work_complete_t &w,
 996  in_iterator const &ib1,
 997  in_iterator const &ie1,
 998  in2_iterator const &ib2,
 999  in2_iterator const &ie2,
 1000  out_iterator const &ob,
 1001  out_iterator const &oe) noexcept(true) FORCE_INLINE;
 1002 
 1003  public:
 1005 
 1006  /// Recursively call subdivide_n_gen_wk3::process(), on disjoint left and right-subsets (assuming even numbers of processors in the clique) of the input collection, until the number of work items generated is 2^n just larger than the number of threads in the pool, which implements a form of GSS(k) scheduling.
 1007  /**
 1008  As the subsets are disjoint inter-subset operations are effectively CRCW operations, whereas intra-subset operations are strictly EREW. This subdivision is valid according to Proposition 1.1 in section 1.2 and Brent's Theorem [1].
 1009 
 1010  [1] Casanova, H., Legrand, A., Robert, Y., "Parallel Algorithms", CRC Press, 2008.
 1011 
 1012  \see nonjoinable_buff
 1013  */
 1014  void __fastcall
 1015  process() noexcept(false);
 1016 
 1017  constexpr bool __fastcall operator<(subdivide_n_gen_wk3 const &) const noexcept(true) FORCE_INLINE {
 1018  return true;
 1019  }
 1020 
 1021  private:
 1023  in2_iterator const in_end2;
 1025  out_iterator const out_end;
 1026  };
1027 
1028 } } } }
1029 
1031 
1032 #endif