libjmmcg release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.

parallel_algorithms.hpp
#ifndef LIBJMMCG_CORE_PRIVATE_PARALLEL_ALGORITHMS_HPP
#define LIBJMMCG_CORE_PRIVATE_PARALLEL_ALGORITHMS_HPP

/******************************************************************************
** Copyright © 2004 by J.M.McGuiness, coder@hussar.me.uk
**
** This library is free software; you can redistribute it and/or
** modify it under the terms of the GNU Lesser General Public
** License as published by the Free Software Foundation; either
** version 2.1 of the License, or (at your option) any later version.
**
** This library is distributed in the hope that it will be useful,
** but WITHOUT ANY WARRANTY; without even the implied warranty of
** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
** Lesser General Public License for more details.
**
** You should have received a copy of the GNU Lesser General Public
** License along with this library; if not, write to the Free Software
** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

#include "../../core/config.h"
#include "../../core/shared_ptr.hpp"

#include <boost/function.hpp>
namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd { namespace private_ {

    const char shuffle_str[]="shuffle";
    const char lhs_merge_str[]="lhs_merge";
    const char rhs_merge_str[]="rhs_merge";
    const char combine1_str[]="combine1";
    const char combine2_str[]="combine2";
    const char ascending_lhs_str[]="ascending_lhs";
    const char descending_rhs_str[]="descending_rhs";
    const char merge_str[]="merge";
    const char arg_str[]="arg";
    const char lhs_str[]="lhs";
    const char rhs_str[]="rhs";
    const char unary_fun_str[]="unary_fun";
    const char binary_fun_str[]="binary_fun";

    namespace alg_wk_wrap {

        template<class V>
        struct pass_value {
            typedef void result_type;
            typedef V element_type;

            element_type &value;

            explicit constexpr pass_value(element_type &r) FORCE_INLINE
            : value(r) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(pass_value const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {}
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see for_each_reduce
        */
        template<class Op>
        struct for_each_work_type {
            typedef void result_type;
            typedef Op operation_type;

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            /**
                Need this to be non-const, in case pointer-types get stored in here; otherwise the compiler will complain (not unreasonably) about the const-ness.
            */
            operation_type op;

            constexpr for_each_work_type() noexcept(true) FORCE_INLINE {
            }
            explicit constexpr for_each_work_type(operation_type const &o) noexcept(true) FORCE_INLINE
            : op(o) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(for_each_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {}
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see for_each_reduce
        */
        template<class Op>
        struct transform_work_type {
            typedef void result_type;
            typedef Op operation_type;

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            /**
                Need this to be non-const, in case pointer-types get stored in here; otherwise the compiler will complain (not unreasonably) about the const-ness.
            */
            operation_type op;

            constexpr transform_work_type() noexcept(true) FORCE_INLINE {
            }
            explicit constexpr transform_work_type(operation_type const &o) noexcept(true) FORCE_INLINE
            : op(o) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(transform_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static void FORCE_INLINE resize_output(CoreWk &wk) noexcept(false) {
                wk.resize_output(wk.work_complete()->containers().input1.size());
            }
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see for_each_work_type
            \see thread_base::for_each
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Fn>
        struct for_each_reduce {
            typedef Fn operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr for_each_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) noexcept(true) FORCE_INLINE
            : beg(b), end(e), fn(w) {
            }

            void __fastcall process() const FORCE_INLINE {
                std::for_each(beg, end, fn.input().op);
            }

            constexpr bool __fastcall operator<(for_each_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator beg, end;
            operation_type &fn;
        };
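
        /*
            A minimal serial sketch of what each for_each_reduce::process() amounts to:
            std::for_each over one sub-range of the input. The fixed two-way split below
            is purely illustrative; the pool chooses the real partitioning.

                #include <algorithm>
                #include <vector>

                void increment_all(std::vector<int> &v) {
                    const auto mid=v.begin()+v.size()/2;
                    const auto op=[](int &i) { ++i; };
                    std::for_each(v.begin(), mid, op);    // one worker's sub-range
                    std::for_each(mid, v.end(), op);    // another worker's sub-range
                }
        */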

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see count_if_reduce
            \see thread_base::count_if_t
            \see thread_base::alg_wrapper1
        */
        template<typename Pred, typename CTR>
        struct countor_work_type {
            typedef CTR result_type;
            typedef Pred operation_type;

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=result_type::memory_access_mode;

            operation_type pred;

            explicit constexpr countor_work_type(operation_type const &p) noexcept(true) FORCE_INLINE
            : pred(p) {
            }

            constexpr void __fastcall process(result_type &) noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(countor_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class T>
            static constexpr void FORCE_INLINE resize_output(T const &) noexcept(true) {}
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Implements a reduction operation.
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see countor_work_type
            \see thread_base::count_if_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename CtrPred>
        struct count_if_reduce {
            typedef CtrPred operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            constexpr count_if_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) noexcept(true) FORCE_INLINE
            : beg(b), end(e), fn(w) {
            }

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(count_if_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator beg, end;
            operation_type &fn;
        };
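
        /*
            A serial sketch of the count_if reduction: partial counts from disjoint
            sub-ranges are combined with operator+. Illustrative only; not this
            library's API.

                #include <algorithm>
                #include <cstddef>
                #include <vector>

                std::ptrdiff_t count_evens(std::vector<int> const &v) {
                    const auto mid=v.begin()+v.size()/2;
                    const auto pred=[](int i) { return i%2==0; };
                    const auto lhs=std::count_if(v.begin(), mid, pred);    // one process()
                    const auto rhs=std::count_if(mid, v.end(), pred);    // another process()
                    return lhs+rhs;    // the reduction step
                }
        */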

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see accumulate_reduce
        */
        template<typename BinOp, typename Acc>
        struct accumulator_work_type {
            typedef Acc result_type;
            typedef BinOp operation_type;

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=result_type::memory_access_mode;

            operation_type binop;
            result_type init;

            constexpr accumulator_work_type(operation_type const &p, result_type &&i) noexcept(true) FORCE_INLINE
            : binop(p), init(std::forward<result_type>(i)) {
            }

            constexpr void process(result_type &) noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(accumulator_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class T>
            static void FORCE_INLINE resize_output(T const &) noexcept(true) {}
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Implements a reduction operation.
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see accumulator_work_type
            \see thread_base::accumulate_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Fn>
        struct accumulate_reduce {
            typedef Fn operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            constexpr __stdcall accumulate_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE
            : beg(b), end(e), fn(w) {
            }

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(accumulate_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator beg, end;
            operation_type &fn;
        };
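
        /*
            A serial sketch of the accumulate reduction. Note that splitting the range
            is only equivalent to a serial std::accumulate when the binary operation is
            associative. Illustrative only.

                #include <functional>
                #include <numeric>
                #include <vector>

                int sum(std::vector<int> const &v, int init) {
                    const auto mid=v.begin()+v.size()/2;
                    const auto binop=std::plus<int>();
                    const int lhs=std::accumulate(v.begin(), mid, init, binop);
                    const int rhs=std::accumulate(mid, v.end(), 0, binop);
                    return binop(lhs, rhs);    // combine the partial results
                }
        */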

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note how, if the input collection is a collection of unique elements, only one item will write to the output, so there is no need to implement any locking on the output. Also note that once the item is found it is implementation-defined how far the search continues within the remaining range.
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \todo It would be nice if, once found, this could cancel any pending tasks.

            \see countor_work_type
            \see thread_base::find_if_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename CtrPred>
        struct find_if_reduce {
            typedef CtrPred operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            constexpr __stdcall find_if_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(find_if_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator beg, end;
            operation_type &fn;
        };
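
        /*
            A serial sketch of the find_if reduction: a hit in an earlier sub-range
            wins; as noted above, how far the other sub-ranges keep searching after a
            hit is implementation-defined in the parallel form. Illustrative only.

                #include <algorithm>
                #include <vector>

                std::vector<int>::const_iterator find_negative(std::vector<int> const &v) {
                    const auto mid=v.begin()+v.size()/2;
                    const auto pred=[](int i) { return i<0; };
                    const auto lhs=std::find_if(v.begin(), mid, pred);
                    if(lhs!=mid) {
                        return lhs;
                    }
                    return std::find_if(mid, v.end(), pred);    // v.end() if absent
                }
        */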

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Implements a reduction operation.
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see accumulator_work_type
            \see thread_base::max_element_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Fn>
        class max_element_reduce {
        public:
            typedef Fn operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the iterator typedefs)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            constexpr __stdcall max_element_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(max_element_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            class max;

            const in_iterator beg, end;
            operation_type &fn;
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Implements a reduction operation.
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see accumulator_work_type
            \see thread_base::min_element_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Fn>
        class min_element_reduce {
        public:
            typedef Fn operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the iterator typedefs)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            constexpr __stdcall min_element_reduce(in_iterator const &b, in_iterator const &e, operation_type &w) FORCE_INLINE;

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(min_element_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            class min;

            const in_iterator beg, end;
            operation_type &fn;
        };
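
        /*
            A serial sketch of the min/max reduction: sub-range extrema are combined
            with one final comparison. Illustrative only.

                #include <algorithm>
                #include <cassert>
                #include <vector>

                int max_value(std::vector<int> const &v) {
                    assert(!v.empty());
                    const auto mid=v.begin()+v.size()/2;
                    const auto lhs=std::max_element(v.begin(), mid);
                    const auto rhs=std::max_element(mid, v.end());
                    if(lhs==mid) {    // the left sub-range was empty
                        return *rhs;
                    }
                    return std::max(*lhs, *rhs);    // combine the partial maxima
                }
        */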

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see for_each_work_type
            \see thread_base::transform_t
            \see thread_base::alg_wrapper2
        */
        template<class Conts, typename UniOp>
        struct transform_reduce {
            typedef UniOp operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator & out_iterator typedefs)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall transform_reduce(in_iterator const &ib, in_iterator const &ie, out_iterator const &o, operation_type const &w) FORCE_INLINE
            : in_beg(ib), in_end(ie), out(o), fn(w) {
            }

            void __fastcall process() FORCE_INLINE {
                std::transform(in_beg, in_end, out, fn.input().op);
            }

            constexpr bool __fastcall operator<(transform_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator in_beg, in_end;
            out_iterator out;
            const operation_type &fn;
        };
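
        /*
            A serial sketch of the transform reduction: the output is sized first
            (cf. transform_work_type::resize_output()), then disjoint sub-ranges are
            written without overlap. Illustrative only.

                #include <algorithm>
                #include <vector>

                void squares(std::vector<int> const &in, std::vector<int> &out) {
                    out.resize(in.size());
                    const auto mid=in.size()/2;
                    const auto op=[](int i) { return i*i; };
                    std::transform(in.begin(), in.begin()+mid, out.begin(), op);
                    std::transform(in.begin()+mid, in.end(), out.begin()+mid, op);
                }
        */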

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see for_each_work_type
            \see thread_base::transform2_t
            \see thread_base::alg_wrapper2
        */
        template<class Conts, typename BinOp>
        struct transform2_reduce {
            typedef BinOp operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator, in2_iterator & out_iterator typedefs)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall transform2_reduce(in_iterator const &i1b, in_iterator const &i1e, in2_iterator const &i2b, out_iterator const &o, operation_type const &w) FORCE_INLINE
            : in1_beg(i1b), in1_end(i1e), in2_beg(i2b), iter_out(o), fn(w) {
            }

            void __fastcall process() FORCE_INLINE {
                std::transform(in1_beg, in1_end, in2_beg, iter_out, fn.input().op);
            }

            constexpr bool __fastcall operator<(transform2_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator in1_beg, in1_end;
            const in2_iterator in2_beg;
            out_iterator iter_out;
            const operation_type &fn;
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see reverse_reduce
        */
        template<class Colln>
        struct reverse_work_type {
            typedef std::pointer_to_binary_function<typename Colln::container_type::iterator, typename Colln::container_type::iterator, void> operation_type;
            // (elided in the original listing; reconstructed from the use of std::iter_swap below)
            typedef typename operation_type::first_argument_type first_argument_type;
            typedef typename operation_type::second_argument_type second_argument_type;

            operation_type binop;

            constexpr reverse_work_type() noexcept(true) FORCE_INLINE
            : binop(&std::iter_swap<first_argument_type, second_argument_type>) {    // (initialiser elided in the original listing; reconstructed by analogy with the constructor below)
            }
            constexpr reverse_work_type(typename Colln::container_type::iterator cb, typename Colln::container_type::iterator ce) noexcept(true) FORCE_INLINE
            : binop(&std::iter_swap<first_argument_type, second_argument_type>), cont_beg_(cb), cont_end_(ce) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr typename Colln::container_type::iterator __fastcall cont_beg() const noexcept(true) FORCE_INLINE {
                return cont_beg_;
            }
            constexpr typename Colln::container_type::iterator __fastcall cont_end() const noexcept(true) FORCE_INLINE {
                return cont_end_;
            }

            constexpr bool __fastcall operator<(reverse_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {
            }

        private:
            typename Colln::container_type::iterator cont_beg_;
            typename Colln::container_type::iterator cont_end_;
        };
        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see reverse_work_type
            \see thread_base::reverse_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Fn>
        struct reverse_reduce {
            typedef Fn operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall reverse_reduce(in_iterator const &bs, in_iterator const &es, operation_type const &w) FORCE_INLINE;

            void __fastcall process() const FORCE_INLINE;

            constexpr bool __fastcall operator<(reverse_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            const in_iterator beg_subrange, end_subrange;
            const operation_type &fn;
            const typename std::iterator_traits<in_iterator>::difference_type cont_size;
        };
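
        /*
            A serial sketch of the reversal: mirrored pairs are exchanged with
            std::iter_swap(), and distinct slices of those pairs are independent,
            which is what lets reverse_reduce split the work. Illustrative only.

                #include <algorithm>
                #include <cstddef>
                #include <vector>

                void reverse_in_two_slices(std::vector<int> &v) {
                    const std::size_t pairs=v.size()/2;
                    for(std::size_t i=0; i<pairs/2; ++i) {    // one worker's slice
                        std::iter_swap(v.begin()+i, v.end()-1-i);
                    }
                    for(std::size_t i=pairs/2; i<pairs; ++i) {    // another worker's slice
                        std::iter_swap(v.begin()+i, v.end()-1-i);
                    }
                }
        */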

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see pass_value
            \see thread_base::fill_n
            \see thread_base::alg_wrapper1
        */
        template<typename Conts, class UniOp>
        struct fill_n_reduce {
            typedef UniOp operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall fill_n_reduce(in_iterator b, in_iterator e, operation_type const &op) FORCE_INLINE
            : beg(b), end(e), val(op) {
            }

            void __fastcall process() const FORCE_INLINE;

            constexpr bool __fastcall operator<(fill_n_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            in_iterator beg, end;
            operation_type const &val;
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see pass_value
            \see thread_base::fill
            \see thread_base::alg_wrapper1
        */
        template<typename Conts, class UniOp>
        struct fill_reduce {
            typedef UniOp operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator typedef)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall fill_reduce(in_iterator b, in_iterator e, operation_type const &op) FORCE_INLINE
            : beg(b), end(e), val(op) {
            }

            void __fastcall process() const FORCE_INLINE;

            constexpr bool __fastcall operator<(fill_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            in_iterator beg, end;
            operation_type const &val;
        };

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            \see for_each_work_type
            \see thread_base::swap_ranges_t
            \see thread_base::alg_wrapper1
        */
        template<class Conts, typename Pred>
        struct swap_ranges_reduce {
            typedef Pred operation_type;
            typedef Conts containers_type;
            // (elided in the original listing: the in_iterator & out_iterator typedefs)

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr __stdcall swap_ranges_reduce(out_iterator const &b1, in_iterator const &e1, out_iterator const &b2, operation_type const &f) FORCE_INLINE
            : begin1(b1), end1(e1), begin2(b2), fn(f) {
            }

            void __fastcall process() FORCE_INLINE;

            constexpr bool __fastcall operator<(swap_ranges_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            out_iterator begin1;
            const in_iterator end1;
            out_iterator begin2;
            operation_type const &fn;
        };
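
        /*
            A serial sketch of the swap_ranges reduction over disjoint sub-ranges;
            b is assumed to be at least as long as a. Illustrative only.

                #include <algorithm>
                #include <vector>

                void swap_all(std::vector<int> &a, std::vector<int> &b) {
                    const auto mid=a.size()/2;
                    std::swap_ranges(a.begin(), a.begin()+mid, b.begin());
                    std::swap_ranges(a.begin()+mid, a.end(), b.begin()+mid);
                }
        */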

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see merge_reduce
        */
        template<class Comp, class TPB>
        struct merge_work_type {
            typedef void result_type;
            typedef Comp operation_type;
            typedef TPB thread_pool_type;

            // (elided in the original listing; names recovered from the constructor)
            operation_type comp;
            thread_pool_type &pool;

            constexpr __stdcall merge_work_type(operation_type const &o, thread_pool_type &p) noexcept(true) FORCE_INLINE
            : comp(o), pool(p) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(merge_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static void resize_output(CoreWk &wk) noexcept(false) FORCE_INLINE;
        };
        /*
            These bits are so damn ugly I want to throw up, but that's optimisations for you....
            They can't be members of batchers_bitonic_merge_reduce because of the template-template parameter to the batchers_bitonic_merge_reduce::merge class, which also needs to be specified in the sort algorithm.
        */
        /// The direction of the resultant output sequence from a merge or sort operation.
        enum class direction {
            ascending,
            descending
        };
        /// The comparator operator to be used within the merge or sort operation.
        template<direction Dir, class out_iterator, class Closure>
        class swap_pred : public std::binary_function<typename out_iterator::value_type, typename out_iterator::value_type, bool> {
        public:
            static constexpr direction dir=Dir;

            explicit constexpr swap_pred(Closure const &c) noexcept(true) FORCE_INLINE
            : arg(c) {}

            bool __fastcall operator()(typename out_iterator::value_type const &lhs, typename out_iterator::value_type const &rhs) const noexcept(noexcept(std::declval<typename Closure::argument_type>().comp(lhs, rhs))) FORCE_INLINE;

        private:
            typename Closure::argument_type const &arg;
        };
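
        /*
            A sketch of how the direction parameter selects the comparison: ascending
            uses the wrapped comparator as-is, descending its negation (cf. the
            std::binary_negate in the sorters below). The closure_t stand-in is an
            assumption for exposition, not this library's type.

                #include <functional>

                struct closure_t {
                    std::less<int> comp;
                };

                template<direction Dir>
                bool ordered(closure_t const &c, int lhs, int rhs) {
                    if constexpr(Dir==direction::ascending) {
                        return c.comp(lhs, rhs);
                    } else {
                        return !c.comp(lhs, rhs);
                    }
                }
        */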
        /// Merge operations are predicated upon the two input queues being sorted, so we can improve the algorithmic complexity by making use of std::merge() to merge the final sub-ranges in O(n/p) time. Note that the input is a bitonic sub-range, which makes this algorithm more complex.
        template<class Iter, class operation_type, direction LHSDir, direction RHSDir, class Dummy>
        struct merge_final_sorter {    // (name elided in the original listing; recovered from the init_merger_t typedef and \see reference below)
            static constexpr direction lhs_dir=LHSDir;
            static constexpr direction rhs_dir=RHSDir;
            typedef Iter out_iterator;
            // (a typedef elided in the original listing)
            typedef swap_pred<rhs_dir, out_iterator, operation_type> swapper_t;
            typedef boost::function<void (out_iterator, out_iterator, std::binary_negate<swapper_t>)> sort_fn_t;
            // (elided in the original listing: probably the arg1_type & arg2_type typedefs, cf. the SortFn variant below)
            typedef std::binary_negate<swapper_t> arg3_type;

            /**
                \todo What about std::inplace_merge()?

                \see std::merge()
            */
            static void __fastcall process(Dummy const &, out_iterator const begin, out_iterator const end, operation_type const &fn) noexcept(false) FORCE_INLINE;
        };
        template<class Iter, class operation_type, direction LHSDir, direction RHSDir, class SortFn>
        struct merge_sub_sorter {    // (the struct's name was elided in the original listing; "merge_sub_sorter" is a hypothetical stand-in)
            static constexpr direction lhs_dir=LHSDir;
            static constexpr direction rhs_dir=RHSDir;
            typedef Iter out_iterator;
            // (a typedef elided in the original listing)
            typedef swap_pred<lhs_dir, out_iterator, operation_type> swapper_t;
            typedef SortFn sort_fn_t;
            typedef typename sort_fn_t::arg1_type arg1_type;
            typedef typename sort_fn_t::arg2_type arg2_type;
            typedef typename sort_fn_t::arg3_type arg3_type;

            static void __fastcall process(sort_fn_t const &sfn, out_iterator const begin, out_iterator const end, operation_type const &fn) noexcept(false) FORCE_INLINE {
                sfn(begin, end, arg3_type(swapper_t(fn)));
            }
        };
        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            [1] <a href="http://www.cs.uoregon.edu/research/paraducks/papers/psc94.d/node2.html"/>

            \see merge_work_type
            \see thread_base::merge_t
            \see thread_base::alg_wrapper2
        */
        template<class Conts, typename Comp>
        class batchers_bitonic_merge_reduce {
        public:
            typedef Comp operation_type;
            typedef typename operation_type::thread_pool_type thread_pool_type;    // (elided in the original listing; reconstructed from the use of thread_pool_type just below)
            typedef typename thread_pool_type::joinable joinable;
            typedef Conts containers_type;
            // (further typedefs elided in the original listing)

            template<
                direction LHSDir,
                direction RHSDir,
                template<class, class, direction, direction, class> class FinalSort    ///< Ugly variation point for introducing an optimisation. Merging is predicated upon sorted inputs, unlike sort, so merging the final sub-ranges can be done in O(n/p) time, which is faster than O(nlog(n)/p) time, and noticeable in testing.
            >
            class merge;

        private:
            /**
                Make use of std::merge() to merge the sub-collections in O(n/p) time, which we can do, as the input collections must be sorted as a precondition for the algorithm.

                \see merge_final_sorter
            */
            typedef merge<direction::ascending, direction::ascending, merge_final_sorter> init_merger_t;
            typedef typename init_merger_t::sort_fn_t sort_fn_t;

            void combine();

        public:
            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the multi-line memory_access_mode definition)

            /**
                Complexity: O(log^2(O(sfn)/p+log(p)))
            */
            constexpr batchers_bitonic_merge_reduce(containers_type &c, operation_type const &f, cliques::element_type const cl) noexcept(true) FORCE_INLINE;    // (declaration elided in the original listing; reconstructed by analogy with bitonic_sort_reduce below)
            virtual ~batchers_bitonic_merge_reduce() noexcept(true) FORCE_INLINE {}

            /**
                If std::stable_sort() is used then this is on average O(n.log(n)), with enough memory.
            */
            void __fastcall process();

            constexpr bool __fastcall operator<(batchers_bitonic_merge_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            containers_type &conts;
            operation_type const &fn;
            cliques::element_type const clique;
        };
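
        /*
            The optimisation that merge_final_sorter exploits, in miniature: two inputs
            that are already sorted can be combined with std::merge() in O(n), cheaper
            than re-sorting the concatenation in O(n.log(n)). Illustrative only.

                #include <algorithm>
                #include <iterator>
                #include <vector>

                std::vector<int> merge_sorted(std::vector<int> const &lhs, std::vector<int> const &rhs) {
                    std::vector<int> out;
                    out.reserve(lhs.size()+rhs.size());
                    std::merge(lhs.begin(), lhs.end(), rhs.begin(), rhs.end(), std::back_inserter(out));
                    return out;
                }
        */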

        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            \see sort_reduce
        */
        template<class Comp, class TPB>
        struct sort_work_type {
            typedef void result_type;
            typedef Comp operation_type;
            typedef TPB thread_pool_type;

            // (elided in the original listing; names recovered from the constructor)
            operation_type comp;
            thread_pool_type &pool;

            constexpr __stdcall sort_work_type(operation_type const &o, thread_pool_type &p) noexcept(true) FORCE_INLINE
            : comp(o), pool(p) {
            }

            constexpr void __fastcall process() noexcept(true) FORCE_INLINE {
            }

            constexpr bool __fastcall operator<(sort_work_type const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

            template<class CoreWk>
            static constexpr void FORCE_INLINE resize_output(CoreWk &) noexcept(true) {
            }
        };
        /// Assist with implementing the parallel versions of the standard algorithms.
        /**
            Note that this operation should operate on an output range that no other thread should modify, i.e. that range should have at least a read-lock taken on it.

            [1] <a href="http://www.iti.fh-flensburg.de/lang/algorithmen/sortieren/bitonic/bitonicen.htm"/>

            \see sort_work_type
            \see thread_base::sort_t
            \see thread_base::alg_wrapper1
        */
        template<typename Conts, class Comp>
        struct bitonic_sort_reduce {
            typedef Comp operation_type;
            typedef Conts containers_type;
            // (further typedefs elided in the original listing)

            template<direction dir>
            class sort;

            /**
                To assist in allowing compile-time computation of the algorithmic order of the threading model.
            */
            // (elided in the original listing: the memory_access_mode definition)

            constexpr bitonic_sort_reduce(containers_type &c, operation_type const &op, cliques::element_type const cl) noexcept(true) FORCE_INLINE;
            virtual ~bitonic_sort_reduce() noexcept(true) FORCE_INLINE {
            }

            void __fastcall process() const FORCE_INLINE;

            constexpr bool __fastcall operator<(bitonic_sort_reduce const &) const noexcept(true) FORCE_INLINE {
                return true;
            }

        private:
            containers_type &cont;
            operation_type const &fn;
            cliques::element_type const clique;
        };
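
        /*
            A serial rendering of the bitonic sort from the cited reference [1], for
            power-of-two sizes: sort the halves in opposite directions to form a
            bitonic sequence, then bitonic-merge it. The parallel form hands the
            independent compare-exchange ranges to pool threads. Illustrative only.

                #include <algorithm>
                #include <cstddef>
                #include <vector>

                void bitonic_merge(std::vector<int> &v, std::size_t lo, std::size_t n, bool ascending) {
                    if(n>1) {
                        const std::size_t m=n/2;
                        for(std::size_t i=lo; i<lo+m; ++i) {
                            if((v[i]>v[i+m])==ascending) {
                                std::swap(v[i], v[i+m]);
                            }
                        }
                        bitonic_merge(v, lo, m, ascending);
                        bitonic_merge(v, lo+m, m, ascending);
                    }
                }

                void bitonic_sort(std::vector<int> &v, std::size_t lo, std::size_t n, bool ascending) {
                    if(n>1) {
                        const std::size_t m=n/2;
                        bitonic_sort(v, lo, m, !ascending);    // opposite directions...
                        bitonic_sort(v, lo+m, m, ascending);    // ...make the whole range bitonic
                        bitonic_merge(v, lo, n, ascending);
                    }
                }
        */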

    }

    template<class T>
    struct stl_functor_result_type {
        typedef T value_type;

        /**
            To assist in allowing compile-time computation of the algorithmic order of the threading model.
        */
        // (elided in the original listing: the memory_access_mode definition)

        value_type result;

        constexpr __stdcall stl_functor_result_type() noexcept(true) FORCE_INLINE {
        }
        constexpr __stdcall stl_functor_result_type(value_type &&r) noexcept(true) FORCE_INLINE
        : result(std::forward<value_type>(r)) {
        }
        /// Note the use of an automatic conversion here.
        constexpr __fastcall operator value_type const &() const noexcept(true) FORCE_INLINE {
            return result;
        }

        bool __fastcall operator<(stl_functor_result_type const &rhs) const noexcept(true) FORCE_INLINE {
            return result<rhs.result;
        }
    };

    /// An adaptor to allow STL unary functions to be operated upon in the thread_pool.
    /**
        Note that the input is evaluated by transferring it into the pool, and the execution_context that holds the result has an automatic conversion to the result_type.
    */
    template<class ArgT, class UniFn, class PT>
    class unary_fun_work_type {
    public:
        typedef PT pool_type;
        typedef UniFn operation_type;
        // (elided in the original listing: the result_type typedef)
        typedef ArgT argument_type;

    private:
        struct arg_int_work_type;

        struct arg_context_t;

        using shared_ptr_t=shared_ptr<arg_context_t, api_lock_traits<platform_api, sequential_mode>>;

    public:
        /**
            To assist in allowing compile-time computation of the algorithmic order of the threading model.
        */
        static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
            shared_ptr_t::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
            // (the remaining lines of this initialiser were elided in the original listing)
        );

        __stdcall unary_fun_work_type(argument_type &&a, operation_type const &o, pool_type &pool) noexcept(false) FORCE_INLINE;

        void __fastcall process(result_type &r) FORCE_INLINE;

        bool __fastcall operator<(unary_fun_work_type const &rhs) const noexcept(true) FORCE_INLINE;

    private:
        operation_type op;
        /// \todo This is done to prevent copying the execution contexts. If we had a transfer ctor, then we could avoid the copy, but we would need to consider whether there would be a problem if the transfer occurred while the work was completing its mutation.
        shared_ptr_t arg_cxt;
    };
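
    /*
        Not this library's API: a std::async analogy for the shape of the adaptor
        above. The functor and its argument are handed off for evaluation, and the
        returned handle yields the result on demand, much as the execution_context's
        automatic conversion to result_type does.

            #include <future>

            int twice(int i) {
                return i*2;
            }

            int evaluate() {
                std::future<int> result=std::async(std::launch::async, &twice, 21);
                return result.get();    // 42
            }
    */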

    /// An adaptor to allow STL binary functions to be operated upon in the thread_pool.
    /**
        Note that the inputs are evaluated by transferring them into the pool, and the execution_context that holds the result has an automatic conversion to the result_type.
    */
    template<class ArgT1, class ArgT2, class BinFn, class PT>
    class binary_fun_work_type {
    public:
        typedef PT pool_type;
        typedef BinFn operation_type;
        // (elided in the original listing: the result_type typedef)
        typedef ArgT1 first_argument_type;
        typedef ArgT2 second_argument_type;

    private:
        template<class Arg>
        struct arg_int_work_type;

        struct arg_contexts_t;

        using shared_ptr_t=shared_ptr<arg_contexts_t, api_lock_traits<platform_api, sequential_mode>>;

    public:
        /**
            To assist in allowing compile-time computation of the algorithmic order of the threading model.
        */
        static constexpr ppd::generic_traits::memory_access_modes memory_access_mode=(
            shared_ptr_t::memory_access_mode==ppd::generic_traits::memory_access_modes::crew_memory_access
            // (the remaining lines of this initialiser were elided in the original listing)
        );

        __stdcall binary_fun_work_type(first_argument_type &&lhs, second_argument_type &&rhs, operation_type const &o, pool_type &pool) noexcept(false) FORCE_INLINE;

        void __fastcall process(result_type &r) FORCE_INLINE;

        bool __fastcall operator<(binary_fun_work_type const &rhs) const noexcept(true) FORCE_INLINE;

        template<class Arg1> constexpr bool __fastcall FORCE_INLINE
        operator<(Arg1 const &) const noexcept(true) {
            return true;
        }

    private:
        operation_type op;
        /// \todo This is done to prevent copying the execution contexts. If we had a transfer ctor, then we could avoid the copy, but we would need to consider whether there would be a problem if the transfer occurred while the work was completing its mutation.
        shared_ptr_t arg_cxts;
    };

} } } }

#endif