libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
subdivide_n_gen_wk_impl.hpp
Go to the documentation of this file.
1 /******************************************************************************
2 ** Copyright © 2010 by J.M.McGuiness, coder@hussar.me.uk
3 **
4 ** This library is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU Lesser General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2.1 of the License, or (at your option) any later version.
8 **
9 ** This library is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** Lesser General Public License for more details.
13 **
14 ** You should have received a copy of the GNU Lesser General Public
15 ** License along with this library; if not, write to the Free Software
16 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
19 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE { namespace ppd { namespace private_ {
20 
21 template<class P, class Collns> inline
22 typename unlock_collections<P, Collns>::atomic_state_type
23 unlock_collections<P, Collns>::set() noexcept(true) {
24  if (num_tasks_spawned.get()) {
25  --num_tasks_spawned;
26  }
27  if (num_tasks_spawned>0) {
28  return pool_traits_type::os_traits::lock_traits::atom_unset;
29  } else {
30  assert(num_tasks_spawned==0);
31  containers_.unlock();
32  return pool_traits_type::os_traits::lock_traits::atom_set;
33  }
34 }
35 
36 template<pool_traits::size_mode_t Ps, class TPB, class Alg>
37 struct subdivide_n_gen_wk<Ps, TPB, Alg>::algo_work_heap_type {
38  typedef unsigned char * buffer_type;
39 
41  std::ptrdiff_t const size; ///< This is units of items, not bytes.
42  std::size_t const stride; ///< In bytes.
43 
44  template<class T> FORCE_INLINE
45  algo_work_heap_type(T* const b, std::ptrdiff_t const sz, std::size_t const str) noexcept(true)
46  : buffer(reinterpret_cast<buffer_type const>(b)), size(sz), stride(str) {
47  assert(buffer);
48  assert(size>=1);
49  assert(stride>=1);
50  }
51 };
52 
53 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
55 subdivide_n_gen_wk<Ps, TPB, Alg>::compute_threads_per_clique(typename thread_pool_type::pool_type::size_type num_threads, typename thread_pool_type::pool_type::size_type const cliques) noexcept(true) {
56  const typename thread_pool_type::pool_type::size_type threads=std::floor(static_cast<double>(num_threads)/cliques+0.5);
57  assert(threads>=1);
58  assert(threads<=num_threads);
59  return threads;
60 }
61 
62 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
64 subdivide_n_gen_wk<Ps, TPB, Alg>::compute_buffer_items(typename thread_pool_type::pool_type::size_type const num_threads_per_clique) noexcept(true) {
65  typename thread_pool_type::pool_type::size_type evens=0, odds=0;
66  for (std::size_t i=0; i<num_threads_per_clique; ++i) {
67  if (i&1) {
68  odds+=i;
69  } else {
70  evens+=i;
71  }
72  }
73  const typename thread_pool_type::pool_type::size_type num_items=static_cast<typename thread_pool_type::pool_type::size_type>(
74  std::floor(
75  (static_cast<double>(num_threads_per_clique*(evens*2+odds*3))/2)
76  +0.5
77  )
78  )+num_threads_per_clique;
79  assert(num_items>0);
80  return num_items;
81 }
82 
83 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
84 typename subdivide_n_gen_wk<Ps, TPB, Alg>::in_iterator
85 subdivide_n_gen_wk<Ps, TPB, Alg>::compute_end(typename std::iterator_traits<in_iterator>::difference_type const number_subranges) const noexcept(true) {
86  if (number_subranges==1) {
87  return all_done.containers().input1.end();
88  } else {
89  double const end_subrange_dist=double(all_done.containers().input1.size())/number_subranges;
90  return std::next(all_done.containers().input1.begin(), static_cast<typename std::iterator_traits<in_iterator>::difference_type>(end_subrange_dist));
91  }
92 }
93 
94 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
95 typename subdivide_n_gen_wk<Ps, TPB, Alg>::container_type::size_type
96 subdivide_n_gen_wk<Ps, TPB, Alg>::num_wk_items_spawned() const noexcept(true) {
97  typename container_type::size_type const sz=all_done.containers().cont1_in.size();
98  return std::min(sz, static_cast<typename container_type::size_type>(sz/pool.batch_size(sz)));
99 }
100 
101 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
102 std::ptrdiff_t
103 subdivide_n_gen_wk<Ps, TPB, Alg>::odd_third_buff_range() const noexcept(true) {
104  const std::ptrdiff_t range=(work_heap.size-3)/3;
105  assert(range>0);
106  assert(range<work_heap.size);
107  return range;
108 }
109 
110 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
111 std::ptrdiff_t
112 subdivide_n_gen_wk<Ps, TPB, Alg>::even_half_buff_range() const noexcept(true) {
113  const std::ptrdiff_t range=(work_heap.size-2)/2;
114  assert(range>0);
115  assert(range<work_heap.size);
116  return range;
117 }
118 
119 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
120 typename subdivide_n_gen_wk<Ps, TPB, Alg>::algo_work_heap_type::buffer_type
121 subdivide_n_gen_wk<Ps, TPB, Alg>::first_buff_part() const noexcept(true) {
122  typename algo_work_heap_type::buffer_type const ret=work_heap.buffer+work_heap.stride;
123  assert(ret<=(work_heap.buffer+work_heap.stride*work_heap.size));
124  return ret;
125 }
126 
127 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
128 typename subdivide_n_gen_wk<Ps, TPB, Alg>::algo_work_heap_type::buffer_type
129 subdivide_n_gen_wk<Ps, TPB, Alg>::even_second_buff_part() const noexcept(true) {
130  typename algo_work_heap_type::buffer_type const ret=work_heap.buffer+work_heap.stride*work_heap.size/2;
131  assert(ret<=(work_heap.buffer+work_heap.stride*work_heap.size));
132  return ret;
133 }
134 
135 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
136 typename subdivide_n_gen_wk<Ps, TPB, Alg>::algo_work_heap_type::buffer_type
137 subdivide_n_gen_wk<Ps, TPB, Alg>::odd_second_buff_part() const noexcept(true) {
138  typename algo_work_heap_type::buffer_type const ret=work_heap.buffer+work_heap.stride*work_heap.size/3;
139  assert(ret<=(work_heap.buffer+work_heap.stride*work_heap.size));
140  return ret;
141 }
142 
143 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
144 typename subdivide_n_gen_wk<Ps, TPB, Alg>::algo_work_heap_type::buffer_type
145 subdivide_n_gen_wk<Ps, TPB, Alg>::odd_third_buff_part() const noexcept(true) {
146  typename algo_work_heap_type::buffer_type const ret=work_heap.buffer+work_heap.stride*2*work_heap.size/3;
147  assert(ret<=(work_heap.buffer+work_heap.stride*work_heap.size));
148  return ret;
149 }
150 
151 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
153  thread_pool_type &p,
154  operation_type &f,
155  typename alg_wrap_t::work_complete_t &w,
156  algo_work_heap_type const &wh) noexcept(true)
157 :
158  // The number of threads in to pool may not be perfectly divisible into the requested number of cliques, so ensure that the number of tasks to be generated for a clique is the next highest integer.
160  work_heap(wh),
161  pool(p),
162  fn(f),
163  all_done(w),
164  begin(all_done.containers().input1.begin()),
165  end(all_done.containers().input1.end()) {
166  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
167  all_done.add_a_task();
168 }
169 
170 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
172  thread_pool_type &p,
173  operation_type &f,
174  typename alg_wrap_t::work_complete_t &w,
175  algo_work_heap_type const &wh,
176  typename std::iterator_traits<in_iterator>::difference_type const number_subranges,
177  typename thread_pool_type::pool_type::size_type const cliques) noexcept(true)
178 :
179  // The number of threads in to pool may not be perfectly divisible into the requested number of cliques, so ensure that the number of tasks to be generated for a clique is the next highest integer.
181  work_heap(wh),
182  pool(p),
183  fn(f),
184  all_done(w),
185  begin(all_done.containers().input1.begin()),
186  end(compute_end(number_subranges)) {
187  assert(threads_per_clique>=1);
188  assert(threads_per_clique<=pool.pool_size());
189  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
190  all_done.add_a_task();
191 }
192 
193 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
195  thread_pool_type &p,
196  operation_type &f,
197  typename alg_wrap_t::work_complete_t &w,
198  algo_work_heap_type const &wh,
199  in_iterator const &b,
200  in_iterator const &e) noexcept(true)
202  work_heap(wh),
203  pool(p),
204  fn(f),
205  all_done(w),
206  begin(b),
207  end(e) {
208  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
209  all_done.add_a_task();
210 }
211 
212 template<pool_traits::size_mode_t Ps, class TPB, class Alg> inline
214  thread_pool_type &p,
215  operation_type &f,
216  typename alg_wrap_t::work_complete_t &w,
217  algo_work_heap_type const &wh,
218  in_iterator const &b,
219  in_iterator const &e,
220  typename thread_pool_type::pool_type::size_type const t_per_c) noexcept(true)
221 : threads_per_clique(t_per_c),
222  work_heap(wh),
223  pool(p),
224  fn(f),
225  all_done(w),
226  begin(b),
227  end(e) {
228  assert(threads_per_clique>=1);
229  assert(threads_per_clique<=pool.pool_size());
230  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
231  all_done.add_a_task();
232 }
233 
234 template<class TPB, class Alg>
235 struct subdivide_n_gen_wk<pool_traits::size_mode_t::infinite, TPB, Alg>::algo_work_heap_type {
236  typedef unsigned char * buffer_type;
237 
238  template<class T> constexpr FORCE_INLINE
239  algo_work_heap_type(T* const, std::ptrdiff_t const, std::size_t const) noexcept(true) {
240  }
241 };
242 
243 template<class TPB, class Alg> inline
245 subdivide_n_gen_wk<pool_traits::size_mode_t::infinite, TPB, Alg>::compute_threads_per_clique(typename thread_pool_type::pool_type::size_type, typename thread_pool_type::pool_type::size_type const) noexcept(true) {
246  return typename thread_pool_type::pool_type::size_type();
247 }
248 
249 template<class TPB, class Alg> inline
251 subdivide_n_gen_wk<pool_traits::size_mode_t::infinite, TPB, Alg>::compute_buffer_items(typename thread_pool_type::pool_type::size_type const) noexcept(true) {
252  return typename thread_pool_type::pool_type::size_type(1);
253 }
254 
255 template<class TPB, class Alg> inline
257  thread_pool_type &p,
258  operation_type &f,
259  typename alg_wrap_t::work_complete_t &w) noexcept(true)
260 :
261  // The number of threads in to pool may not be perfectly divisible into the requested number of cliques, so ensure that the number of tasks to be generated for a clique is the next highest integer.
263  pool(p),
264  fn(f),
265  all_done(w),
266  begin(all_done.containers().input1.begin()),
267  end(all_done.containers().input1.end()) {
268  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
269  all_done.add_a_task();
270 }
271 
272 template<class TPB, class Alg> inline
274  thread_pool_type &p,
275  operation_type &f,
276  typename alg_wrap_t::work_complete_t &w,
277  in_iterator const &b,
278  in_iterator const &e) noexcept(true)
280  pool(p),
281  fn(f),
282  all_done(w),
283  begin(b),
284  end(e) {
285  // Ensure that a sub-task that actually processes work can't mark the whole task complete before any other sub-tasks have completed their processing.
286  all_done.add_a_task();
287 }
288 
289 template<pool_traits::size_mode_t Ps, class TPB, class Fn, class Conts, template<class, class> class Alg> inline
292  operation_type &f,
293  typename alg_wrap_t::work_complete_t &w,
294  algo_work_heap_type const &wh,
295  in_iterator const &b,
296  in_iterator const &e,
297  typename thread_pool_type::pool_type::size_type const threads_per_clique) noexcept(true)
299  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
300  assert(sizeof(subdivide_n_gen_wk1)<=this->work_heap.stride);
301 }
302 
303 template<pool_traits::size_mode_t Ps, class TPB, class Fn, class Conts, template<class, class> class Alg> inline
306  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
307  assert(sizeof(subdivide_n_gen_wk1)<=this->work_heap.stride);
308 }
309 
310 template<pool_traits::size_mode_t Ps, class TPB, class Fn, class Conts, template<class, class> class Alg> inline void
311 subdivide_n_gen_wk1<Ps, TPB, Fn, Conts, Alg>::process() noexcept(false) {
312  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
313 // TODO typedef typename thread_pool_type::nonjoinable_buff nonjoinable_t;
314 
315  const ensure_wk_complete_t e(this->all_done);
316  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
317  if (dist>0) { // In case a joker passes in an empty collection...
318  assert(std::accumulate(this->work_heap.buffer, this->work_heap.buffer+this->work_heap.size, 0UL)==0UL);
319  if (this->threads_per_clique<=1) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
320  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, this->fn), this->all_done);
321  PREFETCH_READ(&*this->begin, 3);
322  leaf_wk.process();
323  } else {
324  if (this->threads_per_clique&0x01) {
325  // Note that this over-submits tasks, one of these should not be a sub-tree, but a terminal task, but I'd need to re-compute dist_3 correctly to submit less work to the terminal task.
326  const typename in_iterator::difference_type dist_3(dist/3);
327  const in_iterator middle(std::next(this->begin, dist_3));
328  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
329  if (this->begin!=middle) {
330  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
331  this->pool
333 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
335  }
337  if (middle!=middle_next) {
338  assert((this->odd_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
339  this->pool
341 // TODO <<nonjoinable_t(this->odd_second_buff_part(), this, gen_wk_node_str)
343  }
344  if (middle_next!=this->end) {
345  assert((this->odd_third_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
347  }
348  } else {
349  const in_iterator middle(std::next(this->begin, dist>>1));
350  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
351  if (this->begin!=middle) {
352  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
353  this->pool
355 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
357  this->pool,
358  this->fn,
359  this->all_done,
361  this->begin,
362  middle,
363  this->threads_per_clique>>1
364  );
365  }
366  if (middle!=this->end) {
367  assert((this->even_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
369  this->pool,
370  this->fn,
371  this->all_done,
373  middle,
374  this->end,
375  this->threads_per_clique>>1
376  ).process();
377  }
378  }
379  }
380  }
381 }
382 
383 template<class TPB, class Fn, class Conts, template<class, class> class Alg> inline
386  operation_type &f,
387  typename alg_wrap_t::work_complete_t &w,
388  in_iterator const &b,
389  in_iterator const &e) noexcept(true)
390 : base_t(p,f,w,b,e) {
391 }
392 
393 template<class TPB, class Fn, class Conts, template<class, class> class Alg> inline
395 : base_t(p, f, w) {
396 }
397 
398 template<class TPB, class Fn, class Conts, template<class, class> class Alg> inline void __fastcall
400  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
401 
402  const ensure_wk_complete_t e(this->all_done);
403  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
404  if (dist>0) { // In case a joker passes in an empty collection...
405  if (dist<=2) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
406  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, this->fn), this->all_done);
407  PREFETCH_READ(&*this->begin, 3);
408  leaf_wk.process();
409  } else {
410  const in_iterator middle(std::next(this->begin, dist>>1));
411  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
412  if (this->begin!=middle) {
413  this->pool
416  this->pool,
417  this->fn,
418  this->all_done,
419  this->begin,
420  middle
421  );
422  }
423  if (middle!=this->end) {
425  this->pool,
426  this->fn,
427  this->all_done,
428  middle,
429  this->end
430  ).process();
431  }
432  }
433  }
434 }
435 
436 template<pool_traits::size_mode_t Ps, class TPB, class UniOp, class Conts, template<class, class> class Alg> inline
439  operation_type &o,
440  typename alg_wrap_t::work_complete_t &w,
441  algo_work_heap_type const &wh,
442  in_iterator const &ib,
443  in_iterator const &ie,
444  out_iterator const &ob,
445  out_iterator const &oe,
446  const typename thread_pool_type::pool_type::size_type threads_per_clique) noexcept(true)
448  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
449  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
450  assert(sizeof(subdivide_n_gen_wk2)<=this->work_heap.stride);
451 }
452 
453 template<pool_traits::size_mode_t Ps, class TPB, class UniOp, class Conts, template<class, class> class Alg> inline
458  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
459  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
460  assert(sizeof(subdivide_n_gen_wk2)<=this->work_heap.stride);
461 }
462 
463 template<pool_traits::size_mode_t Ps, class TPB, class UniOp, class Conts, template<class, class> class Alg> inline void
464 subdivide_n_gen_wk2<Ps, TPB, UniOp, Conts, Alg>::process() noexcept(false) {
465  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
466 // TODO typedef typename thread_pool_type::nonjoinable_buff nonjoinable_t;
467 
468  const ensure_wk_complete_t e(this->all_done);
469  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
470  if (dist>0) { // In case a joker passes in an empty collection...
471  assert(std::accumulate(this->work_heap.buffer, this->work_heap.buffer+this->work_heap.size, 0UL)==0UL);
472  if (this->threads_per_clique<=1) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
473  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, out_begin, this->fn), this->all_done);
474  PREFETCH_READ(&*this->begin, 3);
476  leaf_wk.process();
477  } else {
478  if (this->threads_per_clique&0x01) {
479  // Note that this over-submits tasks, one of these should not be a sub-tree, but a terminal task, but I'd need to re-compute dist_3 correctly to submit less work to the terminal task.
481  const in_iterator in_middle(std::next(this->begin, dist_3));
483  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
484  if (this->begin!=in_middle) {
485  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
486  this->pool
488 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
490  }
493  if (in_middle!=in_middle_next) {
494  assert((this->odd_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
495  this->pool
497 // TODO <<nonjoinable_t(this->odd_second_buff_part(), this, gen_wk_node_str)
499  }
500  if (in_middle_next!=this->end) {
501  assert((this->odd_third_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
503  }
504  } else {
505  const in_iterator in_middle(std::next(this->begin, dist>>1));
507  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
508  if (this->begin!=in_middle) {
509  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
510  this->pool
512 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
514  this->pool,
515  this->fn,
516  this->all_done,
518  this->begin,
519  in_middle,
520  out_begin,
521  out_middle,
522  this->threads_per_clique>>1
523  );
524  }
525  if (in_middle!=this->end) {
526  assert((this->even_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
528  this->pool,
529  this->fn,
530  this->all_done,
532  in_middle,
533  this->end,
534  out_middle,
535  out_end,
536  this->threads_per_clique>>1
537  ).process();
538  }
539  }
540  }
541  }
542 }
543 
544 template<class TPB, class UniOp, class Conts, template<class, class> class Alg> inline
547  operation_type &o,
548  typename alg_wrap_t::work_complete_t &w,
549  in_iterator const &ib,
550  in_iterator const &ie,
551  out_iterator const &ob,
552  out_iterator const &oe) noexcept(true)
553 : base_t(p,o,w,ib,ie), out_begin(ob), out_end(oe) {
554  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
555 }
556 
557 template<class TPB, class UniOp, class Conts, template<class, class> class Alg> inline
559 : base_t(p, o, w),
561  out_end(this->all_done.containers().output.end()) {
562  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
563 }
564 
565 template<class TPB, class UniOp, class Conts, template<class, class> class Alg> inline void __fastcall
567  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
568 
569  const ensure_wk_complete_t e(this->all_done);
570  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
571  if (dist>0) { // In case a joker passes in an empty collection...
572  if (dist<=2) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
573  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, out_begin, this->fn), this->all_done);
574  PREFETCH_READ(&*this->begin, 3);
576  leaf_wk.process();
577  } else {
578  const in_iterator in_middle(std::next(this->begin, dist>>1));
580  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
581  if (this->begin!=in_middle) {
582  this->pool
585  this->pool,
586  this->fn,
587  this->all_done,
588  this->begin,
589  in_middle,
590  out_begin,
591  out_middle
592  );
593  }
594  if (in_middle!=this->end) {
596  this->pool,
597  this->fn,
598  this->all_done,
599  in_middle,
600  this->end,
601  out_middle,
602  out_end
603  ).process();
604  }
605  }
606  }
607 }
608 
609 template<pool_traits::size_mode_t Ps, class TPB, class BinOp, class Conts, template<class, class> class Alg> inline
612  operation_type &o,
613  typename alg_wrap_t::work_complete_t &w,
614  algo_work_heap_type const &wh,
615  in_iterator const &ib1,
616  in_iterator const &ie1,
617  in2_iterator const &ib2,
618  in2_iterator const &ie2,
619  out_iterator const &ob,
620  out_iterator const &oe,
621  typename thread_pool_type::pool_type::size_type const threads_per_clique) noexcept(true)
623  in_begin2(ib2),
624  in_end2(ie2),
625  out_begin(ob),
626  out_end(oe) {
627  assert(this->all_done.containers().input1.size()<=this->all_done.containers().input2.size());
628  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
629  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
630  assert(sizeof(subdivide_n_gen_wk3)<=this->work_heap.stride);
631 }
632 
633 template<pool_traits::size_mode_t Ps, class TPB, class BinOp, class Conts, template<class, class> class Alg> inline
637  in_end2(this->all_done.containers().input2.end()),
639  out_end(this->all_done.containers().output.end()) {
640  assert(this->all_done.containers().input1.size()<=this->all_done.containers().input2.size());
641  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
642  assert(sizeof(alg_wrap_t)<=this->work_heap.stride);
643  assert(sizeof(subdivide_n_gen_wk3)<=this->work_heap.stride);
644 }
645 
646 template<pool_traits::size_mode_t Ps, class TPB, class BinOp, class Conts, template<class, class> class Alg> inline void
647 subdivide_n_gen_wk3<Ps, TPB, BinOp, Conts, Alg>::process() noexcept(false) {
648  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
649 // TODO typedef typename thread_pool_type::nonjoinable_buff nonjoinable_t;
650 
651  const ensure_wk_complete_t e(this->all_done);
652  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
653  if (dist>0) { // In case a joker passes in an empty collection...
654  assert(std::accumulate(this->work_heap.buffer, this->work_heap.buffer+this->work_heap.size, 0UL)==0UL);
655  if (this->threads_per_clique<=1) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
656  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, in_begin2, out_begin, this->fn), this->all_done);
657  PREFETCH_READ(&*this->begin, 3);
658  PREFETCH_READ(&*in_begin2, 3);
660  leaf_wk.process();
661  } else {
662  if (this->threads_per_clique&0x01) {
663  // Note that this over-submits tasks, one of these should not be a sub-tree, but a terminal task, but I'd need to re-compute dist_3 correctly to submit less work to the terminal task.
664  const typename in_iterator::difference_type dist_3(dist/3);
665  const in_iterator in_middle1(std::next(this->begin, dist_3));
668  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
669  if (this->begin!=in_middle1) {
670  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
671  this->pool
673 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
675  }
680  assert((this->odd_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
681  this->pool
683 // TODO <<nonjoinable_t(this->odd_second_buff_part(), this, gen_wk_node_str)
685  }
686  if (in_middle1_next!=this->end) {
687  assert((this->odd_third_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
689  }
690  } else {
691  const in_iterator in_middle1(std::next(this->begin, dist>>1));
694  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
695  if (this->begin!=in_middle1) {
696  assert(this->first_buff_part()<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
697  this->pool
699 // TODO <<nonjoinable_t(this->first_buff_part(), this, gen_wk_node_str)
701  this->pool,
702  this->fn,
703  this->all_done,
705  this->begin,
706  in_middle1,
707  in_begin2,
708  in_middle2,
709  out_begin,
710  out_middle,
711  this->threads_per_clique>>1
712  );
713  }
714  if (in_middle1!=this->end) {
715  assert((this->even_second_buff_part()+this->work_heap.stride)<=(this->work_heap.buffer+this->work_heap.stride*this->work_heap.size));
717  this->pool,
718  this->fn,
719  this->all_done,
721  in_middle1,
722  this->end,
723  in_middle2,
724  in_end2,
725  out_middle,
726  out_end,
727  this->threads_per_clique>>1
728  ).process();
729  }
730  }
731  }
732  }
733 }
734 
735 template<class TPB, class BinOp, class Conts, template<class, class> class Alg> inline
738  operation_type &o,
739  typename alg_wrap_t::work_complete_t &w,
740  in_iterator const &ib1,
741  in_iterator const &ie1,
742  in2_iterator const &ib2,
743  in2_iterator const &ie2,
744  out_iterator const &ob,
745  out_iterator const &oe) noexcept(true)
746 : base_t(p,o,w,ib1,ie1),
747  in_begin2(ib2),
748  in_end2(ie2),
749  out_begin(ob),
750  out_end(oe) {
751  assert(this->all_done.containers().input1.size()<=this->all_done.containers().input2.size());
752  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
753 }
754 
755 template<class TPB, class BinOp, class Conts, template<class, class> class Alg> inline
757 : base_t(p, o, w),
759  in_end2(this->all_done.containers().input2.end()),
761  out_end(this->all_done.containers().output.end()) {
762  assert(this->all_done.containers().input1.size()<=this->all_done.containers().input2.size());
763  assert(this->all_done.containers().input1.size()<=this->all_done.containers().output.size());
764 }
765 
766 template<class TPB, class BinOp, class Conts, template<class, class> class Alg> inline void
768  typedef typename thread_pool_type::nonjoinable nonjoinable_t;
769 
770  const ensure_wk_complete_t e(this->all_done);
771  const typename in_iterator::difference_type dist(std::distance(this->begin, this->end));
772  if (dist>0) { // In case a joker passes in an empty collection...
773  if (dist<=2) { // We're at a leaf task, so generate no more, and directly process() the sub-range.
774  alg_wrap_t leaf_wk(typename alg_wrap_t::work_wrap(this->begin, this->end, in_begin2, out_begin, this->fn), this->all_done);
775  PREFETCH_READ(&*this->begin, 3);
776  PREFETCH_READ(&*in_begin2, 3);
778  leaf_wk.process();
779  } else {
780  const in_iterator in_middle1(std::next(this->begin, dist>>1));
783  // We create to sub-tasks of the two sub-ranges, to generate a tree of sub-tasks.
784  if (this->begin!=in_middle1) {
785  this->pool
788  this->pool,
789  this->fn,
790  this->all_done,
791  this->begin,
792  in_middle1,
793  in_begin2,
794  in_middle2,
795  out_begin,
796  out_middle
797  );
798  }
799  if (in_middle1!=this->end) {
801  this->pool,
802  this->fn,
803  this->all_done,
804  in_middle1,
805  this->end,
806  in_middle2,
807  in_end2,
808  out_middle,
809  out_end
810  ).process();
811  }
812  }
813  }
814 }
815 
816 } } } }