libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
shared_ptr_parallel.cpp
Go to the documentation of this file.
1 /******************************************************************************
2 ** Copyright © 2011 by J.M.McGuiness, coder@hussar.me.uk
3 **
4 ** This library is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU Lesser General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2.1 of the License, or (at your option) any later version.
8 **
9 ** This library is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** Lesser General Public License for more details.
13 **
14 ** You should have received a copy of the GNU Lesser General Public
15 ** License along with this library; if not, write to the Free Software
16 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
19 #include "stdafx.h"
20 
21 #define BOOST_TEST_MODULE libjmmcg_tests
22 #include <boost/test/included/unit_test.hpp>
23 
24 #include <boost/mpl/list.hpp>
25 #include <boost/shared_ptr.hpp>
26 #include <boost/smart_ptr/atomic_shared_ptr.hpp>
27 
28 #include "core/ave_deviation_meter.hpp"
29 #include "core/shared_ptr.hpp"
30 #include "core/stats_output.hpp"
31 #include "core/thread_wrapper.hpp"
32 
33 #include <chrono>
34 
35 using namespace libjmmcg;
36 using namespace ppd;
37 
38 template<template<class> class AtCtr>
39 struct obj final : public sp_counter_type<long, api_lock_traits<platform_api, heavyweight_threading>, default_delete, AtCtr> {
41  typedef int element_type;
42  typedef default_delete<obj> deleter_t;
43 
44  element_type const init;
45 
46  obj(element_type i) noexcept(true) : init(i) {}
47  ~obj() noexcept(true) {}
48 };
49 
50 template<class V>
51 struct boost_atomic_sp_compat final : public boost::atomic_shared_ptr<V> {
52  using base_t=boost::atomic_shared_ptr<V>;
53  using element_type=V;
54  using boost::atomic_shared_ptr<V>::atomic_shared_ptr;
55 
56  boost_atomic_sp_compat(boost_atomic_sp_compat const &p)
57  : base_t() {
58  base_t::store(p);
59  }
60 };
61 
62 typedef boost::mpl::list<
63  boost::shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
64  boost_atomic_sp_compat<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
65  std::shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
66  shared_ptr<obj<api_lock_traits<platform_api, sequential_mode>::atomic_counter_type>, api_lock_traits<platform_api, sequential_mode>>,
67  shared_ptr<obj<api_lock_traits<platform_api, sequential_mode>::atomic_counter_type>, api_lock_traits<platform_api, heavyweight_threading>>,
68  shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>, api_lock_traits<platform_api, sequential_mode>>,
69  shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>, api_lock_traits<platform_api, heavyweight_threading>>
70 > ctr_types;
71 using timed_results_t=ave_deviation_meter<unsigned long long>;
72 
/**
 * Factory trait that creates a PtrT owning a freshly allocated copy of the supplied value.
 *
 * The primary template passes the result of a new-expression straight to the constructor of PtrT; the specialisations for the boost & std pointers use their respective make_shared() functions instead.
 *
 * \tparam PtrT	The smart-pointer type to create; it must expose an element_type.
 */
template<class PtrT>
struct make_shared final {
	typedef PtrT ptr_t;

	/**
	 * \param p	The value that is copied into the newly allocated pointee.
	 * \return	A ptr_t constructed from the raw pointer to that copy.
	 *
	 * \note	This is declared noexcept(true), so an exception thrown by the allocation ends the program rather than propagating.
	 */
	static constexpr ptr_t result(typename ptr_t::element_type p) noexcept(true) {
		using pointee_t=typename ptr_t::element_type;
		return ptr_t(new pointee_t(p));
	}
};
84 template<class V>
85 struct make_shared<boost::shared_ptr<V>> final {
86  using ptr_t=boost::shared_ptr<V>;
87 
88  static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
89  return boost::make_shared<typename ptr_t::element_type>(p);
90  }
91 };
92 template<class V>
93 struct make_shared<boost_atomic_sp_compat<V>> final {
94  using ptr_t=boost_atomic_sp_compat<V>;
95 
96  static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
97  return ptr_t(boost::make_shared<typename ptr_t::element_type>(p));
98  }
99 };
100 template<class V>
101 struct make_shared<std::shared_ptr<V>> final {
102  using ptr_t=std::shared_ptr<V>;
103 
104  static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
105  return std::make_shared<typename ptr_t::element_type>(p);
106  }
107 };
108 
/**
 * Trait that produces a copy of a smart pointer.
 *
 * The primary template simply uses the copy constructor of PtrT; the specialisations for the boost & std pointers obtain the copy through the atomic-load interface of those pointers.
 *
 * \tparam PtrT	The smart-pointer type to copy.
 */
template<class PtrT>
struct copier final {
	typedef PtrT ptr_t;

	/**
	 * \param p	The pointer to copy; it is not modified.
	 * \return	A copy-constructed ptr_t.
	 */
	static constexpr ptr_t result(ptr_t &p) noexcept(true) {
		return ptr_t(p);
	}
};
120 template<class V>
121 struct copier<boost::shared_ptr<V>> final {
122  using ptr_t=boost::shared_ptr<V>;
123 
124  static ptr_t result(ptr_t &p) noexcept(true) {
125  ptr_t ptr=boost::atomic_load(&p);
126  return ptr;
127  }
128 };
129 template<class V>
130 struct copier<boost_atomic_sp_compat<V>> final {
131  using ptr_t=boost_atomic_sp_compat<V>;
132 
133  static ptr_t result(ptr_t &p) noexcept(true) {
134  return ptr_t(p.load());
135  }
136 };
137 template<class V>
138 struct copier<std::shared_ptr<V>> final {
139  using ptr_t=std::shared_ptr<V>;
140 
141  static ptr_t result(ptr_t &p) noexcept(true) {
142  ptr_t ptr=std::atomic_load(&p);
143  return ptr;
144  }
145 };
146 
/**
 * The thread used by the copy-construction test: worker_fn() copies the last pointer held in `cont`, via copier<>, then pops it, until `cont` is empty.
 *
 * NOTE(review): `base_t`, `cont_t`, the counter `i` in `make` and the member `cont` are all used below, but their declarations are not visible in this listing (its embedded numbering skips 149-150, 152 & 161) - confirm them against the original source file.
 */
147 template<class Element>
148 struct cctor_thread final : public ppd::wrapper<ppd::platform_api, heavyweight_threading> {
 /// Generator functor: each call pre-increments `i` and passes an element_type constructed from it to make_shared<>::result().
151  struct make {
153 
154  make() noexcept(true) : i() {}
155 
156  typename cont_t::value_type operator ()() noexcept(false) {
157  return make_shared<typename cont_t::value_type>::result(typename cont_t::value_type::element_type(++i));
158  }
159  };
160 
162 
 /// Initialises `cont` as a copy of `c`; the argument itself is not retained.
163  explicit __stdcall cctor_thread(cont_t const &c) noexcept(true)
164  : base_t(), cont(c) {
165  }
166 
 /**
  * Drains `cont`; the thread-context argument is unused.
  *
  * \return	Always true.
  */
167  bool __fastcall worker_fn(typename base_t::thread_context_t &) override {
168  while (!cont.empty()) {
 // The copy is the operation under test: `tmp` is destroyed at the end of each iteration, and is otherwise unused.
169  const typename cont_t::value_type tmp(copier<typename cont_t::value_type>::result(cont.back()));
170  cont.pop_back();
171  }
172  assert(cont.empty());
173  return true;
174  }
175 };
176 
/**
 * Trait that resets the last pointer held in a container.
 *
 * The primary template calls reset() on the element itself; the specialisations for the boost & std pointers go through the atomic interfaces of those pointers.
 *
 * \tparam PtrT	The smart-pointer type held in the container.
 */
template<class PtrT>
struct resettor final {
	/**
	 * \param cont	A non-empty container; its last element is reset in place and the size of the container is left unchanged.
	 */
	template<class Cont>
	static void result(Cont &cont) noexcept(false) {
		auto &last=cont.back();
		last.reset();
	}
};
187 template<class V>
188 struct resettor<boost::shared_ptr<V>> final {
189  using ptr_t=boost::shared_ptr<V>;
190 
191  /**
192  * This is not really right, because the container is predicated about a lock-free shared pointer.
193  */
194  template<class Cont>
195  static void result(Cont &cont) noexcept(false) {
196  ptr_t ptr=boost::atomic_load(&cont[cont.size()-1]);
197  ptr.reset();
198  }
199 };
200 template<class V>
201 struct resettor<boost_atomic_sp_compat<V>> final {
202  using ptr_t=boost_atomic_sp_compat<V>;
203 
204  /**
205  * This is not really right, because the container is predicated about a lock-free shared pointer.
206  */
207  template<class Cont>
208  static void result(Cont &cont) noexcept(false) {
209  boost::shared_ptr<V> ptr=cont[cont.size()-1].exchange(boost::shared_ptr<V>());
210  ptr.reset();
211  }
212 };
213 template<class V>
214 struct resettor<std::shared_ptr<V>> final {
215  using ptr_t=std::shared_ptr<V>;
216 
217  /**
218  * This is not really right, because the container is predicated about a lock-free shared pointer.
219  */
220  template<class Cont>
221  static void result(Cont &cont) noexcept(false) {
222  ptr_t ptr=std::atomic_load(&cont[cont.size()-1]);
223  ptr.reset();
224  }
225 };
226 
/**
 * The thread used by the destruction test: worker_fn() resets the last pointer held in `cont`, via resettor<>, then pops it, until `cont` is empty.
 *
 * NOTE(review): `base_t`, `cont_t`, the counter `i` in `make` and the member `cont` are all used below, but their declarations are not visible in this listing (its embedded numbering skips 229-230, 232 & 241) - confirm them against the original source file.
 */
227 template<class Element>
228 struct dtor_thread final : public ppd::wrapper<ppd::platform_api, heavyweight_threading> {
 /// Generator functor: each call pre-increments `i` and passes an element_type constructed from it to make_shared<>::result().
231  struct make {
233 
234  make() noexcept(true) : i() {}
235 
236  typename cont_t::value_type operator ()() noexcept(false) {
237  return make_shared<typename cont_t::value_type>::result(typename cont_t::value_type::element_type(++i));
238  }
239  };
240 
242 
 /// Initialises `cont` as a copy of `c`; the argument itself is not retained.
243  explicit __stdcall dtor_thread(cont_t const &c) noexcept(true)
244  : base_t(), cont(c) {
245  }
246 
 /**
  * Drains `cont`; the thread-context argument is unused.
  *
  * \return	Always true.
  */
247  bool __fastcall worker_fn(typename base_t::thread_context_t &) override {
248  while (!cont.empty()) {
 // The resettor is handed the whole container, not just the element, and operates upon its last element.
249  resettor<typename cont_t::value_type>::result(cont);
250  cont.pop_back();
251  }
252  assert(cont.empty());
253  return true;
254  }
255 };
256 
257 BOOST_AUTO_TEST_SUITE(shared_ptr_parallel_tests)
258 
259 BOOST_AUTO_TEST_SUITE(performance_cctor, *stats_to_csv::make_fixture("shared_ptr_parallel_cctor.csv"))
260 
261 /**
262  \test <a href="./examples/shared_ptr_parallel_cctor.svg">Graph</a> of performance results for parallel shared_ptr cctors.
263  ==========================================================================================
264  Results for 100000000 repetitions:
265  -# Build 1359: g++v4.7.3, boost v1.54:
266  - boost::shared_ptr (sequential?) Rate=7.92973e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
267  - std::shared_ptr (sequential) Rate=8.32583e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
268  - shared_ptr<sequential, simple pointer swaps> Rate=1.95911e+07 deletions/sec.
269  - shared_ptr<sequential, atomic pointer swaps> Rate=7.56916e+06 deletions/sec.
270  - shared_ptr<lock free, simple pointer swaps> Rate=1.25674e+07 deletions/sec.
271  - shared_ptr<lock free, atomic pointer swaps> Rate=7.036e+06 deletions/sec.
272  -# Build 1627: g++v4.8.4, boost v1.56:
273  - boost::shared_ptr (sequential?) Rate=6.36351e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
274  - std::shared_ptr (sequential) Rate=6.6604e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
275  - shared_ptr<sequential, simple pointer swaps> Rate=1.08592e+07 deletions/sec.
276  - shared_ptr<sequential, atomic pointer swaps> Rate=6.89014e+06 deletions/sec.
277  - shared_ptr<lock free, simple pointer swaps> Rate=1.12256e+07 deletions/sec.
278  - shared_ptr<lock free, atomic pointer swaps> Rate=6.52994e+06 deletions/sec.
279  -# Build 1643: g++v4.8.4, boost v1.56:
280  - boost::shared_ptr (sequential?) rate deletions/sec=[6616365, 6878884 ~(+/-4%), 7035776], samples=28, total=192608756 (TODO: Measure these two with parallel implementations.)
281  - std::shared_ptr (sequential) rate deletions/sec=[6796090, 7164560 ~(+/-4%), 7511524], samples=26, total=186278584 (TODO: Measure these two with parallel implementations.)
282  - shared_ptr<sequential, simple pointer swaps> rate deletions/sec=[10978380, 11155910 ~(+/-4%), 11483192], samples=23, total=256585947
283  - shared_ptr<sequential, atomic pointer swaps> rate deletions/sec=[6660595, 6898122 ~(+/-4%), 7085593], samples=22, total=151758702
284  - shared_ptr<lock free, simple pointer swaps> rate deletions/sec=[11225288, 11406743 ~(+/-4%), 11616904], samples=22, total=250948363
285  - shared_ptr<lock free, atomic pointer swaps> rate deletions/sec=[6616558, 6947559 ~(+/-4%), 7187529], samples=26, total=180636540
286 */
287 BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_cctor, ctr_t, ctr_types) {
288  typedef cctor_thread<ctr_t> thread_t;
289 #ifdef JMMCG_PERFORMANCE_TESTS
290  const std::size_t num_items=10000000;
291 #else
292  const std::size_t num_items=100;
293 #endif
294  const unsigned short loops_for_conv=50;
295  const double perc_conv_estimate=5.0;
296 
297  const std::pair<timed_results_t, bool> timed_results(compute_average_deviation<timed_results_t::value_type>(
298  perc_conv_estimate,
299  loops_for_conv,
300  []() {
301  typename thread_t::cont_t c;
302  std::generate_n(std::back_inserter(c), num_items, typename thread_t::make());
303  thread_t th1(c);
304  thread_t th2(c);
305  c.clear();
306  const auto t1=std::chrono::high_resolution_clock::now();
307  th1.create_running();
308  th2.create_running();
309  do {
310  api_threading_traits<platform_api, heavyweight_threading>::sleep(100);
311  } while (th1.is_running() || th2.is_running());
312  const auto t2=std::chrono::high_resolution_clock::now();
313  BOOST_CHECK_EQUAL(th1.cont.size(), 0);
314  BOOST_CHECK(th1.cont.empty());
315  BOOST_CHECK_EQUAL(th2.cont.size(), 0);
316  BOOST_CHECK(th2.cont.empty());
317  BOOST_CHECK(c.empty());
318  return timed_results_t::value_type(num_items/(static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count())/1000000));
319  }
320  ));
321  std::cout<<thread_t::thread_traits::demangle_name(typeid(ctr_t))<<" rate cctors/sec="<<timed_results.first<<std::endl;
322 #ifdef JMMCG_PERFORMANCE_TESTS
323  stats_to_csv::handle->stats<<timed_results.first.to_csv()<<std::flush;
324  BOOST_CHECK(!timed_results.second);
325 #endif
326 }
327 
328 BOOST_AUTO_TEST_SUITE_END()
329 
330 BOOST_AUTO_TEST_SUITE(performance_dtor, *stats_to_csv::make_fixture("shared_ptr_parallel_dtor.csv"))
331 
332 /**
333  \test <a href="./examples/shared_ptr_parallel_dtor.svg">Graph</a> of performance results for parallel shared_ptr dtors.
334  ==========================================================================================
335  Results for 100000000 items:
336  -# Build 1654: g++v4.8.4, boost v1.56:
337  - boost::shared_ptr (sequential?) rate dtors/sec=[7400343, 8216575 ~(+/-4%), 8687559], samples=36, total=295796708 (TODO: Measure these two with parallel implementations.)
338  - std::shared_ptr (sequential) rate dtors/sec=[8611878, 8703849 ~(+/-4%), 8841397], samples=24, total=208892398 (TODO: Measure these two with parallel implementations.)
339  - shared_ptr<sequential, simple pointer swaps> rate dtors/sec=[16113069, 16621324 ~(+/-4%), 17224698], samples=25, total=415533114
340  - shared_ptr<sequential, atomic pointer swaps> rate dtors/sec=[6842758, 7109094 ~(+/-4%), 7291866], samples=21, total=149290992
341  - shared_ptr<lock free, simple pointer swaps> rate dtors/sec=[17224692, 17370089 ~(+/-4%), 17527721], samples=25, total=434252234
342  - shared_ptr<lock free, atomic pointer swaps> rate dtors/sec=[7136291, 7277720 ~(+/-4%), 7400557], samples=24, total=174665283
343 */
344 BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_deletes, ctr_t, ctr_types) {
345  typedef dtor_thread<ctr_t> thread_t;
346 #ifdef JMMCG_PERFORMANCE_TESTS
347  const std::size_t num_items=10000000;
348 #else
349  const std::size_t num_items=100;
350 #endif
351  const unsigned short loops_for_conv=50;
352  const double perc_conv_estimate=5.0;
353 
354  const std::pair<timed_results_t, bool> timed_results(compute_average_deviation<timed_results_t::value_type>(
355  perc_conv_estimate,
356  loops_for_conv,
357  []() {
358  typename thread_t::cont_t c;
359  std::generate_n(std::back_inserter(c), num_items, typename thread_t::make());
360  thread_t th1(c);
361  thread_t th2(c);
362  c.clear();
363  const auto t1=std::chrono::high_resolution_clock::now();
364  th1.create_running();
365  th2.create_running();
366  do {
367  api_threading_traits<platform_api, heavyweight_threading>::sleep(100);
368  } while (th1.is_running() || th2.is_running());
369  const auto t2=std::chrono::high_resolution_clock::now();
370  BOOST_CHECK(th1.cont.empty());
371  BOOST_CHECK(th2.cont.empty());
372  BOOST_CHECK(c.empty());
373  return timed_results_t::value_type(num_items/(static_cast<double>(std::chrono::duration_cast<std::chrono::microseconds>(t2 - t1).count())/1000000));
374  }
375  ));
376  std::cout<<thread_t::thread_traits::demangle_name(typeid(ctr_t))<<" rate dtors/sec="<<timed_results.first<<std::endl;
377 #ifdef JMMCG_PERFORMANCE_TESTS
378  stats_to_csv::handle->stats<<timed_results.first.to_csv()<<std::flush;
379  BOOST_CHECK(!timed_results.second);
380 #endif
381 }
382 
383 BOOST_AUTO_TEST_SUITE_END()
384 
385 BOOST_AUTO_TEST_SUITE_END()