doxygen/html/shared__ptr__parallel_8cpp_source.html

/******************************************************************************

** Copyright © 2011 by J.M.McGuiness, coder@hussar.me.uk

**

** This library is free software; you can redistribute it and/or

** modify it under the terms of the GNU Lesser General Public

** License as published by the Free Software Foundation; either

** version 2.1 of the License, or (at your option) any later version.

**

** This library is distributed in the hope that it will be useful,

** but WITHOUT ANY WARRANTY; without even the implied warranty of

** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

** Lesser General Public License for more details.

**

** You should have received a copy of the GNU Lesser General Public

** License along with this library; if not, write to the Free Software

** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

*/


#include "stdafx.h"


#define BOOST_TEST_MODULE libjmmcg_tests

#include <boost/test/included/unit_test.hpp>


#include <boost/mpl/list.hpp>

#include <boost/shared_ptr.hpp>

#include <boost/smart_ptr/atomic_shared_ptr.hpp>


#include "core/ave_deviation_meter.hpp"

#include "core/shared_ptr.hpp"

#include "core/stats_output.hpp"

#include "core/thread_wrapper.hpp"


#include <chrono>


using namespace libjmmcg;

using namespace ppd;


/**
 * Payload type pointed to by every smart pointer exercised in this file.
 *
 * It derives from sp_counter_type (presumably the source of the intrusive reference count - confirm against core/shared_ptr.hpp) and carries a single immutable integer.
 *
 * \tparam AtCtr  Counter template forwarded as the last argument of the sp_counter_type base.
 */
template<template<class> class AtCtr>
struct obj final : public sp_counter_type<long, api_lock_traits<platform_api, heavyweight_threading>, default_delete, AtCtr> {
   using base_t=sp_counter_type<long, api_lock_traits<platform_api, heavyweight_threading>, default_delete, AtCtr>;
   using element_type=int;
   using deleter_t=default_delete<obj>;

   /// The value given at construction; never modified afterwards.
   element_type const init;

   /// \param i  Value recorded in init.
   obj(element_type i) noexcept(true)
   : init(i) {
   }

   ~obj() noexcept(true) {
   }
};


/**
 * Thin adaptor over boost::atomic_shared_ptr that adds what the rest of this file expects from a pointer type: an element_type member and a copy constructor.
 *
 * \tparam V  The pointee type.
 */
template<class V>
struct boost_atomic_sp_compat final : public boost::atomic_shared_ptr<V> {
   typedef boost::atomic_shared_ptr<V> base_t;
   typedef V element_type;

   // Re-export the base-class constructors.
   using boost::atomic_shared_ptr<V>::atomic_shared_ptr;

   /**
    * Copy by reading the source's current value and storing it into a freshly default-constructed base.
    *
    * \param p  The pointer to copy from.
    */
   boost_atomic_sp_compat(boost_atomic_sp_compat const &p)
   : base_t() {
      // Spell out the load that the implicit conversion to boost::shared_ptr<V> performs.
      base_t::store(p.load());
   }
};


/// The smart-pointer types over which each templated test case below is instantiated.
using ctr_types=boost::mpl::list<
   boost::shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
   boost_atomic_sp_compat<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
   std::shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>>,
   shared_ptr<obj<api_lock_traits<platform_api, sequential_mode>::atomic_counter_type>, api_lock_traits<platform_api, sequential_mode>>,
   shared_ptr<obj<api_lock_traits<platform_api, sequential_mode>::atomic_counter_type>, api_lock_traits<platform_api, heavyweight_threading>>,
   shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>, api_lock_traits<platform_api, sequential_mode>>,
   shared_ptr<obj<api_lock_traits<platform_api, heavyweight_threading>::atomic_counter_type>, api_lock_traits<platform_api, heavyweight_threading>>
>;

/// Statistics type in which compute_average_deviation() hands back the rates measured by the test cases.
using timed_results_t=ave_deviation_meter<unsigned long long>;


/**
 * Uniform factory for the pointer types under test.
 *
 * This primary template heap-allocates a copy of the supplied value with new and hands the raw pointer to ptr_t's constructor. The std::shared_ptr, boost::shared_ptr & boost_atomic_sp_compat cases are specialised separately and go through their own make_shared().
 *
 * \tparam PtrT  Pointer type to create; must expose element_type and be constructible from element_type*.
 */
template<class PtrT>
struct make_shared final {
   typedef PtrT ptr_t;

   /**
    * \param p  Value that the new pointee is copy-constructed from.
    * \return A ptr_t owning the newly allocated copy of p.
    */
   constexpr static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
      typedef typename ptr_t::element_type pointee_t;
      return ptr_t(new pointee_t(p));
   }
};

/**
 * Factory specialisation for boost::shared_ptr: delegates to boost::make_shared().
 */
template<class V>
struct make_shared<boost::shared_ptr<V>> final {
   typedef boost::shared_ptr<V> ptr_t;

   /**
    * \param p  Value that the new pointee is copy-constructed from.
    * \return A boost::shared_ptr owning the new pointee.
    */
   static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
      typedef typename ptr_t::element_type pointee_t;
      return boost::make_shared<pointee_t>(p);
   }
};

/**
 * Factory specialisation for boost_atomic_sp_compat: builds the pointee with boost::make_shared() and wraps the resulting boost::shared_ptr.
 */
template<class V>
struct make_shared<boost_atomic_sp_compat<V>> final {
   typedef boost_atomic_sp_compat<V> ptr_t;

   /**
    * \param p  Value that the new pointee is copy-constructed from.
    * \return A boost_atomic_sp_compat holding the new pointee.
    */
   static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
      typedef typename ptr_t::element_type pointee_t;
      return ptr_t(boost::make_shared<pointee_t>(p));
   }
};

/**
 * Factory specialisation for std::shared_ptr: delegates to std::make_shared().
 */
template<class V>
struct make_shared<std::shared_ptr<V>> final {
   typedef std::shared_ptr<V> ptr_t;

   /**
    * \param p  Value that the new pointee is copy-constructed from.
    * \return A std::shared_ptr owning the new pointee.
    */
   static ptr_t result(typename ptr_t::element_type p) noexcept(true) {
      typedef typename ptr_t::element_type pointee_t;
      return std::make_shared<pointee_t>(p);
   }
};


/**
 * Customisation point for "take a copy of this pointer".
 *
 * The primary template simply copy-constructs. Per the original author's note the specialisations are needed because the std::shared_ptr & boost::shared_ptr are not thread-safe, so those types are copied through their atomic access functions instead.
 *
 * \tparam PtrT  The pointer type being copied.
 */
template<class PtrT>
struct copier final {
   typedef PtrT ptr_t;

   /**
    * \param p  The pointer to copy.
    * \return A copy of p, made with ptr_t's copy constructor.
    */
   constexpr static ptr_t result(ptr_t &p) noexcept(true) {
      return p;
   }
};

/**
 * Copier specialisation for boost::shared_ptr: the copy is obtained with boost::atomic_load() rather than the plain copy constructor.
 */
template<class V>
struct copier<boost::shared_ptr<V>> final {
   typedef boost::shared_ptr<V> ptr_t;

   /**
    * \param p  The pointer to copy.
    * \return The value atomically loaded from p.
    */
   static ptr_t result(ptr_t &p) noexcept(true) {
      return boost::atomic_load(&p);
   }
};

/**
 * Copier specialisation for boost_atomic_sp_compat: reads the current value with load() and wraps it in a new boost_atomic_sp_compat.
 */
template<class V>
struct copier<boost_atomic_sp_compat<V>> final {
   typedef boost_atomic_sp_compat<V> ptr_t;

   /**
    * \param p  The pointer to copy.
    * \return A new ptr_t constructed from the value loaded from p.
    */
   static ptr_t result(ptr_t &p) noexcept(true) {
      return ptr_t(p.load());
   }
};

/**
 * Copier specialisation for std::shared_ptr: the copy is obtained with std::atomic_load() rather than the plain copy constructor.
 */
template<class V>
struct copier<std::shared_ptr<V>> final {
   typedef std::shared_ptr<V> ptr_t;

   /**
    * \param p  The pointer to copy.
    * \return The value atomically loaded from p.
    */
   static ptr_t result(ptr_t &p) noexcept(true) {
      return std::atomic_load(&p);
   }
};


/**
 * Worker thread for the copy-construction timing test.
 *
 * Each instance holds its own copy of a vector of smart pointers. The worker repeatedly copies the last element through copier<> and then pops it, until the vector is empty.
 *
 * \tparam Element  The smart-pointer type stored in the vector.
 */
template<class Element>
struct cctor_thread final : public ppd::wrapper<ppd::platform_api, heavyweight_threading> {
   using base_t=ppd::wrapper<ppd::platform_api, heavyweight_threading>;
   using cont_t=std::vector<Element>;

   /**
    * Generator suitable for std::generate_n(): every call produces a new Element whose pointee is built from the next value of a pre-incremented counter.
    */
   struct make {
      /// Counter; value-initialised, then incremented before each use.
      typename Element::element_type::element_type i;

      make() noexcept(true)
      : i() {
      }

      /// \return A new Element made by make_shared<> from a pointee constructed with the incremented counter.
      Element operator ()() noexcept(false) {
         using pointee_t=typename Element::element_type;
         return make_shared<Element>::result(pointee_t(++i));
      }
   };

   /// The pointers this thread works through; empty once worker_fn() has returned.
   cont_t cont;

   /// \param c  Container that is copied into cont.
   explicit __stdcall cctor_thread(cont_t const &c) noexcept(true)
   : base_t(), cont(c) {
   }

   /**
    * Drain cont, copy-constructing a temporary from each element before removing it.
    *
    * \return Always true.
    */
   bool __fastcall worker_fn(typename base_t::thread_context_t &) override {
      while (!cont.empty()) {
         // The copy is the operation being timed; it is destroyed at the end of this iteration, after the pop.
         const Element tmp(copier<Element>::result(cont.back()));
         cont.pop_back();
      }
      assert(cont.empty());
      return true;
   }
};


/**
 * Customisation point for "release the last pointer in a container".
 *
 * The primary template calls reset() on the last element directly. Per the original author's note the specialisations are needed because the std::shared_ptr & boost::shared_ptr are not thread-safe.
 *
 * \tparam PtrT  The pointer type held by the container; unused by the primary template.
 */
template<class PtrT>
struct resettor final {
   /**
    * Reset the last element of cont in place. The container is not resized.
    *
    * \param cont  Container offering back(); must not be empty.
    */
   template<class Cont>
   static void result(Cont &cont) noexcept(false) {
      auto &last=cont.back();
      last.reset();
   }
};

template<class V>

struct resettor<boost::shared_ptr<V>> final {

   using ptr_t=boost::shared_ptr<V>;


   /**

    * This is not really right, because the container is predicated about a lock-free shared pointer.

    */

   template<class Cont>

   static void result(Cont &cont) noexcept(false) {

      ptr_t ptr=boost::atomic_load(&cont[cont.size()-1]);

      ptr.reset();

   }

};

/**
 * Resettor specialisation for boost_atomic_sp_compat.
 */
template<class V>
struct resettor<boost_atomic_sp_compat<V>> final {
   using ptr_t=boost_atomic_sp_compat<V>;

   /**
    * Swap an empty boost::shared_ptr into the last element of cont and release the value that was taken out.
    *
    * This is not really right, because the container is predicated on a lock-free shared pointer.
    *
    * \param cont  Non-empty container of ptr_t; it is not resized here.
    */
   template<class Cont>
   static void result(Cont &cont) noexcept(false) {
      boost::shared_ptr<V> previous=cont.back().exchange(boost::shared_ptr<V>());
      previous.reset();
   }
};

/**
 * Resettor specialisation for std::shared_ptr, going through the standard library's atomic access free functions.
 */
template<class V>
struct resettor<std::shared_ptr<V>> final {
   using ptr_t=std::shared_ptr<V>;

   /**
    * Release the last element of cont.
    *
    * The element itself is atomically exchanged for an empty pointer and the ownership it held is then dropped, so on return cont.back() is empty - the same post-condition as the primary template and the boost_atomic_sp_compat specialisation. (Atomically loading a copy and resetting only that copy would leave the container's element untouched.)
    *
    * This is not really right, because the container is predicated on a lock-free shared pointer.
    *
    * \param cont  Non-empty container of ptr_t; it is not resized here.
    */
   template<class Cont>
   static void result(Cont &cont) noexcept(false) {
      ptr_t released=std::atomic_exchange(&cont.back(), ptr_t());
      released.reset();
   }
};


/**
 * Worker thread for the destruction timing test.
 *
 * Each instance holds its own copy of a vector of smart pointers. The worker repeatedly hands the vector to resettor<> (which deals with the last element) and then pops that element, until the vector is empty.
 *
 * \tparam Element  The smart-pointer type stored in the vector.
 */
template<class Element>
struct dtor_thread final : public ppd::wrapper<ppd::platform_api, heavyweight_threading> {
   using base_t=ppd::wrapper<ppd::platform_api, heavyweight_threading>;
   using cont_t=std::vector<Element>;

   /**
    * Generator suitable for std::generate_n(): every call produces a new Element whose pointee is built from the next value of a pre-incremented counter.
    */
   struct make {
      /// Counter; value-initialised, then incremented before each use.
      typename Element::element_type::element_type i;

      make() noexcept(true)
      : i() {
      }

      /// \return A new Element made by make_shared<> from a pointee constructed with the incremented counter.
      Element operator ()() noexcept(false) {
         using pointee_t=typename Element::element_type;
         return make_shared<Element>::result(pointee_t(++i));
      }
   };

   /// The pointers this thread works through; empty once worker_fn() has returned.
   cont_t cont;

   /// \param c  Container that is copied into cont.
   explicit __stdcall dtor_thread(cont_t const &c) noexcept(true)
   : base_t(), cont(c) {
   }

   /**
    * Drain cont, passing it to resettor<> before each element is removed.
    *
    * \return Always true.
    */
   bool __fastcall worker_fn(typename base_t::thread_context_t &) override {
      while (!cont.empty()) {
         resettor<Element>::result(cont);
         cont.pop_back();
      }
      assert(cont.empty());
      return true;
   }
};


// Outermost suite: groups the performance_cctor and performance_dtor suites that follow.
BOOST_AUTO_TEST_SUITE(shared_ptr_parallel_tests)


BOOST_AUTO_TEST_SUITE(performance_cctor, *stats_to_csv::make_fixture("shared_ptr_parallel_cctor.csv"))

/**
   Two cctor_thread workers are each given their own copy of a vector of num_items pointers. The clock runs from just before the first worker is started until a polling loop (sleep(100) between polls) sees that neither is running. The reported rate is num_items divided by the elapsed seconds.

   \test <a href="./examples/shared_ptr_parallel_cctor.svg">Graph</a> of performance results for parallel shared_ptr cctors.
         ==========================================================================================
   Results for 100000000 repetitions:
   -# Build 1359: g++v4.7.3, boost v1.54:
      - boost::shared_ptr (sequential?) Rate=7.92973e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
      - std::shared_ptr (sequential) Rate=8.32583e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
      - shared_ptr<sequential, simple pointer swaps> Rate=1.95911e+07 deletions/sec.
      - shared_ptr<sequential, atomic pointer swaps> Rate=7.56916e+06 deletions/sec.
      - shared_ptr<lock free, simple pointer swaps> Rate=1.25674e+07 deletions/sec.
      - shared_ptr<lock free, atomic pointer swaps> Rate=7.036e+06 deletions/sec.
   -# Build 1627: g++v4.8.4, boost v1.56:
      - boost::shared_ptr (sequential?) Rate=6.36351e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
      - std::shared_ptr (sequential) Rate=6.6604e+06 deletions/sec. (TODO: Measure these two with parallel implementations.)
      - shared_ptr<sequential, simple pointer swaps> Rate=1.08592e+07 deletions/sec.
      - shared_ptr<sequential, atomic pointer swaps> Rate=6.89014e+06 deletions/sec.
      - shared_ptr<lock free, simple pointer swaps> Rate=1.12256e+07 deletions/sec.
      - shared_ptr<lock free, atomic pointer swaps> Rate=6.52994e+06 deletions/sec.
   -# Build 1643: g++v4.8.4, boost v1.56:
      - boost::shared_ptr (sequential?) rate deletions/sec=[6616365, 6878884 ~(+/-4%), 7035776], samples=28, total=192608756 (TODO: Measure these two with parallel implementations.)
      - std::shared_ptr (sequential)  rate deletions/sec=[6796090, 7164560 ~(+/-4%), 7511524], samples=26, total=186278584 (TODO: Measure these two with parallel implementations.)
      - shared_ptr<sequential, simple pointer swaps> rate deletions/sec=[10978380, 11155910 ~(+/-4%), 11483192], samples=23, total=256585947
      - shared_ptr<sequential, atomic pointer swaps> rate deletions/sec=[6660595, 6898122 ~(+/-4%), 7085593], samples=22, total=151758702
      - shared_ptr<lock free, simple pointer swaps> rate deletions/sec=[11225288, 11406743 ~(+/-4%), 11616904], samples=22, total=250948363
      - shared_ptr<lock free, atomic pointer swaps> rate deletions/sec=[6616558, 6947559 ~(+/-4%), 7187529], samples=26, total=180636540
*/
BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_cctor, ctr_t, ctr_types) {
   using thread_t=cctor_thread<ctr_t>;

   // Number of pointers handed to each worker; the full-size run is only enabled for performance builds.
#ifdef JMMCG_PERFORMANCE_TESTS
   const std::size_t num_items=10000000;
#else
   const std::size_t num_items=100;
#endif
   // Arguments forwarded to compute_average_deviation() ahead of the timed callable.
   const unsigned short loops_for_conv=50;
   const double perc_conv_estimate=5.0;

   const std::pair<timed_results_t, bool> timed_results(compute_average_deviation<timed_results_t::value_type>(
      perc_conv_estimate,
      loops_for_conv,
      []() {
         // Set-up (not timed): build the pointers, give each worker its own copy, then drop the originals.
         typename thread_t::cont_t c;
         std::generate_n(std::back_inserter(c), num_items, typename thread_t::make());
         thread_t th1(c);
         thread_t th2(c);
         c.clear();

         // Timed region: start both workers and poll until neither is running.
         const auto started=std::chrono::high_resolution_clock::now();
         th1.create_running();
         th2.create_running();
         do {
            api_threading_traits<platform_api, heavyweight_threading>::sleep(100);
         } while (th1.is_running() || th2.is_running());
         const auto finished=std::chrono::high_resolution_clock::now();

         BOOST_CHECK_EQUAL(th1.cont.size(), 0);
         BOOST_CHECK(th1.cont.empty());
         BOOST_CHECK_EQUAL(th2.cont.size(), 0);
         BOOST_CHECK(th2.cont.empty());
         BOOST_CHECK(c.empty());

         // Elapsed time is truncated to whole microseconds before being converted to seconds.
         const auto elapsed_usec=std::chrono::duration_cast<std::chrono::microseconds>(finished - started).count();
         const double elapsed_sec=static_cast<double>(elapsed_usec)/1000000;
         return timed_results_t::value_type(num_items/elapsed_sec);
      }
   ));

   std::cout<<thread_t::thread_traits::demangle_name(typeid(ctr_t))<<" rate cctors/sec="<<timed_results.first<<std::endl;
#ifdef JMMCG_PERFORMANCE_TESTS
   stats_to_csv::handle->stats<<timed_results.first.to_csv()<<std::flush;
   BOOST_CHECK(!timed_results.second);
#endif
}

BOOST_AUTO_TEST_SUITE_END()


BOOST_AUTO_TEST_SUITE(performance_dtor, *stats_to_csv::make_fixture("shared_ptr_parallel_dtor.csv"))

/**
   Two dtor_thread workers are each given their own copy of a vector of num_items pointers. The clock runs from just before the first worker is started until a polling loop (sleep(100) between polls) sees that neither is running. The reported rate is num_items divided by the elapsed seconds.

   \test <a href="./examples/shared_ptr_parallel_dtor.svg">Graph</a> of performance results for parallel shared_ptr dtors.
         ==========================================================================================
   Results for 100000000 items:
   -# Build 1654: g++v4.8.4, boost v1.56:
      - boost::shared_ptr (sequential?) rate dtors/sec=[7400343, 8216575 ~(+/-4%), 8687559], samples=36, total=295796708 (TODO: Measure these two with parallel implementations.)
      - std::shared_ptr (sequential)  rate dtors/sec=[8611878, 8703849 ~(+/-4%), 8841397], samples=24, total=208892398 (TODO: Measure these two with parallel implementations.)
      - shared_ptr<sequential, simple pointer swaps> rate dtors/sec=[16113069, 16621324 ~(+/-4%), 17224698], samples=25, total=415533114
      - shared_ptr<sequential, atomic pointer swaps> rate dtors/sec=[6842758, 7109094 ~(+/-4%), 7291866], samples=21, total=149290992
      - shared_ptr<lock free, simple pointer swaps> rate dtors/sec=[17224692, 17370089 ~(+/-4%), 17527721], samples=25, total=434252234
      - shared_ptr<lock free, atomic pointer swaps> rate dtors/sec=[7136291, 7277720 ~(+/-4%), 7400557], samples=24, total=174665283
*/
BOOST_AUTO_TEST_CASE_TEMPLATE(parallel_deletes, ctr_t, ctr_types) {
   using thread_t=dtor_thread<ctr_t>;

   // Number of pointers handed to each worker; the full-size run is only enabled for performance builds.
#ifdef JMMCG_PERFORMANCE_TESTS
   const std::size_t num_items=10000000;
#else
   const std::size_t num_items=100;
#endif
   // Arguments forwarded to compute_average_deviation() ahead of the timed callable.
   const unsigned short loops_for_conv=50;
   const double perc_conv_estimate=5.0;

   const std::pair<timed_results_t, bool> timed_results(compute_average_deviation<timed_results_t::value_type>(
      perc_conv_estimate,
      loops_for_conv,
      []() {
         // Set-up (not timed): build the pointers, give each worker its own copy, then drop the originals.
         typename thread_t::cont_t c;
         std::generate_n(std::back_inserter(c), num_items, typename thread_t::make());
         thread_t th1(c);
         thread_t th2(c);
         c.clear();

         // Timed region: start both workers and poll until neither is running.
         const auto started=std::chrono::high_resolution_clock::now();
         th1.create_running();
         th2.create_running();
         do {
            api_threading_traits<platform_api, heavyweight_threading>::sleep(100);
         } while (th1.is_running() || th2.is_running());
         const auto finished=std::chrono::high_resolution_clock::now();

         BOOST_CHECK(th1.cont.empty());
         BOOST_CHECK(th2.cont.empty());
         BOOST_CHECK(c.empty());

         // Elapsed time is truncated to whole microseconds before being converted to seconds.
         const auto elapsed_usec=std::chrono::duration_cast<std::chrono::microseconds>(finished - started).count();
         const double elapsed_sec=static_cast<double>(elapsed_usec)/1000000;
         return timed_results_t::value_type(num_items/elapsed_sec);
      }
   ));

   std::cout<<thread_t::thread_traits::demangle_name(typeid(ctr_t))<<" rate dtors/sec="<<timed_results.first<<std::endl;
#ifdef JMMCG_PERFORMANCE_TESTS
   stats_to_csv::handle->stats<<timed_results.first.to_csv()<<std::flush;
   BOOST_CHECK(!timed_results.second);
#endif
}

BOOST_AUTO_TEST_SUITE_END()


// Closes the outermost suite, shared_ptr_parallel_tests.
BOOST_AUTO_TEST_SUITE_END()