libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
hp_timer.hpp
Go to the documentation of this file.
1 #ifndef LIBJMMCG_CORE_HP_TIMER_HPP
2 #define LIBJMMCG_CORE_HP_TIMER_HPP
3 
4 /******************************************************************************
5 ** Copyright © 2008 by J.M.McGuiness, coder@hussar.me.uk
6 **
7 ** This library is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU Lesser General Public
9 ** License as published by the Free Software Foundation; either
10 ** version 2.1 of the License, or (at your option) any later version.
11 **
12 ** This library is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ** Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public
18 ** License along with this library; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 #include "info.hpp"
25 
26 #include "boost/date_time/posix_time/posix_time_types.hpp"
27 
28 #include <numeric>
29 #include <chrono>
30 #include <thread>
31 
32 #ifdef _MSC_VER
33 #else
34 # if defined(i386) || defined(__i386__) || defined(__x86_64__)
35 # include <cpuid.h>
36 # else
37 # if (__ARM_ARCH >= 6) // V6 is the earliest arch that has a standard cyclecount
/// Stand-in for the x86 __rdtsc() intrinsic on ARM targets: read a free-running hardware counter.
/**
    On AArch64 the virtual counter register (CNTVCT_EL0) is read. NOTE(review): that register is understood to tick at a fixed frequency rather than at the core clock; callers in this file only use tick differences, scaled by a measured ticks-per-microsecond rate.

    On 32-bit ARM the performance-monitor cycle counter is read via CP15, but only if user-mode access is permitted and the counter is enabled.

    \return The counter value; on 32-bit ARM, zero if the cycle counter cannot be read from user mode or is not counting.
*/
inline std::uint64_t __rdtsc() noexcept(true) {
#if defined(__aarch64__)
    // AArch64 has no CP15 coprocessor interface, so the MRC instructions used below would not assemble there.
    std::uint64_t virtual_count;
    asm volatile("mrs %0, cntvct_el0" : "=r"(virtual_count));
    return virtual_count;
#else
    std::uint32_t pmccntr;
    std::uint32_t pmuseren;
    std::uint32_t pmcntenset;
    // Read the user mode perf monitor counter access permissions.
    asm volatile("mrc p15, 0, %0, c9, c14, 0" : "=r"(pmuseren));
    if (pmuseren & 1) { // Allows reading perfmon counters for user mode code.
        asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r"(pmcntenset));
        if (pmcntenset & 0x80000000ul) { // Is it counting?
            asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(pmccntr));
            // The counter is set up to count every 64th cycle
            return static_cast<std::uint64_t>(pmccntr) * 64ULL;
        }
    }
    // Counter unavailable: callers cannot distinguish this from a genuine zero reading.
    return 0;
#endif
}
54  inline std::uint64_t __rdtscp(unsigned int *) noexcept(true) {
55  return __rdtsc();
56  }
57  inline std::uint64_t __cpuid(int, unsigned, unsigned, unsigned, unsigned) noexcept(true) {
58  return __rdtsc();
59  }
60  # endif
61 # endif
62 #endif
63 
64 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE {
65 
66 /// A class for providing timings in high-performance counter resolution.
67 template<ppd::generic_traits::api_type API, typename Mdl>
68 class hp_timer;
69 
70 /// Measure a time interval via RAII.
71 /**
72  \see hp_timer
73 */
74 template<typename T>
75 struct hp_interval {
76  typedef T timer_t; ///< The underlying timer-type.
77 
78  timer_t const &timer; ///< The timer to use.
79  const typename timer_t::time_utc_t start; ///< The time this object was created.
80  /// Construct an interval-measurer on the stack.
81  /**
82  \param t The timer to use.
83  \param i A reference to the interval to be set in the dtor.
84  */
85  explicit __stdcall hp_interval(timer_t const &t,typename timer_t::time_utc_t &i) noexcept(true)
86  : timer(t), start(t.current_time()), interval(i) {
87  }
88  hp_interval(hp_interval const &)=delete;
89  /**
90  Set the interval in units of hp_timer::value_type, using the timer given in the ctor.
91  */
92  __stdcall ~hp_interval() noexcept(true);
93 
94 private:
95  typename timer_t::time_utc_t &interval;
96 };
97 
98 /// Use the TSC timer to measure intervals.
99 /**
100  To determine if the TSC is available or reliable, on Linux the contents of:
101  /sys/devices/system/clocksource/clocksource0/available_clocksource
102  should be examined. This also indicates if the TSCs between processors are synchronised [1], section 17.16.1.
103 
104  [1] "Intel® 64 and IA-32 Architectures Developer's Manual", https://www-ssl.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-manual-325462.html
105 */
106 namespace cpu_timer {
107 
108 using element_type=std::uint64_t;
109 
110 /// Measure a time interval using the CPU instruction RDTSC.
111 /**
112  Note that the use of RDTSC means that the interval will be inaccurate as super-scalar processor may reschedule the instructions before & after the RDTSC instructions. But this is very fast.
113 */
115 public:
116  static element_type REALLY_FORCE_INLINE now() noexcept(true) {
117  return __rdtsc();
118  }
119  static element_type REALLY_FORCE_INLINE get_start() noexcept(true) {
120  return now();
121  }
122  static element_type REALLY_FORCE_INLINE get_end() noexcept(true) {
123  return now();
124  }
125 
126  explicit REALLY_FORCE_INLINE out_of_order(element_type &d) noexcept(true)
127  : start(get_start()), diff(d) {
128  }
129 
130  REALLY_FORCE_INLINE ~out_of_order() noexcept(true) {
131  const element_type end(now());
132  assert(end>=start);
133  diff=end-start;
134  }
135 
136 private:
137  const element_type start;
138  element_type &diff;
139 };
140 
141 /// Measure a time interval using derivatives of the CPU instruction RDTSC.
142 /**
143  The design of this code is directly influenced from [1]. Note that instructions are emitted to ensure that the pipeline in the CPU is serialised at the start of the interval and partially serialised at the end, so the interval measurement is fairly accurate. Note that this costs approximately 20 clock cycles.
144 
145  [1] "How to Benchmark Code Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures", <a href="http://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf"/>
146 */
147 class in_order {
148 public:
149  static element_type REALLY_FORCE_INLINE get_start() noexcept(true) {
150  unsigned eax, ebx, ecx, edx;
151 
152  __cpuid(1, eax, ebx, ecx, edx);
153  return __rdtsc();
154  }
155  static element_type REALLY_FORCE_INLINE get_end() noexcept(true) {
156  unsigned int cpu;
157  return __rdtscp(&cpu);
158  }
159  static element_type REALLY_FORCE_INLINE now() noexcept(true) {
160  return get_end();
161  }
162 
163  /**
164  Note that this serialises the CPU.
165  */
166  explicit REALLY_FORCE_INLINE in_order(element_type &d) noexcept(true)
167  : start(get_start()), diff(d) {
168  }
169 
170  /**
171  Note that instructions issued before this dtor will complete before the dtor, but those issued after may be scheduled before the dtor, if the super-scalar CPU has sufficient resources.
172  */
173  REALLY_FORCE_INLINE ~in_order() noexcept(true) {
174  const element_type end(get_end());
175  assert(end>=start);
176  diff=end-start;
177  }
178 
179 private:
180  const element_type start;
181  element_type &diff;
182 };
183 
184 namespace private_ {
186  const double min;
187  const double mean;
188  const double max;
189  const double mean_average_dev;
190  };
191 
192  std::ostream &
193  operator<<(std::ostream &s, ticks_per_microsec_details const &tpm);
194 
196  get_mean_ticks_per_microsec() noexcept(false);
197 
198  static const element_type start_ticks=out_of_order::get_start();
199  static const boost::posix_time::ptime start_UTC=boost::posix_time::microsec_clock::universal_time();
200  /**
201  Example values from a run:
202  min: 2595.7MHz
203  mean: 2596.9MHz
204  mean-average deviation: 0.03%
205  max: 2597.4MHz
206  For an AMD Opteron at 2.6GHz (with frequency scaling enabled).
207  */
208  static const ticks_per_microsec_details ticks_per_microsec=get_mean_ticks_per_microsec();
209 }
210 
211 inline double
212 TSC_to_microsec(element_type ticks) noexcept(true) {
213  assert(ticks>private_::start_ticks);
214  const element_type ticks_since_start=ticks-private_::start_ticks;
215  const double microsecs_since_start=ticks_since_start/private_::ticks_per_microsec.mean;
216  assert(microsecs_since_start>=0);
217  return microsecs_since_start;
218 }
219 
220 inline boost::posix_time::ptime
221 TSC_to_UTC(element_type ticks) noexcept(true) {
222  const boost::posix_time::ptime curr_UTC=private_::start_UTC+boost::posix_time::microseconds(static_cast<unsigned long long>(TSC_to_microsec(ticks)));
223  assert(curr_UTC>private_::start_UTC);
224  return curr_UTC;
225 }
226 
227 /// Busy wait for a certain period.
228 /**
229  \param delay In microseconds.
230  \return Delay achieved in microseconds.
231 */
232 inline double
233 pause_for_usec(double delay) noexcept(true) {
235  element_type delay_achieved;
236  do {
237  auto const current=in_order::get_end();
238  assert(current>begin);
239  delay_achieved=TSC_to_microsec(current)-begin;
240  } while (delay_achieved<delay);
241  assert(delay_achieved>=delay);
242  return delay_achieved;
243 }
244 
245 }
246 
247 } }
248 
249 #ifdef WIN32
250 # include "../experimental/NT-based/NTSpecific/hp_timer.hpp"
251 #elif defined(__unix__)
252 # include "../unix/hp_timer.hpp"
253 #endif
254 
255 #endif