libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
stack_string.hpp
Go to the documentation of this file.
1 #ifndef LIBJMMCG_CORE_BASIC_STACK_STRING_HPP
2 #define LIBJMMCG_CORE_BASIC_STACK_STRING_HPP
3 
4 /******************************************************************************
5 ** Copyright © 2013 by J.M.McGuiness, coder@hussar.me.uk
6 **
7 ** This library is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU Lesser General Public
9 ** License as published by the Free Software Foundation; either
10 ** version 2.1 of the License, or (at your option) any later version.
11 **
12 ** This library is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ** Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public
18 ** License along with this library; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
22 #include "hash.hpp"
23 #include "max_min.hpp"
24 #include "memops.hpp"
25 #include "non_allocatable.hpp"
26 
27 #include "boost/mpl/assert.hpp"
28 
29 #include <algorithm>
30 #include <cstring>
31 #include <functional>
32 #include <memory>
33 #include <stdexcept>
34 #include <iterator>
35 #include <limits>
36 
37 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE {
38 
39 /// A class to provide a representation for a null-terminated character string that utilises a small-string optimisation.
40 /**
41  This class is a value-type.
42  If the string to be stored is less than small_string_max_size it is not stored on the heap, but on the stack.
43  It is not a drop-in replacement for std::string, as it does not implement all of the member functions of the latter (because I don't like the fat interface).
44  The optimisations assume that this class will be used for small strings, get the static branch-predictions wrong, and one can get a 50% drop in performance!
45 
46  \see __gnuxx::__vstring, std::string
47 */
48 template<
49  unsigned int BuffN, ///< The approximate number of charT characters to store in the stack-based buffer. The minimum actual size is given by small_string_max_size.
50  class charT,
51  class traits=std::char_traits<charT>
52 >
53 class basic_stack_string final : private non_newable {
54  /**
55  A type to express the machine's native memory-bus width for a word, i.e. what can be rapidly fetched from, copied in a single register operation & written back to memory. This is not the cache line width.
56  */
57  using native_databus_type=unsigned long;
58 
59 public:
60  typedef charT value_type;
61  typedef traits traits_type;
62  typedef std::iterator_traits<value_type const *> const_iterator_traits;
63  typedef std::iterator_traits<value_type *> iterator_traits;
64  typedef std::size_t size_type;
65  typedef typename iterator_traits::pointer pointer;
67  typedef typename iterator_traits::pointer iterator;
69  typedef typename iterator_traits::reference reference;
71 
72  static_assert(std::is_pod<value_type>::value, "The contained type must be a POD due to the implementation.");
73 
74  enum : std::size_t {
76  std::size_t,
77  sizeof(pointer),
78  ((BuffN+sizeof(native_databus_type))/sizeof(native_databus_type))*sizeof(native_databus_type)
79  >::value ///< Ensure that the internal, stack-based buffer is the minimum size of a pointer, or larger, in units of "native word"s. Also store the null termination char so that c_str() is fast too, trading off size for speed. In units of chars.
80  };
81  BOOST_MPL_ASSERT_RELATION(sizeof(pointer), <=, small_string_max_size);
82 
83  /**
84  The type of the exception that we throw.
85  */
86  struct exception : std::runtime_error {
87  typedef std::runtime_error base_t;
88 
89  explicit constexpr exception(char const *s) FORCE_INLINE : base_t(s) {}
90  };
91 
92 private:
93  /**
94  Try to ensure that the alignment of the input char ptr parameter is suitable for speedily copying into the internal buffer, otherwise we might get sub-optimal copying or at worst a bus error or alignment error in buffer_type::copy().
95  Note that this is compiler-specific, and the attribute will need to change for different compilers.
96 
97  \see buffer_type::copy(), basic_stack_string()
98  */
99  typedef value_type __attribute__ ((aligned (sizeof(native_databus_type)))) const * ensure_char_ptr_argument_aligned;
100 
101 public:
102  /**
103  Complexity: O(1)
104  */
105  constexpr basic_stack_string() noexcept(true) FORCE_INLINE;
106  /// Convert a C-style string into a basic_stack_string.
107  /**
108  Note the use of the opaque parameter type ensure_char_ptr_argument_aligned: this allows the user to pass in a char const * pointer, but ensures that it is suitably aligned for our purposes, without the user having to worry about alignment issues.
109  Complexity: O(sz) best case, O(new()+sz) worst-case.
110  May throw: std::bad_alloc, exception if pointer is nullptr
111 
112  \param src A c-style null terminated string, that should have been suitably aligned, automatically, by the compiler.
113  \param sz The number of chars to copy.
114 
115  \see ensure_char_ptr_argument_aligned
116  */
117  explicit basic_stack_string(ensure_char_ptr_argument_aligned src, std::size_t sz) noexcept(false) FORCE_INLINE;
118  /// Convert a C-style array of chars into a basic_stack_string.
119  /**
120  Complexity: O(1) best case, O(new()+SrcSz) worst-case.
121  May throw: std::bad_alloc
122 
123  \param src A c-style null terminated char-array.
124  */
125  template<std::size_t SrcSz> explicit FORCE_INLINE
126  basic_stack_string(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size);
127  /**
128  Note that if the string contained in str can be optimised, this function will optimise the copy, i.e. place the copy on the stack from the heap.
129  Complexity: O(1) best case, O(new()+str.size()) worst-case.
130  May throw: std::bad_alloc
131 
132  \param str The basic_stack_string to be copied.
133  */
134  basic_stack_string(basic_stack_string const &str) noexcept(false) FORCE_INLINE;
135  /**
136  Complexity: O(1).
137 
138  \param str The string to be copied.
139  */
140  basic_stack_string(basic_stack_string&& str) noexcept(true) FORCE_INLINE;
141  /**
142  Complexity: O(1) best-case, O(delete()) worst-case.
143  */
144  ~basic_stack_string() noexcept(true) FORCE_INLINE;
145 
146  /// Copy the basic_stack_string.
147  /**
148  Complexity: O(1) best case, O(new()+str.size()) worst-case.
149  May throw: std::bad_alloc
150 
151  \return An r-value to the copied basic_stack_string.
152  */
153  basic_stack_string &operator=(basic_stack_string const &str) noexcept(false) FORCE_INLINE;
154  /// Move the basic_stack_string.
155  /**
156  Complexity: O(1)
157 
158  \return An r-value to the copied basic_stack_string.
159  */
160  basic_stack_string &operator=(basic_stack_string &&str) noexcept(true) FORCE_INLINE;
161 
162  constexpr bool operator==(basic_stack_string const &) const noexcept(true) FORCE_INLINE;
163  constexpr bool operator!=(basic_stack_string const &) const noexcept(true) FORCE_INLINE;
164 
165  /// Return an iterator to the beginning of the contained string.
166  /**
167  Complexity: O(1)
168 
169  \return iterator to the beginning of the contained string.
170  */
171  iterator begin() noexcept(true) FORCE_INLINE;
172  /// Return a const_iterator to the beginning of the contained string.
173  /**
174  Complexity: O(1)
175 
176  \return const_iterator to the beginning of the contained string.
177  */
178  const_iterator begin() const noexcept(true) FORCE_INLINE;
179  /// Return an iterator to the end of the contained string.
180  /**
181  Complexity: O(1)
182 
183  \return iterator to the end of the contained string.
184  */
185  iterator end() noexcept(true) FORCE_INLINE;
186  /// Return a const_iterator to the end of the contained string.
187  /**
188  Complexity: O(1)
189 
190  \return const_iterator to the end of the contained string.
191  */
192  const_iterator end() const noexcept(true) FORCE_INLINE;
193 
194  /// Return the size of the contained string, excluding any null termination.
195  /**
196  Complexity: O(1)
197  Constraint: size()<=capacity() && empty==(size()==0).
198 
199  \return Size of the contained string.
200 
201  \see capacity(), empty()
202  */
203  constexpr size_type size() const noexcept(true) FORCE_INLINE;
204  /// Return the maximum size of any possible contained string, including any null termination.
205  /**
206  Complexity: O(1)
207  Constraint: max_size()>=capacity().
208 
209  \return Maximum possible size of a contained string.
210 
211  \see capacity()
212  */
213  static constexpr size_type max_size() noexcept(true) FORCE_INLINE {
214  return std::numeric_limits<typename iterator_traits::difference_type>::max();
215  }
216  /// Uninitialised resize of the contained string to the requested size.
217  /**
218  Notes:
219  - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated, for speed.
220  - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
221  - The null terminator is re-added to the end.
222  - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained.
223  Complexity: O(reserve())
224  Constraint: size()==s.
225  May throw: std::bad_alloc
226 
227  \param s Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of at least s.
228 
229  \see empty(), reserve(), size()
230  */
231  void resize(size_type s) noexcept(false) FORCE_INLINE;
232  /// Initialised resize of the contained string to the requested size.
233  /**
234  Notes:
235  - If capacity()>s then the internal buffer is reduced in size. Note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
236  - The null terminator is re-added to the end.
237  - If the capacity() has to be increased, all iterators are invalidated but the contained string is maintained.
238  Complexity: O(reserve()+std::max(s-size(), 0))
239  Constraint: size()==s && empty()==false.
240  May throw: std::bad_alloc
241 
242  \param s Ensure that size() returns s. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity() of at least s.
243  \param i The value to which the s elements within the contained string should be initialised.
244 
245  \see capacity(), empty(), size()
246  */
247  void resize(size_type s, value_type i) noexcept(false) FORCE_INLINE;
248  /// Return the size of the internal buffer used to store the string, including the null termination.
249  /**
250  Complexity: O(1)
251  Constraint: capacity()>=size() && capacity()<=max_size().
252 
253  \return Return the size of internal buffer.
254 
255  \see max_size(), size()
256  */
257  constexpr size_type capacity() const noexcept(true) FORCE_INLINE {
258  return capacity_;
259  }
260  /// Set the size of the internal buffer to at least the requested size, if the requested size is larger, then it will not be made smaller.
261  /**
262  Notes:
263  - The elements in the internal buffer are not re-initialised, even if more memory has had to be allocated.
264  - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained string is maintained.
265  - If capacity() has to be reduced, note that if the string has been moved to the heap it will not be moved back to the stack, for speed.
266  - If an exception is thrown, then the object is unaffected.
267  Complexity: O(1) best case, O(new()+delete()+std::max(s-size(), 0)) worst case.
268  Constraint: capacity()==s && capacity()<=max_size().
269  May throw: std::bad_alloc
270 
271  \param s Ensure that capacity() returns s, if less than max_size(), otherwise no effect. The internal buffer in which the contained string may be stored is adjusted to ensure that it has a capacity of s.
272 
273  \see capacity(), max_size()
274  */
275  void reserve(size_type s) noexcept(false) FORCE_INLINE;
276 
277  /// Clear the contained string.
278  /**
279  Complexity: O(1) best case, O(delete()) worst case.
280  Postcondition: size()==0 && empty()==true.
281 
282  \see empty(), size()
283  */
284  void clear() noexcept(true) FORCE_INLINE;
285  /// Return is the string is empty.
286  /**
287  Complexity: O(1)
288  Condition: empty()==(size()==0).
289 
290  \return true if size()==0, false otherwise.
291 
292  \see size()
293  */
294  constexpr bool empty() const noexcept(true) FORCE_INLINE;
295 
296  /// Return an l-value to the requested element in the contained string.
297  /**
298  Note that p must be less than size(), otherwise UB.
299  Complexity: O(1)
300 
301  \param p The zero-based index into the contained string.
302  \return An l-value to the requested character in the contained string.
303 
304  \see at(), size()
305  */
306  reference operator[](size_type p) noexcept(true) FORCE_INLINE;
307  /// Return an r-value to the requested element in the contained string.
308  /**
309  Note that p must be less than size(), otherwise UB.
310  Complexity: O(1)
311 
312  \param p The zero-based index into the contained string.
313  \return An r-value to the requested character in the contained string.
314 
315  \see at(), size()
316  */
317  const_reference operator[](size_type p) const noexcept(true) FORCE_INLINE;
318 
319  /// Append a character to the end of the contained string.
320  /**
321  Note:
322  - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained.
323  Complexity: O(resize())
324  May throw: std::bad_alloc
325 
326  \param c The character to append.
327 
328  \see resize()
329  */
330  void push_back(value_type c) noexcept(false) FORCE_INLINE;
331  /// Insert the characters in the range [b, e) into the contained string from the specified position, growing the contained string as necessary.
332  /**
333  Note:
334  - I'm not implementing the large number of overloads in std::basic_basic_stack_string, as that interface is unnecessarily fat.
335  - The range inserted is [b, e).
336  - If the capacity() has to be increased, all iterators are invalidated, values returned from data() & c_str() are invalidated, but the contained basic_stack_string is maintained.
337  Complexity: O(resize())
338  May throw: std::bad_alloc
339 
340  \param p The position immediately after the position into which the range is inserted, must be in the range [begin(), end()).
341  \param b The beginning of the range.
342  \param e The end of the range.
343  \return An iterator which refers to the copy of the first inserted character, or p if b==e.
344  */
346 
347  /// Remove the characters in the range [b, e).
348  /**
349  Note that {b, e} must be within [begin(), end()).
350  Complexity: O(std::distance(b, e))
351 
352  \param b The beginning of the range.
353  \param e The end of the range.
354  \return An iterator which points to the element pointed to by e prior to the other elements being
355 erased. If no such element exists, end() is returned.
356  */
358 
359  /// Replace the characters in the range [b, e) with those in the range [src_b, src_e).
360  /**
361  Note that {b, e} must be within [begin(), end()).
362  Complexity: O(resize()+std::max(std::distance(b, e), std::distance(src_b, src_e))
363 
364  \param b The beginning of the range.
365  \param e The end of the range.
366  \param src_b The beginning of the source range.
367  \param src_e The end of the source range.
368  \return An l-value to the basic_stack_string.
369  */
370  basic_stack_string &replace(iterator b, iterator e, const_iterator src_b, const_iterator src_e) noexcept(false) FORCE_INLINE;
371 
372  /// Swap the current basic_stack_string with the argument.
373  /**
374  Complexity: O(1)
375 
376  \param str The basic_stack_string with which to swap.
377  */
378  void swap(basic_stack_string &str) noexcept(true) FORCE_INLINE;
379 
380 private:
381  /**
382  Either have a pointer to the heap or a small string stored in the space for the pointer. By using this union and putting the pointer-type first this should guarantee that the string alignment is reasonable for the cache, i.e. not at char alignment, which could be very bad.
383  If the small_string_max_size>capacity() then make sure that the small-string optimisation is engaged, i.e. this is the flag to know which member of the buffer_type union is valid.
384  */
385  union ALIGN_TO_L1_CACHE buffer_type {
386  /**
387  The copy of the small string can be sped up using this array to directly copy the chunks of chars words-at-a-time.
388  */
389  native_databus_type fast_copy_values[small_string_max_size/sizeof(native_databus_type)+1]={};
390  pointer heap;
391  value_type small_basic_stack_string[small_string_max_size/sizeof(value_type)];
392 
393  enum {
394  num_fast_copy_values=sizeof(fast_copy_values)/sizeof(native_databus_type)
395  };
396  BOOST_MPL_ASSERT_RELATION(sizeof(pointer), <=, sizeof(fast_copy_values));
397  BOOST_MPL_ASSERT_RELATION(sizeof(small_basic_stack_string), <=, sizeof(fast_copy_values));
398  BOOST_MPL_ASSERT_RELATION(sizeof(small_basic_stack_string), ==, small_string_max_size);
399  BOOST_MPL_ASSERT_RELATION(small_string_max_size, <=, num_fast_copy_values*sizeof(native_databus_type));
400 
401  using unrolled_op_t=private_::unroll<num_fast_copy_values>;
402 
403  void ctor(size_type cap, ensure_char_ptr_argument_aligned src) noexcept(false) FORCE_INLINE;
404  template<std::size_t SrcSz>
405  void ctor(value_type const (& src)[SrcSz]) noexcept(SrcSz<=small_string_max_size) FORCE_INLINE;
406  void cctor(size_type cap, buffer_type const &b) noexcept(false) FORCE_INLINE;
407 
408  /**
409  Copy the small string directly in chunks of chars words-at-a-time.
410 
411  \param src The source string.
412  */
413  constexpr void copy(native_databus_type const *src) noexcept(true) FORCE_INLINE;
414  /**
415  Copy the small string directly in chunks of chars words-at-a-time.
416 
417  \param src The source string.
418  */
419  constexpr void copy(ensure_char_ptr_argument_aligned src) noexcept(true) FORCE_INLINE;
420  template<std::size_t SrcSz> constexpr void FORCE_INLINE
421  copy(value_type const (& src)[SrcSz]) noexcept(true);
422  void copy(size_type cap, const_pointer b, const_pointer e, size_type offset) noexcept(true) FORCE_INLINE;
423  void fill_n(size_type cap, size_type offset, size_type n, value_type i) noexcept(true) FORCE_INLINE;
424  void swap(buffer_type &buff) noexcept(true) FORCE_INLINE;
425  void move(size_type cap, typename iterator_traits::difference_type f, typename iterator_traits::difference_type l, size_type n) noexcept(true) FORCE_INLINE;
426  };
427 
428  /**
429  We store the size to get at it rapidly, trading off the space required to store the size.
430  */
431  size_type size_;
432  /**
433  We store the capacity to get at it rapidly, trading off the space required to store the size.
434  */
435  size_type capacity_;
436  // Larger objects come first to improve cache use of strings.
437  buffer_type buffer;
438 };
439 
440 template<unsigned int BuffN, class charT, class traits> std::basic_ostream<charT, traits> &
441 operator<<(std::basic_ostream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> const &s) noexcept(false);
442 
443 template<unsigned int BuffN, class charT, class traits> std::basic_istream<charT, traits> &
444 operator>>(std::basic_istream<charT, traits> &os, basic_stack_string<BuffN, charT, traits> &s) noexcept(false);
445 
447 typedef basic_stack_string<1, wchar_t> wstack_string;
448 
449 } }
450 
451 namespace std {
452 
453 template<>
457 
458  result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE {
460  return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s);
461  }
462 };
463 
464 template<>
468 
469  result_type operator()(argument_type const &s) const noexcept(true) FORCE_INLINE {
471  return s.capacity()<s.small_string_max_size ? *s.begin() : hasher_t().operator()(s);
472  }
473 };
474 
475 }
476 
478 
479 #endif