libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
memops.hpp
Go to the documentation of this file.
1 #ifndef LIBJMMCG_CORE_MEMOPS_HPP
2 #define LIBJMMCG_CORE_MEMOPS_HPP
3 
4 /******************************************************************************
5 ** Copyright © 2013 by J.M.McGuiness, coder@hussar.me.uk
6 **
7 ** This library is free software; you can redistribute it and/or
8 ** modify it under the terms of the GNU Lesser General Public
9 ** License as published by the Free Software Foundation; either
10 ** version 2.1 of the License, or (at your option) any later version.
11 **
12 ** This library is distributed in the hope that it will be useful,
13 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ** Lesser General Public License for more details.
16 **
17 ** You should have received a copy of the GNU Lesser General Public
18 ** License along with this library; if not, write to the Free Software
19 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
21 
24 #include "max_min.hpp"
25 
26 #include <boost/mpl/assert.hpp>
27 
28 #include <algorithm>
29 #include <cassert>
30 #include <cstdint>
31 #include <cstring>
32 #include <iostream>
33 #include <memory>
34 
35 #if defined(i386) || defined(__i386__) || defined(__x86_64__)
36 # include <immintrin.h>
37 #endif
38 
39 /**
40  \file memops.hpp The objective of these template specialisations is to allow the compiler to correctly select the appropriate, high-speed operation.
41 */
42 
43 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE {
44 
45 /// Generic, iterator-based counterpart of std::memcpy(): copy n elements from src to dest.
46 /**
47  \param dest A random-access iterator into a contiguous array of memory. Note that the destination locations are not expected to be pre-initialised, i.e. only placement-new will be used to create the elements there.
48  \param src A random-access iterator into a contiguous array of memory.
49  \param n The number of elements (as opposed to bytes) to copy. Must comprise a valid range.
50 
51  \see std::memcpy(), std::wmemcpy()
52 */
53 template<class Iter1, class Iter2> inline void FORCE_INLINE
54 memcpy(Iter1 dest, Iter2 src, std::size_t n) noexcept(true);
/// Explicit specialisation for char buffers. NOTE(review): presumably defined in memops_impl.hpp (included at the end of this file) in terms of std::memcpy() - confirm.
55 template<> inline void FORCE_INLINE
56 memcpy<char *, char const *>(char *dest, char const *src, std::size_t n) noexcept(true);
/// Explicit specialisation for wchar_t buffers. NOTE(review): presumably defined in memops_impl.hpp in terms of std::wmemcpy() - confirm.
57 template<> inline void FORCE_INLINE
58 memcpy<wchar_t *, wchar_t const *>(wchar_t *dest, wchar_t const *src, std::size_t n) noexcept(true);
59 
60 /// Generic, iterator-based counterpart of std::memmove(): move n elements from src to dest.
61 /**
62  \param dest A random-access iterator into a contiguous array of memory. Note that the destination locations are not expected to be pre-initialised, i.e. only placement-new will be used to create the elements there.
63  \param src A random-access iterator into a contiguous array of memory. NOTE(review): presumably the source and destination ranges may overlap, as with std::memmove() - confirm against the definition, which is not visible in this header.
64  \param n The number of elements (as opposed to bytes) to move. Must comprise a valid range.
65 
66  \see std::memmove(), std::wmemmove()
67 */
68 template<class Iter1, class Iter2> inline void FORCE_INLINE
69 memmove(Iter1 dest, Iter2 src, std::size_t n) noexcept(true);
/// Explicit specialisation for char buffers. NOTE(review): presumably defined in memops_impl.hpp (included at the end of this file) in terms of std::memmove() - confirm.
70 template<> inline void FORCE_INLINE
71 memmove<char *, char const *>(char *dest, char const *src, std::size_t n) noexcept(true);
/// Explicit specialisation for wchar_t buffers. NOTE(review): presumably defined in memops_impl.hpp in terms of std::wmemmove() - confirm.
72 template<> inline void FORCE_INLINE
73 memmove<wchar_t *, wchar_t const *>(wchar_t *dest, wchar_t const *src, std::size_t n) noexcept(true);
74 
75 /// Generic, iterator-based counterpart of std::memset(): set n elements, starting at dest, to the value i.
76 /**
77  \param dest A random-access iterator into a contiguous array of memory.
78  \param i The value with which to initialise the elements in the range. Its type, V, must be exactly the value_type of the iterator, otherwise the std::enable_if in the return type removes this template from overload resolution.
79  \param n The number of elements (as opposed to bytes) to set. Must comprise a valid range.
80 
81  \see std::memset(), std::wmemset()
82 */
83 template<class Iter, class V> inline typename std::enable_if<std::is_same<typename std::iterator_traits<Iter>::value_type, V>::value>::type FORCE_INLINE
84 memset(Iter dest, V i, std::size_t n) noexcept(true);
/// Explicit specialisation for char buffers. NOTE(review): presumably defined in memops_impl.hpp (included at the end of this file) in terms of std::memset() - confirm.
85 template<> inline void FORCE_INLINE
86 memset<char *, char>(char *dest, char i, std::size_t n) noexcept(true);
/// Explicit specialisation for wchar_t buffers. NOTE(review): presumably defined in memops_impl.hpp in terms of std::wmemset() - confirm.
87 template<> inline void FORCE_INLINE
88 memset<wchar_t *, wchar_t>(wchar_t *dest, wchar_t i, std::size_t n) noexcept(true);
89 
90 /// Element-wise equality comparison in the style of std::memcmp(), but returning a bool (equal or not) rather than an ordering.
91 /**
92  \param src1 A random-access iterator into a contiguous array of memory.
93  \param src2 A random-access iterator into a contiguous array of memory. Note that both iterators must be of the same type, Iter.
94  \param n The number of elements (as opposed to bytes) to compare. Must comprise a valid range.
95  \return true iff the two sequences accessed via the iterators compare equal, element-by-element wise.
96 
97  \see std::memcmp(), std::wmemcmp()
98 */
99 template<class Iter> inline bool FORCE_INLINE
100 memcmp(Iter src1, Iter src2, std::size_t n) noexcept(true);
/// Explicit specialisation for char buffers. NOTE(review): presumably defined in memops_impl.hpp (included at the end of this file) in terms of std::memcmp() - confirm.
101 template<> inline bool FORCE_INLINE
102 memcmp<char const *>(char const *src1, char const *src2, std::size_t n) noexcept(true);
/// Explicit specialisation for wchar_t buffers. NOTE(review): presumably defined in memops_impl.hpp in terms of std::wmemcmp() - confirm.
103 template<> inline bool FORCE_INLINE
104 memcmp<wchar_t const *>(wchar_t const *src1, wchar_t const *src2, std::size_t n) noexcept(true);
105 
/// Overload of memcpy() for C-style arrays of Val, with the sizes of both arrays deduced at compile-time.
/**
 Note that the parameter order is (src, dest), the reverse of the iterator-based memcpy() overload declared in this header, and that this overload is declared noexcept(false), i.e. it may throw.

 NOTE(review): how many elements are copied when SrcSz and DestSz differ is not visible in this header - confirm against the definition.

 \param src The source C-style array.
 \param dest The destination C-style array.
*/
106 template<class Val, std::size_t SrcSz, std::size_t DestSz> void FORCE_INLINE
107 memcpy(Val const (& src)[SrcSz], Val (& dest)[DestSz]) noexcept(false);
108 
109 /// Copy the source array to the destination (which may overlap), statically unrolled.
110 /**
111  This method uses 512-bit, 256-bit, 128-bit, 64-bit, 32-bit, 16-bit then byte copies, unrolled in an attempt to reduce the number of copy operations performed.
112 
113  Algorithmic complexity: O(min(SrcSz, DestSz)/512)
114 
115  Note that UBSan reports mis-aligned accesses [1], which could occur when using MOV instructions [2], because in 64-bit mode alignment checking of ring 3 can be enabled. But on Intel & AMD mis-aligned access to memory-operands is not an issue in ring 3, so UBSan is being conservative (obeying the Standard).
116 
117  [1] <a href="https://developers.redhat.com/blog/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/"/>
118  [2] <a href="https://www.felixcloutier.com/x86/mov"/>
119 
120  \param src The source C-style char-array.
121  \param dest The destination C-style char-array.
122 
123  \see std::memcpy()
124 */
125 template<
126  std::size_t SrcSz, ///< The size of the source array in bytes.
127  std::size_t DestSz ///< The size of the destination array in bytes.
128 >
129 inline constexpr void FORCE_INLINE
130 memcpy_opt(char const (&src)[SrcSz], char (&dest)[DestSz]) noexcept(true);
131 
132 /// Compare the first array with the second (which may overlap), statically unrolled.
133 /**
134  This method uses 512-bit, 256-bit, 128-bit, 64-bit, 32-bit, 16-bit then byte comparisons, unrolled in an attempt to reduce the number of comparison operations performed.
135 
136  Algorithmic complexity: O(min(FirstSz, SecondSz)/512)
137 
138  Note that UBSan reports mis-aligned accesses [1], which could occur when using MOV instructions [2], because in 64-bit mode alignment checking of ring 3 can be enabled. But on Intel & AMD mis-aligned access to memory-operands is not an issue in ring 3, so UBSan is being conservative (obeying the Standard).
139 
140  [1] <a href="https://developers.redhat.com/blog/2014/10/16/gcc-undefined-behavior-sanitizer-ubsan/"/>
141  [2] <a href="https://www.felixcloutier.com/x86/mov"/>
142 
143  \param first The first C-style char-array.
144  \param second The second C-style char-array.
145  \return True if they are identical arrays, otherwise false.
146 
147  \see std::memcmp()
148 */
149 template<
150  std::size_t FirstSz, ///< The size of the first array in bytes.
151  std::size_t SecondSz ///< The size of the second array in bytes.
152 >
153 inline constexpr bool FORCE_INLINE
154 memcmp_opt(char const (&first)[FirstSz], char const (&second)[SecondSz]) noexcept(true);
155 
156 /// Find the first occurrence of the character in the string.
157 /**
158  This is based upon algorithms from: <a href="https://www.strchr.com/strcmp_and_strlen_using_sse_4.2"/>.
159 
160  Algorithmic complexity: O(FirstSz/256)
161 
162  \param haystack The C-style char-array in which to search.
163  \tparam needle The character to find; note that it is supplied as a compile-time template argument, not as a function argument.
164  \return Zero (i.e. a null pointer) if the character was not found in the haystack, otherwise a pointer to the first occurrence of the character in the haystack.
165 
166  \see std::strchr()
167 */
168 template<
169  char const needle,
170  std::size_t FirstSz ///< The size of the first array in bytes.
171 >
172 inline constexpr char const * FORCE_INLINE
173 strchr_opt(char const (&haystack)[FirstSz]) noexcept(true) __attribute__((pure));
174 
175 /// Find if the second string is a sub-string of the first.
176 /**
177  This is based upon algorithms from: <a href="https://github.com/WojciechMula/sse4-strstr/blob/master/avx2-strstr-v2.cpp"/> & <a href="http://0x80.pl/articles/simd-strfind.html#generic-sse-avx2"/>.
178 
179  Algorithmic complexity: O(min(FirstSz, SecondSz)/256)
180 
181  \param haystack The first C-style char-array, the size of which must be a whole number of 32-byte units.
182  \param needle The second C-style char-array, up to a maximum size of 32 bytes.
183  \return Zero (i.e. a null pointer) if the second string was not found in the first, otherwise a pointer to the beginning of that string within the first.
184 
185  \see std::strstr()
186 */
187 template<
188  std::size_t FirstSz, ///< The size of the first array in bytes.
189  std::size_t SecondSz, ///< The size of the second array in bytes.
190  class LessThan32BytesLong=typename std::enable_if<SecondSz<=32>::type ///< Constraint, not to be supplied by the caller: removes this function from overload resolution unless the second array is at most 32 bytes in size.
191 >
192 inline constexpr char const * FORCE_INLINE
193 strstr_opt(char const (&haystack)[FirstSz], char const (&needle)[SecondSz]) noexcept(true) __attribute__((pure));
194 
/// Overload of memcpy_opt() taking std::array<char, ...> objects rather than C-style char-arrays.
/**
 NOTE(review): presumably this copies min(SrcSz, DestSz) bytes in the same manner as the C-style array overload - confirm against the definition, which is not visible in this header.
 NOTE(review): this header uses std::array but does not include <array> directly; presumably it arrives via another include - confirm.

 \param src The source array.
 \param dest The destination array.
*/
195 template<
196  std::size_t SrcSz, ///< The size of the source array in bytes.
197  std::size_t DestSz ///< The size of the destination array in bytes.
198 >
199 inline constexpr void FORCE_INLINE
200 memcpy_opt(std::array<char, SrcSz> const &src, std::array<char, DestSz> &dest) noexcept(true);
/// Overload of memcpy_opt() taking std::array<uint8_t, ...> objects; the signature is otherwise identical to the std::array<char, ...> overload.
/**
 \param src The source array.
 \param dest The destination array.
*/
201 template<
202  std::size_t SrcSz, ///< The size of the source array in bytes.
203  std::size_t DestSz ///< The size of the destination array in bytes.
204 >
205 inline constexpr void FORCE_INLINE
206 memcpy_opt(std::array<uint8_t, SrcSz> const &src, std::array<uint8_t, DestSz> &dest) noexcept(true);
207 
/// Overload of memcmp() taking two std::array<char, Sz> objects of the same size.
/**
 \param src1 The first array.
 \param src2 The second array, which has the same size, Sz, as the first.
 \return NOTE(review): presumably true iff the two arrays compare equal, as documented for the iterator-based memcmp() in this header - confirm against the definition, which is not visible here.
*/
208 template<
209  std::size_t Sz ///< The size of the arrays in bytes.
210 >
211 inline bool FORCE_INLINE
212 memcmp(std::array<char, Sz> const &src1, std::array<char, Sz> const &src2) noexcept(true);
/// Overload of memcmp() taking two std::array<uint8_t, Sz> objects of the same size; the signature is otherwise identical to the std::array<char, Sz> overload.
/**
 \param src1 The first array.
 \param src2 The second array, which has the same size, Sz, as the first.
 \return NOTE(review): presumably true iff the two arrays compare equal - confirm against the definition, which is not visible here.
*/
213 template<
214  std::size_t Sz ///< The size of the arrays in bytes.
215 >
216 inline bool FORCE_INLINE
217 memcmp(std::array<uint8_t, Sz> const &src1, std::array<uint8_t, Sz> const &src2) noexcept(true);
218 
/// Equality operator for two std::array<char, Sz> objects of the same size.
/**
 NOTE(review): the standard library already supplies an operator== for std::array. As this overload is declared in this library's namespace, and std::array<char, Sz> has no association with that namespace, argument-dependent lookup will not find it; it is only considered where it is visible to ordinary name lookup - confirm the intended usage.

 \param src1 The left-hand array.
 \param src2 The right-hand array.
 \return NOTE(review): presumably true iff the arrays compare equal - confirm against the definition, which is not visible in this header.
*/
219 template<
220  std::size_t Sz ///< The size of the arrays in bytes.
221 >
222 inline bool FORCE_INLINE
223 operator==(std::array<char, Sz> const &src1, std::array<char, Sz> const &src2) noexcept(true);
/// Equality operator for two std::array<uint8_t, Sz> objects of the same size; the signature is otherwise identical to the std::array<char, Sz> overload.
/**
 \param src1 The left-hand array.
 \param src2 The right-hand array.
 \return NOTE(review): presumably true iff the arrays compare equal - confirm against the definition, which is not visible in this header.
*/
224 template<
225  std::size_t Sz ///< The size of the arrays in bytes.
226 >
227 inline bool FORCE_INLINE
228 operator==(std::array<uint8_t, Sz> const &src1, std::array<uint8_t, Sz> const &src2) noexcept(true);
229 
230 /// Copy the source array to the destination (which may overlap) one byte at a time, statically unrolled.
231 /**
232  Algorithmic complexity: O(min(SrcSz, DestSz))
233 
234  \param src The source C-style char-array.
235  \param dest The destination C-style char-array.
236 
237  \see std::memcpy(), memcpy_opt()
238 */
239 template<
240  std::size_t SrcSz, ///< The size of the source array in bytes.
241  std::size_t DestSz ///< The size of the destination array in bytes.
242 >
243 inline constexpr void FORCE_INLINE
244 memcpy_slow(char const (& src)[SrcSz], char (& dest)[DestSz]) noexcept(true);
245 
/// Create and return, by value, an object of type T from the source, src.
/**
 T only appears in the return type, so it cannot be deduced: callers must specify it explicitly, e.g. copy<T>(src). T must provide a nested value_type, as that is used to form the default type of V.

 NOTE(review): the default V is a std::pair of pointers to const T::value_type, presumably denoting the beginning and end of a range of elements to copy - confirm against the definition, which is not visible in this header.

 \param src The source from which the returned object is created.
 \return The newly-created object of type T.
*/
246 template<class T, class V=std::pair<typename T::value_type const *, typename T::value_type const *>> inline T FORCE_INLINE
247 copy(V const &src) noexcept(true);
248 
/// Create and return, by value, a std::array<char, DestSz> from a std::array<char, SrcSz>.
/**
 DestSz only appears in the return type, so it cannot be deduced: callers must specify both template arguments explicitly, e.g. copy<SrcSz, DestSz>(src).

 NOTE(review): what happens when SrcSz and DestSz differ (truncation, or the contents of any excess destination elements) is not visible in this header - confirm against the definition.

 \param src The source array.
 \return The newly-created destination array.
*/
249 template<
250  std::size_t SrcSz, ///< The size of the source array in bytes.
251  std::size_t DestSz ///< The size of the returned array in bytes.
252 > inline constexpr std::array<char, DestSz> FORCE_INLINE
253 copy(std::array<char, SrcSz> const &src) noexcept(true);
254 
/// Create and return, by value, a std::array<uint8_t, DestSz> from a std::array<uint8_t, SrcSz>.
/**
 DestSz only appears in the return type, so it cannot be deduced: callers must specify both template arguments explicitly, e.g. copy<SrcSz, DestSz>(src).

 NOTE(review): what happens when SrcSz and DestSz differ (truncation, or the contents of any excess destination elements) is not visible in this header - confirm against the definition.

 \param src The source array.
 \return The newly-created destination array.
*/
255 template<
256  std::size_t SrcSz, ///< The size of the source array in bytes.
257  std::size_t DestSz ///< The size of the returned array in bytes.
258 > inline constexpr std::array<uint8_t, DestSz> FORCE_INLINE
259 copy(std::array<uint8_t, SrcSz> const &src) noexcept(true);
260 
261 } }
262 
263 #include "memops_impl.hpp"
264 
265 #endif