libjmmcg  release_579_6_g8cffd
A C++ library containing an eclectic mix of useful, advanced components.
hash.hpp
Go to the documentation of this file.
1 /******************************************************************************
2 ** Copyright © 2004 by J.M.McGuiness, coder@hussar.me.uk
3 **
4 ** This library is free software; you can redistribute it and/or
5 ** modify it under the terms of the GNU Lesser General Public
6 ** License as published by the Free Software Foundation; either
7 ** version 2.1 of the License, or (at your option) any later version.
8 **
9 ** This library is distributed in the hope that it will be useful,
10 ** but WITHOUT ANY WARRANTY; without even the implied warranty of
11 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 ** Lesser General Public License for more details.
13 **
14 ** You should have received a copy of the GNU Lesser General Public
15 ** License along with this library; if not, write to the Free Software
16 ** Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17 */
18 
20 
21 #include <boost/cstdint.hpp>
22 #include <boost/static_assert.hpp>
23 
24 #include <cassert>
25 #include <functional>
26 #include <iterator>
27 
28 namespace jmmcg { namespace LIBJMMCG_VER_NAMESPACE {
29 
30  /// Various hashing functions.
31  /**
32  \see boost::hash, std::hash
33  */
34  namespace hashers {
35 
36  /// An implementation of the MurmurHash2 hash function.
37  /**
38  From <a href="http://murmurhash.googlepages.com/"/>.
39  */
40  template<
41  typename A ///< The STL-container type of the item to be hashed.
42  >
43  struct murmur2 : std::unary_function<A, std::size_t> {
44  private:
45  // 'm' and 'r' are mixing constants generated offline.
46  // They're not really 'magic', they just happen to work well.
47  static constexpr int m = 0x5bd1e995;
48  static constexpr int r = 24;
49 
50  public:
51  typedef typename std::unary_function<A, std::size_t>::result_type result_type;
52  typedef typename std::unary_function<A, std::size_t>::argument_type argument_type;
53 
54  /// Generate the hash of the value.
55  /**
56  \param d An STL-container type holding the value to be hashed, that must be aligned.
57  \param seed The seed value for hashing.
58  \return The hash of the value.
59  */
60  result_type __fastcall FORCE_INLINE operator()(const argument_type &d, const result_type seed=0) const noexcept(true) {
61  BOOST_STATIC_ASSERT(sizeof(int) == 4);
62  typename argument_type::size_type len=d.size()*sizeof(typename argument_type::value_type);
63  // Initialize the hash to a 'random' value.
64  result_type h = seed ^ len;
65  // Mix 4 bytes at a time into the hash.
66  unsigned char const *data=reinterpret_cast<unsigned char const *>(&*d.begin());
67  for (;len >= 4; len -= 4) {
68  unsigned int k=*reinterpret_cast<unsigned int const *>(data);
69  k *= m;
70  k ^= k >> r;
71  k *= m;
72 
73  h *= m;
74  h ^= k;
75 
76  data += 4;
77  }
78 
79 #pragma GCC diagnostic push
80 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
81  // Handle the last few bytes of the input array.
82  switch(len) {
83  case 3:
84  h ^= data[2] << 16;
85  case 2:
86  h ^= data[1] << 8;
87  case 1:
88  h ^= data[0];
89  h *= m;
90  default:
91  break;
92  };
93 #pragma GCC diagnostic pop
94 
95  // Do a few final mixes of the hash to ensure the last few bytes are well-incorporated.
96  h ^= h >> 13;
97  h *= m;
98  h ^= h >> 15;
99  return h;
100  }
101  };
102 
103  /// An implementation of the Hsieh hash function.
104  /**
105  From <a href="http://www.azillionmonkeys.com/qed/hash.html"/>.
106 
107  \see mpl::hsieh
108  */
109  template<
110  typename A ///< The STL-container type of the item to be hashed.
111  >
112  struct hsieh : std::unary_function<A, std::size_t> {
113  typedef typename std::unary_function<A, std::size_t>::result_type result_type;
114  typedef typename std::unary_function<A, std::size_t>::argument_type argument_type;
115 
116  private:
117  typedef int16_t hashed_unit_t;
118 
119  static constexpr hashed_unit_t __fastcall FORCE_INLINE get16bits(typename argument_type::const_iterator d) noexcept(true) {
120  return *reinterpret_cast<hashed_unit_t const *>(&*d);
121  }
122 
123  public:
124  /// Generate the hash of the value.
125  /**
126  \param d An STL-container type holding the value to be hashed.
127  \return The hash of the value.
128  */
129  result_type __fastcall FORCE_INLINE operator()(const argument_type &d) const noexcept(true) {
130  if (LIKELY(!d.empty())) {
131  typename argument_type::size_type len=d.size();
132  const unsigned short rem=static_cast<unsigned short>(len & 3);
133  unsigned long hash=static_cast<unsigned long>(len);
134  typename argument_type::const_iterator data=d.begin();
135  len >>= 2;
136 
137  /* Main loop */
138  for (;len > 0; --len) {
139  hash += get16bits(data);
140  typename argument_type::const_iterator it(std::next(data, sizeof(hashed_unit_t)));
141  const unsigned long tmp = (get16bits(it) << 11) ^ hash;
142  hash = (hash << 16) ^ tmp;
143  std::advance(data, 2*sizeof(hashed_unit_t));
144  hash += hash >> 11;
145  }
146 
147  /* Handle end cases */
148  switch (rem) {
149  case 3:
150  hash += get16bits(data);
151  hash ^= hash << 16;
152  hash ^= data[sizeof(hashed_unit_t)] << 18;
153  hash += hash >> 11;
154  break;
155  case 2:
156  hash += get16bits(data);
157  hash ^= hash << 11;
158  hash += hash >> 17;
159  break;
160  case 1:
161  hash += *data;
162  hash ^= hash << 10;
163  hash += hash >> 1;
164  }
165 
166  /* Force "avalanching" of final 127 bits */
167  hash ^= hash << 3;
168  hash += hash >> 5;
169  hash ^= hash << 4;
170  hash += hash >> 17;
171  hash ^= hash << 25;
172  hash += hash >> 6;
173 
174  return static_cast<result_type>(hash);
175  } else {
176  return 0;
177  }
178  }
179  };
180 
181  namespace mpl {
182 
183  namespace private_ {
184 
185  typedef int16_t hashed_unit_t;
186 
187  template<typename A>
188  inline constexpr hashed_unit_t __fastcall FORCE_INLINE get16bits(A &d) noexcept(true) {
189  return *reinterpret_cast<hashed_unit_t const *>(&*d);
190  }
191 
192  template<unsigned long len,class C>
193  struct core_hash;
194  template<class C>
195  struct core_hash<0,C> {
196  static constexpr unsigned long FORCE_INLINE result(typename C::const_iterator&, unsigned long hash) noexcept(true) {
197  return hash;
198  }
199  };
200 
201  template<unsigned long len,class C>
202  struct core_hash {
203  static unsigned long FORCE_INLINE result(typename C::const_iterator &data, unsigned long hash) noexcept(true) {
204  hash += get16bits(data);
205  typename C::const_iterator it(std::next(data, sizeof(hashed_unit_t)));
206  const unsigned long tmp = (get16bits(it) << 11) ^ hash;
207  hash = (hash << 16) ^ tmp;
208  std::advance(data, 2*sizeof(hashed_unit_t));
209  return core_hash<len-1,C>::result(data, hash + (hash >> 11));
210  }
211  };
212 
213  template<unsigned long rem,class C>
214  struct end_cases;
215  template<class C>
216  struct end_cases<3,C> {
217  static constexpr unsigned long FORCE_INLINE result(const typename C::const_iterator data, unsigned long hash) noexcept(true) {
218  hash += get16bits(data);
219  hash ^= hash << 16;
220  hash ^= data[sizeof(hashed_unit_t)] << 18;
221  return hash + (hash >> 11);
222  }
223  };
224  template<class C>
225  struct end_cases<2,C> {
226  static constexpr unsigned long result(const typename C::const_iterator data, unsigned long hash) noexcept(true) {
227  hash += get16bits(data);
228  hash ^= hash << 11;
229  return hash + (hash >> 17);
230  }
231  };
232  template<class C>
233  struct end_cases<1,C> {
234  static constexpr unsigned long FORCE_INLINE result(const typename C::const_iterator data, unsigned long hash) noexcept(true) {
235  hash += *data;
236  hash ^= hash << 10;
237  return hash + (hash >> 1);
238  }
239  };
240  template<class C>
241  struct end_cases<0,C> {
242  static constexpr unsigned long FORCE_INLINE result(const typename C::const_iterator, unsigned long hash) noexcept(true) {
243  return hash;
244  }
245  };
246  }
247 
248  /// An implementation of the Hsieh hash function.
249  /**
250  From <a href="http://www.azillionmonkeys.com/qed/hash.html"/>.
251  Using template meta-programming to unroll the hashing loop.
252 
253  \see hashing::hsieh
254  */
255  template<
256  typename A, ///< The STL-container type of the item to be hashed.
257  unsigned long L ///< The length of the item to be hashed.
258  >
259  struct hsieh : std::unary_function<A, std::size_t> {
260  typedef typename std::unary_function<A, std::size_t>::result_type result_type;
261  typedef typename std::unary_function<A, std::size_t>::argument_type argument_type;
262  typedef private_::hashed_unit_t hashed_unit_t;
263  static constexpr unsigned long length=L;
264 
265  private:
266  static constexpr unsigned long rem=length&3;
267 
268  public:
269  /// Generate the hash of the value.
270  /**
271  \param d An STL-container type holding the value to be hashed.
272  \return The hash of the value.
273  */
274  result_type __fastcall FORCE_INLINE operator()(const argument_type &d) const noexcept(true) {
275  if (LIKELY(!d.empty())) {
276  assert(d.size()>=length);
277  typename argument_type::const_iterator data(d.begin());
278  const unsigned long hash_int=private_::core_hash<(length>>2),argument_type>::result(data, length);
279  unsigned long hash=private_::end_cases<rem,argument_type>::result(data, hash_int);
280  /* Force "avalanching" of final 127 bits */
281  hash ^= hash << 3;
282  hash += hash >> 5;
283  hash ^= hash << 4;
284  hash += hash >> 17;
285  hash ^= hash << 25;
286  return static_cast<result_type>(hash + (hash >> 6));
287  } else {
288  return 0;
289  }
290  }
291  };
292 
293  }
294 
295 } } }