1 | /* boost random/discrete_distribution.hpp header file |
2 | * |
3 | * Copyright Steven Watanabe 2009-2011 |
4 | * Distributed under the Boost Software License, Version 1.0. (See |
5 | * accompanying file LICENSE_1_0.txt or copy at |
6 | * http://www.boost.org/LICENSE_1_0.txt) |
7 | * |
8 | * See http://www.boost.org for most recent version including documentation. |
9 | * |
10 | * $Id$ |
11 | */ |
12 | |
13 | #ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED |
14 | #define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED |
15 | |
16 | #include <vector> |
17 | #include <limits> |
18 | #include <numeric> |
19 | #include <utility> |
20 | #include <iterator> |
21 | #include <boost/assert.hpp> |
22 | #include <boost/random/uniform_01.hpp> |
23 | #include <boost/random/uniform_int_distribution.hpp> |
24 | #include <boost/random/detail/config.hpp> |
25 | #include <boost/random/detail/operators.hpp> |
26 | #include <boost/random/detail/vector_io.hpp> |
27 | |
28 | #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST |
29 | #include <initializer_list> |
30 | #endif |
31 | |
32 | #include <boost/range/begin.hpp> |
33 | #include <boost/range/end.hpp> |
34 | |
35 | #include <boost/random/detail/disable_warnings.hpp> |
36 | |
37 | namespace boost { |
38 | namespace random { |
39 | namespace detail { |
40 | |
41 | template<class IntType, class WeightType> |
42 | struct integer_alias_table { |
43 | WeightType get_weight(IntType bin) const { |
44 | WeightType result = _average; |
45 | if(bin < _excess) ++result; |
46 | return result; |
47 | } |
48 | template<class Iter> |
49 | WeightType init_average(Iter begin, Iter end) { |
50 | WeightType weight_average = 0; |
51 | IntType excess = 0; |
52 | IntType n = 0; |
53 | // weight_average * n + excess == current partial sum |
54 | // This is a bit messy, but it's guaranteed not to overflow |
55 | for(Iter iter = begin; iter != end; ++iter) { |
56 | ++n; |
57 | if(*iter < weight_average) { |
58 | WeightType diff = weight_average - *iter; |
59 | weight_average -= diff / n; |
60 | if(diff % n > excess) { |
61 | --weight_average; |
62 | excess += n - diff % n; |
63 | } else { |
64 | excess -= diff % n; |
65 | } |
66 | } else { |
67 | WeightType diff = *iter - weight_average; |
68 | weight_average += diff / n; |
69 | if(diff % n < n - excess) { |
70 | excess += diff % n; |
71 | } else { |
72 | ++weight_average; |
73 | excess -= n - diff % n; |
74 | } |
75 | } |
76 | } |
77 | _alias_table.resize(static_cast<std::size_t>(n)); |
78 | _average = weight_average; |
79 | _excess = excess; |
80 | return weight_average; |
81 | } |
82 | void init_empty() |
83 | { |
84 | _alias_table.clear(); |
85 | _alias_table.push_back(std::make_pair(static_cast<WeightType>(1), |
86 | static_cast<IntType>(0))); |
87 | _average = static_cast<WeightType>(1); |
88 | _excess = static_cast<IntType>(0); |
89 | } |
90 | bool operator==(const integer_alias_table& other) const |
91 | { |
92 | return _alias_table == other._alias_table && |
93 | _average == other._average && _excess == other._excess; |
94 | } |
95 | static WeightType normalize(WeightType val, WeightType average) |
96 | { |
97 | return val; |
98 | } |
99 | static void normalize(std::vector<WeightType>&) {} |
100 | template<class URNG> |
101 | WeightType test(URNG &urng) const |
102 | { |
103 | return uniform_int_distribution<WeightType>(0, _average)(urng); |
104 | } |
105 | bool accept(IntType result, WeightType val) const |
106 | { |
107 | return result < _excess || val < _average; |
108 | } |
109 | static WeightType try_get_sum(const std::vector<WeightType>& weights) |
110 | { |
111 | WeightType result = static_cast<WeightType>(0); |
112 | for(typename std::vector<WeightType>::const_iterator |
113 | iter = weights.begin(), end = weights.end(); |
114 | iter != end; ++iter) |
115 | { |
116 | if((std::numeric_limits<WeightType>::max)() - result > *iter) { |
117 | return static_cast<WeightType>(0); |
118 | } |
119 | result += *iter; |
120 | } |
121 | return result; |
122 | } |
123 | template<class URNG> |
124 | static WeightType generate_in_range(URNG &urng, WeightType max) |
125 | { |
126 | return uniform_int_distribution<WeightType>( |
127 | static_cast<WeightType>(0), max-1)(urng); |
128 | } |
129 | typedef std::vector<std::pair<WeightType, IntType> > alias_table_t; |
130 | alias_table_t _alias_table; |
131 | WeightType _average; |
132 | IntType _excess; |
133 | }; |
134 | |
135 | template<class IntType, class WeightType> |
136 | struct real_alias_table { |
137 | WeightType get_weight(IntType) const |
138 | { |
139 | return WeightType(1.0); |
140 | } |
141 | template<class Iter> |
142 | WeightType init_average(Iter first, Iter last) |
143 | { |
144 | std::size_t size = std::distance(first, last); |
145 | WeightType weight_sum = |
146 | std::accumulate(first, last, static_cast<WeightType>(0)); |
147 | _alias_table.resize(size); |
148 | return weight_sum / size; |
149 | } |
150 | void init_empty() |
151 | { |
152 | _alias_table.clear(); |
153 | _alias_table.push_back(std::make_pair(static_cast<WeightType>(1), |
154 | static_cast<IntType>(0))); |
155 | } |
156 | bool operator==(const real_alias_table& other) const |
157 | { |
158 | return _alias_table == other._alias_table; |
159 | } |
160 | static WeightType normalize(WeightType val, WeightType average) |
161 | { |
162 | return val / average; |
163 | } |
164 | static void normalize(std::vector<WeightType>& weights) |
165 | { |
166 | WeightType sum = |
167 | std::accumulate(weights.begin(), weights.end(), |
168 | static_cast<WeightType>(0)); |
169 | for(typename std::vector<WeightType>::iterator |
170 | iter = weights.begin(), |
171 | end = weights.end(); |
172 | iter != end; ++iter) |
173 | { |
174 | *iter /= sum; |
175 | } |
176 | } |
177 | template<class URNG> |
178 | WeightType test(URNG &urng) const |
179 | { |
180 | return uniform_01<WeightType>()(urng); |
181 | } |
182 | bool accept(IntType, WeightType) const |
183 | { |
184 | return true; |
185 | } |
186 | static WeightType try_get_sum(const std::vector<WeightType>& weights) |
187 | { |
188 | return static_cast<WeightType>(1); |
189 | } |
190 | template<class URNG> |
191 | static WeightType generate_in_range(URNG &urng, WeightType) |
192 | { |
193 | return uniform_01<WeightType>()(urng); |
194 | } |
195 | typedef std::vector<std::pair<WeightType, IntType> > alias_table_t; |
196 | alias_table_t _alias_table; |
197 | }; |
198 | |
199 | template<bool IsIntegral> |
200 | struct select_alias_table; |
201 | |
202 | template<> |
203 | struct select_alias_table<true> { |
204 | template<class IntType, class WeightType> |
205 | struct apply { |
206 | typedef integer_alias_table<IntType, WeightType> type; |
207 | }; |
208 | }; |
209 | |
210 | template<> |
211 | struct select_alias_table<false> { |
212 | template<class IntType, class WeightType> |
213 | struct apply { |
214 | typedef real_alias_table<IntType, WeightType> type; |
215 | }; |
216 | }; |
217 | |
218 | } |
219 | |
220 | /** |
221 | * The class @c discrete_distribution models a \random_distribution. |
222 | * It produces integers in the range [0, n) with the probability |
223 | * of producing each value is specified by the parameters of the |
224 | * distribution. |
225 | */ |
226 | template<class IntType = int, class WeightType = double> |
227 | class discrete_distribution { |
228 | public: |
229 | typedef WeightType input_type; |
230 | typedef IntType result_type; |
231 | |
232 | class param_type { |
233 | public: |
234 | |
235 | typedef discrete_distribution distribution_type; |
236 | |
237 | /** |
238 | * Constructs a @c param_type object, representing a distribution |
239 | * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$. |
240 | */ |
241 | param_type() : _probabilities(1, static_cast<WeightType>(1)) {} |
242 | /** |
243 | * If @c first == @c last, equivalent to the default constructor. |
244 | * Otherwise, the values of the range represent weights for the |
245 | * possible values of the distribution. |
246 | */ |
247 | template<class Iter> |
248 | param_type(Iter first, Iter last) : _probabilities(first, last) |
249 | { |
250 | normalize(); |
251 | } |
252 | #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST |
253 | /** |
254 | * If wl.size() == 0, equivalent to the default constructor. |
255 | * Otherwise, the values of the @c initializer_list represent |
256 | * weights for the possible values of the distribution. |
257 | */ |
258 | param_type(const std::initializer_list<WeightType>& wl) |
259 | : _probabilities(wl) |
260 | { |
261 | normalize(); |
262 | } |
263 | #endif |
264 | /** |
265 | * If the range is empty, equivalent to the default constructor. |
266 | * Otherwise, the elements of the range represent |
267 | * weights for the possible values of the distribution. |
268 | */ |
269 | template<class Range> |
270 | explicit param_type(const Range& range) |
271 | : _probabilities(boost::begin(range), boost::end(range)) |
272 | { |
273 | normalize(); |
274 | } |
275 | |
276 | /** |
277 | * If nw is zero, equivalent to the default constructor. |
278 | * Otherwise, the range of the distribution is [0, nw), |
279 | * and the weights are found by calling fw with values |
280 | * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and |
281 | * \f$\mbox{xmax} - \delta/2\f$, where |
282 | * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$. |
283 | */ |
284 | template<class Func> |
285 | param_type(std::size_t nw, double xmin, double xmax, Func fw) |
286 | { |
287 | std::size_t n = (nw == 0) ? 1 : nw; |
288 | double delta = (xmax - xmin) / n; |
289 | BOOST_ASSERT(delta > 0); |
290 | for(std::size_t k = 0; k < n; ++k) { |
291 | _probabilities.push_back(fw(xmin + k*delta + delta/2)); |
292 | } |
293 | normalize(); |
294 | } |
295 | |
296 | /** |
297 | * Returns a vector containing the probabilities of each possible |
298 | * value of the distribution. |
299 | */ |
300 | std::vector<WeightType> probabilities() const |
301 | { |
302 | return _probabilities; |
303 | } |
304 | |
305 | /** Writes the parameters to a @c std::ostream. */ |
306 | BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm) |
307 | { |
308 | detail::print_vector(os, parm._probabilities); |
309 | return os; |
310 | } |
311 | |
312 | /** Reads the parameters from a @c std::istream. */ |
313 | BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm) |
314 | { |
315 | std::vector<WeightType> temp; |
316 | detail::read_vector(is, temp); |
317 | if(is) { |
318 | parm._probabilities.swap(temp); |
319 | } |
320 | return is; |
321 | } |
322 | |
323 | /** Returns true if the two sets of parameters are the same. */ |
324 | BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs) |
325 | { |
326 | return lhs._probabilities == rhs._probabilities; |
327 | } |
328 | /** Returns true if the two sets of parameters are different. */ |
329 | BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type) |
330 | private: |
331 | /// @cond show_private |
332 | friend class discrete_distribution; |
333 | explicit param_type(const discrete_distribution& dist) |
334 | : _probabilities(dist.probabilities()) |
335 | {} |
336 | void normalize() |
337 | { |
338 | impl_type::normalize(_probabilities); |
339 | } |
340 | std::vector<WeightType> _probabilities; |
341 | /// @endcond |
342 | }; |
343 | |
344 | /** |
345 | * Creates a new @c discrete_distribution object that has |
346 | * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$. |
347 | */ |
348 | discrete_distribution() |
349 | { |
350 | _impl.init_empty(); |
351 | } |
352 | /** |
353 | * Constructs a discrete_distribution from an iterator range. |
354 | * If @c first == @c last, equivalent to the default constructor. |
355 | * Otherwise, the values of the range represent weights for the |
356 | * possible values of the distribution. |
357 | */ |
358 | template<class Iter> |
359 | discrete_distribution(Iter first, Iter last) |
360 | { |
361 | init(first, last); |
362 | } |
363 | #ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST |
364 | /** |
365 | * Constructs a @c discrete_distribution from a @c std::initializer_list. |
366 | * If the @c initializer_list is empty, equivalent to the default |
367 | * constructor. Otherwise, the values of the @c initializer_list |
368 | * represent weights for the possible values of the distribution. |
369 | * For example, given the distribution |
370 | * |
371 | * @code |
372 | * discrete_distribution<> dist{1, 4, 5}; |
373 | * @endcode |
374 | * |
375 | * The probability of a 0 is 1/10, the probability of a 1 is 2/5, |
376 | * the probability of a 2 is 1/2, and no other values are possible. |
377 | */ |
378 | discrete_distribution(std::initializer_list<WeightType> wl) |
379 | { |
380 | init(wl.begin(), wl.end()); |
381 | } |
382 | #endif |
383 | /** |
384 | * Constructs a discrete_distribution from a Boost.Range range. |
385 | * If the range is empty, equivalent to the default constructor. |
386 | * Otherwise, the values of the range represent weights for the |
387 | * possible values of the distribution. |
388 | */ |
389 | template<class Range> |
390 | explicit discrete_distribution(const Range& range) |
391 | { |
392 | init(boost::begin(range), boost::end(range)); |
393 | } |
394 | /** |
395 | * Constructs a discrete_distribution that approximates a function. |
396 | * If nw is zero, equivalent to the default constructor. |
397 | * Otherwise, the range of the distribution is [0, nw), |
398 | * and the weights are found by calling fw with values |
399 | * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and |
400 | * \f$\mbox{xmax} - \delta/2\f$, where |
401 | * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$. |
402 | */ |
403 | template<class Func> |
404 | discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw) |
405 | { |
406 | std::size_t n = (nw == 0) ? 1 : nw; |
407 | double delta = (xmax - xmin) / n; |
408 | BOOST_ASSERT(delta > 0); |
409 | std::vector<WeightType> weights; |
410 | for(std::size_t k = 0; k < n; ++k) { |
411 | weights.push_back(fw(xmin + k*delta + delta/2)); |
412 | } |
413 | init(weights.begin(), weights.end()); |
414 | } |
415 | /** |
416 | * Constructs a discrete_distribution from its parameters. |
417 | */ |
418 | explicit discrete_distribution(const param_type& parm) |
419 | { |
420 | param(parm); |
421 | } |
422 | |
423 | /** |
424 | * Returns a value distributed according to the parameters of the |
425 | * discrete_distribution. |
426 | */ |
427 | template<class URNG> |
428 | IntType operator()(URNG& urng) const |
429 | { |
430 | BOOST_ASSERT(!_impl._alias_table.empty()); |
431 | IntType result; |
432 | WeightType test; |
433 | do { |
434 | result = uniform_int_distribution<IntType>((min)(), (max)())(urng); |
435 | test = _impl.test(urng); |
436 | } while(!_impl.accept(result, test)); |
437 | if(test < _impl._alias_table[static_cast<std::size_t>(result)].first) { |
438 | return result; |
439 | } else { |
440 | return(_impl._alias_table[static_cast<std::size_t>(result)].second); |
441 | } |
442 | } |
443 | |
444 | /** |
445 | * Returns a value distributed according to the parameters |
446 | * specified by param. |
447 | */ |
448 | template<class URNG> |
449 | IntType operator()(URNG& urng, const param_type& parm) const |
450 | { |
451 | if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) { |
452 | WeightType val = impl_type::generate_in_range(urng, limit); |
453 | WeightType sum = 0; |
454 | std::size_t result = 0; |
455 | for(typename std::vector<WeightType>::const_iterator |
456 | iter = parm._probabilities.begin(), |
457 | end = parm._probabilities.end(); |
458 | iter != end; ++iter, ++result) |
459 | { |
460 | sum += *iter; |
461 | if(sum > val) { |
462 | return result; |
463 | } |
464 | } |
465 | // This shouldn't be reachable, but round-off error |
466 | // can prevent any match from being found when val is |
467 | // very close to 1. |
468 | return static_cast<IntType>(parm._probabilities.size() - 1); |
469 | } else { |
470 | // WeightType is integral and sum(parm._probabilities) |
471 | // would overflow. Just use the easy solution. |
472 | return discrete_distribution(parm)(urng); |
473 | } |
474 | } |
475 | |
476 | /** Returns the smallest value that the distribution can produce. */ |
477 | result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; } |
478 | /** Returns the largest value that the distribution can produce. */ |
479 | result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const |
480 | { return static_cast<result_type>(_impl._alias_table.size() - 1); } |
481 | |
482 | /** |
483 | * Returns a vector containing the probabilities of each |
484 | * value of the distribution. For example, given |
485 | * |
486 | * @code |
487 | * discrete_distribution<> dist = { 1, 4, 5 }; |
488 | * std::vector<double> p = dist.param(); |
489 | * @endcode |
490 | * |
491 | * the vector, p will contain {0.1, 0.4, 0.5}. |
492 | * |
493 | * If @c WeightType is integral, then the weights |
494 | * will be returned unchanged. |
495 | */ |
496 | std::vector<WeightType> probabilities() const |
497 | { |
498 | std::vector<WeightType> result(_impl._alias_table.size(), static_cast<WeightType>(0)); |
499 | std::size_t i = 0; |
500 | for(typename impl_type::alias_table_t::const_iterator |
501 | iter = _impl._alias_table.begin(), |
502 | end = _impl._alias_table.end(); |
503 | iter != end; ++iter, ++i) |
504 | { |
505 | WeightType val = iter->first; |
506 | result[i] += val; |
507 | result[static_cast<std::size_t>(iter->second)] += _impl.get_weight(i) - val; |
508 | } |
509 | impl_type::normalize(result); |
510 | return(result); |
511 | } |
512 | |
513 | /** Returns the parameters of the distribution. */ |
514 | param_type param() const |
515 | { |
516 | return param_type(*this); |
517 | } |
518 | /** Sets the parameters of the distribution. */ |
519 | void param(const param_type& parm) |
520 | { |
521 | init(parm._probabilities.begin(), parm._probabilities.end()); |
522 | } |
523 | |
524 | /** |
525 | * Effects: Subsequent uses of the distribution do not depend |
526 | * on values produced by any engine prior to invoking reset. |
527 | */ |
528 | void reset() {} |
529 | |
530 | /** Writes a distribution to a @c std::ostream. */ |
531 | BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd) |
532 | { |
533 | os << dd.param(); |
534 | return os; |
535 | } |
536 | |
537 | /** Reads a distribution from a @c std::istream */ |
538 | BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd) |
539 | { |
540 | param_type parm; |
541 | if(is >> parm) { |
542 | dd.param(parm); |
543 | } |
544 | return is; |
545 | } |
546 | |
547 | /** |
548 | * Returns true if the two distributions will return the |
549 | * same sequence of values, when passed equal generators. |
550 | */ |
551 | BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs) |
552 | { |
553 | return lhs._impl == rhs._impl; |
554 | } |
555 | /** |
556 | * Returns true if the two distributions may return different |
557 | * sequences of values, when passed equal generators. |
558 | */ |
559 | BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution) |
560 | |
561 | private: |
562 | |
563 | /// @cond show_private |
564 | |
565 | template<class Iter> |
566 | void init(Iter first, Iter last, std::input_iterator_tag) |
567 | { |
568 | std::vector<WeightType> temp(first, last); |
569 | init(temp.begin(), temp.end()); |
570 | } |
571 | template<class Iter> |
572 | void init(Iter first, Iter last, std::forward_iterator_tag) |
573 | { |
574 | std::vector<std::pair<WeightType, IntType> > below_average; |
575 | std::vector<std::pair<WeightType, IntType> > above_average; |
576 | WeightType weight_average = _impl.init_average(first, last); |
577 | WeightType normalized_average = _impl.get_weight(0); |
578 | std::size_t i = 0; |
579 | for(; first != last; ++first, ++i) { |
580 | WeightType val = impl_type::normalize(*first, weight_average); |
581 | std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i)); |
582 | if(val < normalized_average) { |
583 | below_average.push_back(elem); |
584 | } else { |
585 | above_average.push_back(elem); |
586 | } |
587 | } |
588 | |
589 | typename impl_type::alias_table_t::iterator |
590 | b_iter = below_average.begin(), |
591 | b_end = below_average.end(), |
592 | a_iter = above_average.begin(), |
593 | a_end = above_average.end() |
594 | ; |
595 | while(b_iter != b_end && a_iter != a_end) { |
596 | _impl._alias_table[static_cast<std::size_t>(b_iter->second)] = |
597 | std::make_pair(b_iter->first, a_iter->second); |
598 | a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first); |
599 | if(a_iter->first < normalized_average) { |
600 | *b_iter = *a_iter++; |
601 | } else { |
602 | ++b_iter; |
603 | } |
604 | } |
605 | for(; b_iter != b_end; ++b_iter) { |
606 | _impl._alias_table[static_cast<std::size_t>(b_iter->second)].first = |
607 | _impl.get_weight(b_iter->second); |
608 | } |
609 | for(; a_iter != a_end; ++a_iter) { |
610 | _impl._alias_table[static_cast<std::size_t>(a_iter->second)].first = |
611 | _impl.get_weight(a_iter->second); |
612 | } |
613 | } |
614 | template<class Iter> |
615 | void init(Iter first, Iter last) |
616 | { |
617 | if(first == last) { |
618 | _impl.init_empty(); |
619 | } else { |
620 | typename std::iterator_traits<Iter>::iterator_category category; |
621 | init(first, last, category); |
622 | } |
623 | } |
624 | typedef typename detail::select_alias_table< |
625 | (::boost::is_integral<WeightType>::value) |
626 | >::template apply<IntType, WeightType>::type impl_type; |
627 | impl_type _impl; |
628 | /// @endcond |
629 | }; |
630 | |
631 | } |
632 | } |
633 | |
634 | #include <boost/random/detail/enable_warnings.hpp> |
635 | |
636 | #endif |
637 | |