1/* boost random/discrete_distribution.hpp header file
2 *
3 * Copyright Steven Watanabe 2009-2011
4 * Distributed under the Boost Software License, Version 1.0. (See
5 * accompanying file LICENSE_1_0.txt or copy at
6 * http://www.boost.org/LICENSE_1_0.txt)
7 *
8 * See http://www.boost.org for most recent version including documentation.
9 *
10 * $Id$
11 */
12
13#ifndef BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
14#define BOOST_RANDOM_DISCRETE_DISTRIBUTION_HPP_INCLUDED
15
16#include <vector>
17#include <limits>
18#include <numeric>
19#include <utility>
20#include <iterator>
21#include <boost/assert.hpp>
22#include <boost/random/uniform_01.hpp>
23#include <boost/random/uniform_int_distribution.hpp>
24#include <boost/random/detail/config.hpp>
25#include <boost/random/detail/operators.hpp>
26#include <boost/random/detail/vector_io.hpp>
27
28#ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
29#include <initializer_list>
30#endif
31
32#include <boost/range/begin.hpp>
33#include <boost/range/end.hpp>
34
35#include <boost/random/detail/disable_warnings.hpp>
36
37namespace boost {
38namespace random {
39namespace detail {
40
41template<class IntType, class WeightType>
42struct integer_alias_table {
43 WeightType get_weight(IntType bin) const {
44 WeightType result = _average;
45 if(bin < _excess) ++result;
46 return result;
47 }
48 template<class Iter>
49 WeightType init_average(Iter begin, Iter end) {
50 WeightType weight_average = 0;
51 IntType excess = 0;
52 IntType n = 0;
53 // weight_average * n + excess == current partial sum
54 // This is a bit messy, but it's guaranteed not to overflow
55 for(Iter iter = begin; iter != end; ++iter) {
56 ++n;
57 if(*iter < weight_average) {
58 WeightType diff = weight_average - *iter;
59 weight_average -= diff / n;
60 if(diff % n > excess) {
61 --weight_average;
62 excess += n - diff % n;
63 } else {
64 excess -= diff % n;
65 }
66 } else {
67 WeightType diff = *iter - weight_average;
68 weight_average += diff / n;
69 if(diff % n < n - excess) {
70 excess += diff % n;
71 } else {
72 ++weight_average;
73 excess -= n - diff % n;
74 }
75 }
76 }
77 _alias_table.resize(static_cast<std::size_t>(n));
78 _average = weight_average;
79 _excess = excess;
80 return weight_average;
81 }
82 void init_empty()
83 {
84 _alias_table.clear();
85 _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
86 static_cast<IntType>(0)));
87 _average = static_cast<WeightType>(1);
88 _excess = static_cast<IntType>(0);
89 }
90 bool operator==(const integer_alias_table& other) const
91 {
92 return _alias_table == other._alias_table &&
93 _average == other._average && _excess == other._excess;
94 }
95 static WeightType normalize(WeightType val, WeightType average)
96 {
97 return val;
98 }
99 static void normalize(std::vector<WeightType>&) {}
100 template<class URNG>
101 WeightType test(URNG &urng) const
102 {
103 return uniform_int_distribution<WeightType>(0, _average)(urng);
104 }
105 bool accept(IntType result, WeightType val) const
106 {
107 return result < _excess || val < _average;
108 }
109 static WeightType try_get_sum(const std::vector<WeightType>& weights)
110 {
111 WeightType result = static_cast<WeightType>(0);
112 for(typename std::vector<WeightType>::const_iterator
113 iter = weights.begin(), end = weights.end();
114 iter != end; ++iter)
115 {
116 if((std::numeric_limits<WeightType>::max)() - result > *iter) {
117 return static_cast<WeightType>(0);
118 }
119 result += *iter;
120 }
121 return result;
122 }
123 template<class URNG>
124 static WeightType generate_in_range(URNG &urng, WeightType max)
125 {
126 return uniform_int_distribution<WeightType>(
127 static_cast<WeightType>(0), max-1)(urng);
128 }
129 typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
130 alias_table_t _alias_table;
131 WeightType _average;
132 IntType _excess;
133};
134
135template<class IntType, class WeightType>
136struct real_alias_table {
137 WeightType get_weight(IntType) const
138 {
139 return WeightType(1.0);
140 }
141 template<class Iter>
142 WeightType init_average(Iter first, Iter last)
143 {
144 std::size_t size = std::distance(first, last);
145 WeightType weight_sum =
146 std::accumulate(first, last, static_cast<WeightType>(0));
147 _alias_table.resize(size);
148 return weight_sum / size;
149 }
150 void init_empty()
151 {
152 _alias_table.clear();
153 _alias_table.push_back(std::make_pair(static_cast<WeightType>(1),
154 static_cast<IntType>(0)));
155 }
156 bool operator==(const real_alias_table& other) const
157 {
158 return _alias_table == other._alias_table;
159 }
160 static WeightType normalize(WeightType val, WeightType average)
161 {
162 return val / average;
163 }
164 static void normalize(std::vector<WeightType>& weights)
165 {
166 WeightType sum =
167 std::accumulate(weights.begin(), weights.end(),
168 static_cast<WeightType>(0));
169 for(typename std::vector<WeightType>::iterator
170 iter = weights.begin(),
171 end = weights.end();
172 iter != end; ++iter)
173 {
174 *iter /= sum;
175 }
176 }
177 template<class URNG>
178 WeightType test(URNG &urng) const
179 {
180 return uniform_01<WeightType>()(urng);
181 }
182 bool accept(IntType, WeightType) const
183 {
184 return true;
185 }
186 static WeightType try_get_sum(const std::vector<WeightType>& weights)
187 {
188 return static_cast<WeightType>(1);
189 }
190 template<class URNG>
191 static WeightType generate_in_range(URNG &urng, WeightType)
192 {
193 return uniform_01<WeightType>()(urng);
194 }
195 typedef std::vector<std::pair<WeightType, IntType> > alias_table_t;
196 alias_table_t _alias_table;
197};
198
199template<bool IsIntegral>
200struct select_alias_table;
201
202template<>
203struct select_alias_table<true> {
204 template<class IntType, class WeightType>
205 struct apply {
206 typedef integer_alias_table<IntType, WeightType> type;
207 };
208};
209
210template<>
211struct select_alias_table<false> {
212 template<class IntType, class WeightType>
213 struct apply {
214 typedef real_alias_table<IntType, WeightType> type;
215 };
216};
217
218}
219
220/**
221 * The class @c discrete_distribution models a \random_distribution.
222 * It produces integers in the range [0, n) with the probability
223 * of producing each value is specified by the parameters of the
224 * distribution.
225 */
226template<class IntType = int, class WeightType = double>
227class discrete_distribution {
228public:
229 typedef WeightType input_type;
230 typedef IntType result_type;
231
232 class param_type {
233 public:
234
235 typedef discrete_distribution distribution_type;
236
237 /**
238 * Constructs a @c param_type object, representing a distribution
239 * with \f$p(0) = 1\f$ and \f$p(k|k>0) = 0\f$.
240 */
241 param_type() : _probabilities(1, static_cast<WeightType>(1)) {}
242 /**
243 * If @c first == @c last, equivalent to the default constructor.
244 * Otherwise, the values of the range represent weights for the
245 * possible values of the distribution.
246 */
247 template<class Iter>
248 param_type(Iter first, Iter last) : _probabilities(first, last)
249 {
250 normalize();
251 }
252#ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
253 /**
254 * If wl.size() == 0, equivalent to the default constructor.
255 * Otherwise, the values of the @c initializer_list represent
256 * weights for the possible values of the distribution.
257 */
258 param_type(const std::initializer_list<WeightType>& wl)
259 : _probabilities(wl)
260 {
261 normalize();
262 }
263#endif
264 /**
265 * If the range is empty, equivalent to the default constructor.
266 * Otherwise, the elements of the range represent
267 * weights for the possible values of the distribution.
268 */
269 template<class Range>
270 explicit param_type(const Range& range)
271 : _probabilities(boost::begin(range), boost::end(range))
272 {
273 normalize();
274 }
275
276 /**
277 * If nw is zero, equivalent to the default constructor.
278 * Otherwise, the range of the distribution is [0, nw),
279 * and the weights are found by calling fw with values
280 * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
281 * \f$\mbox{xmax} - \delta/2\f$, where
282 * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
283 */
284 template<class Func>
285 param_type(std::size_t nw, double xmin, double xmax, Func fw)
286 {
287 std::size_t n = (nw == 0) ? 1 : nw;
288 double delta = (xmax - xmin) / n;
289 BOOST_ASSERT(delta > 0);
290 for(std::size_t k = 0; k < n; ++k) {
291 _probabilities.push_back(fw(xmin + k*delta + delta/2));
292 }
293 normalize();
294 }
295
296 /**
297 * Returns a vector containing the probabilities of each possible
298 * value of the distribution.
299 */
300 std::vector<WeightType> probabilities() const
301 {
302 return _probabilities;
303 }
304
305 /** Writes the parameters to a @c std::ostream. */
306 BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, param_type, parm)
307 {
308 detail::print_vector(os, parm._probabilities);
309 return os;
310 }
311
312 /** Reads the parameters from a @c std::istream. */
313 BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, param_type, parm)
314 {
315 std::vector<WeightType> temp;
316 detail::read_vector(is, temp);
317 if(is) {
318 parm._probabilities.swap(temp);
319 }
320 return is;
321 }
322
323 /** Returns true if the two sets of parameters are the same. */
324 BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(param_type, lhs, rhs)
325 {
326 return lhs._probabilities == rhs._probabilities;
327 }
328 /** Returns true if the two sets of parameters are different. */
329 BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(param_type)
330 private:
331 /// @cond show_private
332 friend class discrete_distribution;
333 explicit param_type(const discrete_distribution& dist)
334 : _probabilities(dist.probabilities())
335 {}
336 void normalize()
337 {
338 impl_type::normalize(_probabilities);
339 }
340 std::vector<WeightType> _probabilities;
341 /// @endcond
342 };
343
344 /**
345 * Creates a new @c discrete_distribution object that has
346 * \f$p(0) = 1\f$ and \f$p(i|i>0) = 0\f$.
347 */
348 discrete_distribution()
349 {
350 _impl.init_empty();
351 }
352 /**
353 * Constructs a discrete_distribution from an iterator range.
354 * If @c first == @c last, equivalent to the default constructor.
355 * Otherwise, the values of the range represent weights for the
356 * possible values of the distribution.
357 */
358 template<class Iter>
359 discrete_distribution(Iter first, Iter last)
360 {
361 init(first, last);
362 }
363#ifndef BOOST_NO_CXX11_HDR_INITIALIZER_LIST
364 /**
365 * Constructs a @c discrete_distribution from a @c std::initializer_list.
366 * If the @c initializer_list is empty, equivalent to the default
367 * constructor. Otherwise, the values of the @c initializer_list
368 * represent weights for the possible values of the distribution.
369 * For example, given the distribution
370 *
371 * @code
372 * discrete_distribution<> dist{1, 4, 5};
373 * @endcode
374 *
375 * The probability of a 0 is 1/10, the probability of a 1 is 2/5,
376 * the probability of a 2 is 1/2, and no other values are possible.
377 */
378 discrete_distribution(std::initializer_list<WeightType> wl)
379 {
380 init(wl.begin(), wl.end());
381 }
382#endif
383 /**
384 * Constructs a discrete_distribution from a Boost.Range range.
385 * If the range is empty, equivalent to the default constructor.
386 * Otherwise, the values of the range represent weights for the
387 * possible values of the distribution.
388 */
389 template<class Range>
390 explicit discrete_distribution(const Range& range)
391 {
392 init(boost::begin(range), boost::end(range));
393 }
394 /**
395 * Constructs a discrete_distribution that approximates a function.
396 * If nw is zero, equivalent to the default constructor.
397 * Otherwise, the range of the distribution is [0, nw),
398 * and the weights are found by calling fw with values
399 * evenly distributed between \f$\mbox{xmin} + \delta/2\f$ and
400 * \f$\mbox{xmax} - \delta/2\f$, where
401 * \f$\delta = (\mbox{xmax} - \mbox{xmin})/\mbox{nw}\f$.
402 */
403 template<class Func>
404 discrete_distribution(std::size_t nw, double xmin, double xmax, Func fw)
405 {
406 std::size_t n = (nw == 0) ? 1 : nw;
407 double delta = (xmax - xmin) / n;
408 BOOST_ASSERT(delta > 0);
409 std::vector<WeightType> weights;
410 for(std::size_t k = 0; k < n; ++k) {
411 weights.push_back(fw(xmin + k*delta + delta/2));
412 }
413 init(weights.begin(), weights.end());
414 }
415 /**
416 * Constructs a discrete_distribution from its parameters.
417 */
418 explicit discrete_distribution(const param_type& parm)
419 {
420 param(parm);
421 }
422
423 /**
424 * Returns a value distributed according to the parameters of the
425 * discrete_distribution.
426 */
427 template<class URNG>
428 IntType operator()(URNG& urng) const
429 {
430 BOOST_ASSERT(!_impl._alias_table.empty());
431 IntType result;
432 WeightType test;
433 do {
434 result = uniform_int_distribution<IntType>((min)(), (max)())(urng);
435 test = _impl.test(urng);
436 } while(!_impl.accept(result, test));
437 if(test < _impl._alias_table[static_cast<std::size_t>(result)].first) {
438 return result;
439 } else {
440 return(_impl._alias_table[static_cast<std::size_t>(result)].second);
441 }
442 }
443
444 /**
445 * Returns a value distributed according to the parameters
446 * specified by param.
447 */
448 template<class URNG>
449 IntType operator()(URNG& urng, const param_type& parm) const
450 {
451 if(WeightType limit = impl_type::try_get_sum(parm._probabilities)) {
452 WeightType val = impl_type::generate_in_range(urng, limit);
453 WeightType sum = 0;
454 std::size_t result = 0;
455 for(typename std::vector<WeightType>::const_iterator
456 iter = parm._probabilities.begin(),
457 end = parm._probabilities.end();
458 iter != end; ++iter, ++result)
459 {
460 sum += *iter;
461 if(sum > val) {
462 return result;
463 }
464 }
465 // This shouldn't be reachable, but round-off error
466 // can prevent any match from being found when val is
467 // very close to 1.
468 return static_cast<IntType>(parm._probabilities.size() - 1);
469 } else {
470 // WeightType is integral and sum(parm._probabilities)
471 // would overflow. Just use the easy solution.
472 return discrete_distribution(parm)(urng);
473 }
474 }
475
476 /** Returns the smallest value that the distribution can produce. */
477 result_type min BOOST_PREVENT_MACRO_SUBSTITUTION () const { return 0; }
478 /** Returns the largest value that the distribution can produce. */
479 result_type max BOOST_PREVENT_MACRO_SUBSTITUTION () const
480 { return static_cast<result_type>(_impl._alias_table.size() - 1); }
481
482 /**
483 * Returns a vector containing the probabilities of each
484 * value of the distribution. For example, given
485 *
486 * @code
487 * discrete_distribution<> dist = { 1, 4, 5 };
488 * std::vector<double> p = dist.param();
489 * @endcode
490 *
491 * the vector, p will contain {0.1, 0.4, 0.5}.
492 *
493 * If @c WeightType is integral, then the weights
494 * will be returned unchanged.
495 */
496 std::vector<WeightType> probabilities() const
497 {
498 std::vector<WeightType> result(_impl._alias_table.size(), static_cast<WeightType>(0));
499 std::size_t i = 0;
500 for(typename impl_type::alias_table_t::const_iterator
501 iter = _impl._alias_table.begin(),
502 end = _impl._alias_table.end();
503 iter != end; ++iter, ++i)
504 {
505 WeightType val = iter->first;
506 result[i] += val;
507 result[static_cast<std::size_t>(iter->second)] += _impl.get_weight(i) - val;
508 }
509 impl_type::normalize(result);
510 return(result);
511 }
512
513 /** Returns the parameters of the distribution. */
514 param_type param() const
515 {
516 return param_type(*this);
517 }
518 /** Sets the parameters of the distribution. */
519 void param(const param_type& parm)
520 {
521 init(parm._probabilities.begin(), parm._probabilities.end());
522 }
523
524 /**
525 * Effects: Subsequent uses of the distribution do not depend
526 * on values produced by any engine prior to invoking reset.
527 */
528 void reset() {}
529
530 /** Writes a distribution to a @c std::ostream. */
531 BOOST_RANDOM_DETAIL_OSTREAM_OPERATOR(os, discrete_distribution, dd)
532 {
533 os << dd.param();
534 return os;
535 }
536
537 /** Reads a distribution from a @c std::istream */
538 BOOST_RANDOM_DETAIL_ISTREAM_OPERATOR(is, discrete_distribution, dd)
539 {
540 param_type parm;
541 if(is >> parm) {
542 dd.param(parm);
543 }
544 return is;
545 }
546
547 /**
548 * Returns true if the two distributions will return the
549 * same sequence of values, when passed equal generators.
550 */
551 BOOST_RANDOM_DETAIL_EQUALITY_OPERATOR(discrete_distribution, lhs, rhs)
552 {
553 return lhs._impl == rhs._impl;
554 }
555 /**
556 * Returns true if the two distributions may return different
557 * sequences of values, when passed equal generators.
558 */
559 BOOST_RANDOM_DETAIL_INEQUALITY_OPERATOR(discrete_distribution)
560
561private:
562
563 /// @cond show_private
564
565 template<class Iter>
566 void init(Iter first, Iter last, std::input_iterator_tag)
567 {
568 std::vector<WeightType> temp(first, last);
569 init(temp.begin(), temp.end());
570 }
571 template<class Iter>
572 void init(Iter first, Iter last, std::forward_iterator_tag)
573 {
574 std::vector<std::pair<WeightType, IntType> > below_average;
575 std::vector<std::pair<WeightType, IntType> > above_average;
576 WeightType weight_average = _impl.init_average(first, last);
577 WeightType normalized_average = _impl.get_weight(0);
578 std::size_t i = 0;
579 for(; first != last; ++first, ++i) {
580 WeightType val = impl_type::normalize(*first, weight_average);
581 std::pair<WeightType, IntType> elem(val, static_cast<IntType>(i));
582 if(val < normalized_average) {
583 below_average.push_back(elem);
584 } else {
585 above_average.push_back(elem);
586 }
587 }
588
589 typename impl_type::alias_table_t::iterator
590 b_iter = below_average.begin(),
591 b_end = below_average.end(),
592 a_iter = above_average.begin(),
593 a_end = above_average.end()
594 ;
595 while(b_iter != b_end && a_iter != a_end) {
596 _impl._alias_table[static_cast<std::size_t>(b_iter->second)] =
597 std::make_pair(b_iter->first, a_iter->second);
598 a_iter->first -= (_impl.get_weight(b_iter->second) - b_iter->first);
599 if(a_iter->first < normalized_average) {
600 *b_iter = *a_iter++;
601 } else {
602 ++b_iter;
603 }
604 }
605 for(; b_iter != b_end; ++b_iter) {
606 _impl._alias_table[static_cast<std::size_t>(b_iter->second)].first =
607 _impl.get_weight(b_iter->second);
608 }
609 for(; a_iter != a_end; ++a_iter) {
610 _impl._alias_table[static_cast<std::size_t>(a_iter->second)].first =
611 _impl.get_weight(a_iter->second);
612 }
613 }
614 template<class Iter>
615 void init(Iter first, Iter last)
616 {
617 if(first == last) {
618 _impl.init_empty();
619 } else {
620 typename std::iterator_traits<Iter>::iterator_category category;
621 init(first, last, category);
622 }
623 }
624 typedef typename detail::select_alias_table<
625 (::boost::is_integral<WeightType>::value)
626 >::template apply<IntType, WeightType>::type impl_type;
627 impl_type _impl;
628 /// @endcond
629};
630
631}
632}
633
634#include <boost/random/detail/enable_warnings.hpp>
635
636#endif
637