1 // Written in the D programming language. 2 3 /** 4 * Implements algorithms that transform the output of uniform random 5 * number generators into other random behaviours, such as shuffling, 6 * sampling, and so on. Typically these are implemented as range 7 * objects that wrap a provided RNG instance. 8 * 9 * As with the random number generators provided elsewhere in this 10 * package, the range objects implemented here are implemented as final 11 * classes to enforce reference semantics. They also assume that the 12 * RNGs they make use of have reference type semantics. User-supplied 13 * value-type RNGs may result in incorrect behaviour when used with 14 * these objects. 15 * 16 * Copyright: © 2008-2011 Andrei Alexandrescu, 17 * 2012-2014 Joseph Rushton Wakeling 18 * 19 * License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0). 20 * 21 * Authors: $(WEB erdani.org, Andrei Alexandrescu), 22 * $(WEB braingam.es, Joseph Rushton Wakeling) 23 * 24 * Source: $(HAPSRC hap/random/_adaptor.d) 25 */ 26 module hap.random.adaptor; 27 28 import hap.random.generator, hap.random.traits; 29 30 import std.range, std.traits; 31 32 // Cover 33 /** 34 * Covers a given range $(D r) in a random manner, i.e. goes through each 35 * element of $(D r) once and only once, but in a random order. $(D r) 36 * must be a random-access range with length. 37 * 38 * If no random number generator is passed to $(D cover), the thread-global 39 * RNG rndGen will be used as the source of randomness. 40 * 41 * Example: 42 * ---- 43 * int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ]; 44 * foreach (e; cover(a)) 45 * { 46 * writeln(e); 47 * } 48 * 49 * // using a specified random number generator 50 * auto gen = new Random(unpredictableSeed); 51 * foreach (e; cover(a, gen)) 52 * { 53 * writeln(e); 54 * } 55 * ---- 56 * 57 * The alias $(D randomCover) is available to ease migration for code 58 * written using $(PHOBOSXREF random, randomCover). 59 */ 60 final class Cover(Range, UniformRNG) 61 if (isRandomAccessRange!Range && isUniformRNG!UniformRNG) 62 { 63 private: 64 bool[] _chosen; 65 size_t _current; 66 size_t _alreadyChosen; 67 Range _input; 68 UniformRNG _rng; 69 70 public: 71 this(Range input, UniformRNG rng) 72 { 73 _input = input; 74 _rng = rng; 75 _chosen.length = _input.length; 76 _alreadyChosen = 0; 77 popFront(); 78 } 79 80 this(typeof(this) that) 81 { 82 _chosen = that._chosen.dup; 83 _current = that._current; 84 _alreadyChosen = that._alreadyChosen; 85 _input = that._input.save; 86 _rng = that._rng; 87 } 88 89 /// Range primitives. 90 bool empty() @property @safe const nothrow pure 91 { 92 return _alreadyChosen > _input.length; 93 } 94 95 /// ditto 96 auto ref front() @property @safe const nothrow pure 97 in 98 { 99 assert(_alreadyChosen > 0); 100 } 101 body 102 { 103 return _input[_current]; 104 } 105 106 /// ditto 107 void popFront() 108 { 109 if (_alreadyChosen >= _input.length) 110 { 111 // No more elements 112 ++_alreadyChosen; // means we're done 113 return; 114 } 115 116 size_t k = _input.length - _alreadyChosen; 117 size_t i; 118 119 foreach (e; _input) 120 { 121 if (_chosen[i]) 122 { 123 ++i; 124 continue; 125 } 126 127 // Roll a dice with k faces 128 import hap.random.distribution; 129 auto chooseMe = uniform(0, k, _rng) == 0; 130 assert(k > 1 || chooseMe); 131 132 if (chooseMe) 133 { 134 _chosen[i] = true; 135 _current = i; 136 ++_alreadyChosen; 137 return; 138 } 139 140 --k; 141 ++i; 142 } 143 } 144 145 /// ditto 146 static if (hasLength!Range) 147 { 148 size_t length() @property @safe const nothrow pure 149 in 150 { 151 assert(_alreadyChosen > 0); 152 } 153 body 154 { 155 return (1 + _input.length) - _alreadyChosen; 156 } 157 } 158 159 /// ditto 160 static if (isForwardRange!UniformRNG) 161 { 162 typeof(this) save() @property 163 { 164 auto ret = new typeof(this)(this); 165 ret._rng = this._rng.save; 166 return ret; 167 } 168 } 169 170 } 171 172 /// ditto 173 auto cover(Range, UniformRNG)(Range r, UniformRNG rng) 174 if (isRandomAccessRange!Range && isUniformRNG!UniformRNG) 175 { 176 return new Cover!(Range, UniformRNG)(r, rng); 177 } 178 179 /// ditto 180 auto cover(Range)(Range r) 181 if (isRandomAccessRange!Range) 182 { 183 return new Cover!(Range, Random)(r, rndGen); 184 } 185 186 /// ditto 187 alias randomCover = cover; 188 189 unittest 190 { 191 import std.algorithm, std.string, std.typetuple; 192 193 int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ]; 194 195 foreach (UniformRNG; TypeTuple!(void, UniformRNGTypes)) 196 { 197 static if (is(UniformRNG == void)) 198 { 199 auto c = cover(a); 200 static assert(isInputRange!(typeof(c))); 201 static assert((isForwardRange!Random && isForwardRange!(typeof(c))) || 202 (!isForwardRange!Random && !isForwardRange!(typeof(c)))); 203 } 204 else 205 { 206 auto rng = new UniformRNG(unpredictableSeed); 207 auto c = cover(a, rng); 208 static assert(isInputRange!(typeof(c))); 209 static assert((isForwardRange!UniformRNG && isForwardRange!(typeof(c))) || 210 (!isForwardRange!UniformRNG && !isForwardRange!(typeof(c)))); 211 } 212 213 auto c2 = c.save; 214 215 int[] b = new int[9]; 216 uint i; 217 foreach (e, e2; lockstep(c, c2)) 218 { 219 b[i++] = e; 220 assert(e == e2); 221 } 222 sort(b); 223 assert(a == b, format("%s", b)); 224 } 225 } 226 227 // Sample 228 /** 229 * Selects a random subsample out of $(D r), containing exactly $(D n) 230 * elements. The order of elements is the same as in the original 231 * range. The total length of $(D r) must be known. If $(D total) is 232 * passed in, the total number of elements available to sample is 233 * considered to be $(D total). Otherwise, $(D Sample) uses 234 * $(D r.length). 235 * 236 * $(D Sample) implements Jeffrey Scott Vitter's Algorithm D 237 * (see Vitter $(WEB dx.doi.org/10.1145/358105.893, 1984), $(WEB 238 * dx.doi.org/10.1145/23002.23003, 1987)), which selects a sample 239 * of size $(D n) in O(n) steps and requiring O(n) random variates, 240 * regardless of the size of the data being sampled. The exception 241 * to this is if traversing k elements on the input range is itself 242 * an O(k) operation (e.g. when sampling lines from an input file), 243 * in which case the sampling calculation will inevitably be of 244 * O(total). 245 * 246 * $(D Sample) will throw an error if $(D total) is verifiably less 247 * than the total number of elements available in the input, or if 248 * $(D n > total). 249 * 250 * If no random number generator is passed to $(D sample), the 251 * thread-local default RNG rndGen will be used as the source of 252 * randomness. 253 * 254 * Example: 255 * ---- 256 * int[] arr = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 257 * // Print 5 random elements picked from arr 258 * foreach (e; sample(arr, 5)) 259 * { 260 * writeln(e); 261 * } 262 * 263 * // Print 5 random elements picked from arr, 264 * // using a specified random number generator 265 * auto gen = new Random(unpredictableSeed); 266 * foreach (e; sample(arr, 5, gen)) 267 * { 268 * writeln(e); 269 * } 270 * ---- 271 * 272 * The alias $(D randomSample) is available to ease migration for 273 * code written using $(PHOBOSXREF random, randomSample). 274 */ 275 final class Sample(Range, UniformRNG) 276 if (isInputRange!Range && isUniformRNG!UniformRNG) 277 { 278 private: 279 import hap.random.distribution; 280 enum ushort _alphaInverse = 13; // Vitter's recommended value. 281 enum Skip { None, A, D }; 282 size_t _available, _toSelect, _index; 283 double _Vprime; 284 Range _input; 285 Skip _skip = Skip.None; 286 UniformRNG _rng; 287 288 // Randomly reset the value of _Vprime. 289 double newVprime(size_t remaining) 290 { 291 return uniform!"()"(0.0, 1.0, _rng) ^^ (1.0 / remaining); 292 } 293 294 void prime() 295 { 296 if (empty) 297 { 298 return; 299 } 300 assert(_available && _available >= _toSelect); 301 immutable size_t s = skip(); 302 assert(s + _toSelect <= _available); 303 static if (hasLength!Range) 304 { 305 assert(s + _toSelect <= _input.length); 306 } 307 assert(!_input.empty); 308 _input.popFrontExactly(s); 309 _index += s; 310 _available -= s; 311 assert(_available > 0); 312 } 313 314 size_t skip() 315 in 316 { 317 assert(_skip != Skip.None); 318 } 319 body 320 { 321 // Step D1: if the number of points still to select is greater 322 // than a certain proportion of the remaining data points, i.e. 323 // if n >= alpha * N where alpha = 1/13, we carry out the 324 // sampling with Algorithm A. 325 if (_skip == Skip.A) 326 { 327 return skipA(); 328 } 329 else if ((_alphaInverse * _toSelect) > _available) 330 { 331 // We shouldn't get here unless the current selected 332 // algorithm is D. 333 assert(_skip == Skip.D); 334 _skip = Skip.A; 335 return skipA(); 336 } 337 else 338 { 339 assert(_skip == Skip.D); 340 return skipD(); 341 } 342 } 343 344 /* Vitter's Algorithm A, used when the ratio of needed sample values 345 * to remaining data values is sufficiently large. 346 */ 347 size_t skipA() 348 { 349 size_t s; 350 double v, quot, top; 351 352 if (_toSelect==1) 353 { 354 s = uniform(0, _available, _rng); 355 } 356 else 357 { 358 top = _available - _toSelect; 359 quot = top / _available; 360 v = uniform!"()"(0.0, 1.0, _rng); 361 362 while (quot > v) 363 { 364 ++s; 365 quot *= (top - s) / (_available - s); 366 } 367 } 368 369 return s; 370 } 371 372 /* Vitter's Algorithm D. For an extensive description of the algorithm 373 * and its rationale, see: 374 * 375 * * Vitter, J.S. (1984), "Faster methods for random sampling", 376 * Commun. ACM 27(7): 703--718 377 * 378 * * Vitter, J.S. (1987) "An efficient algorithm for sequential 379 * random sampling", ACM Trans. Math. Softw. 13(1): 58-67. 380 * 381 * Variable names are chosen to match those in Vitter's paper. 382 */ 383 size_t skipD() 384 { 385 import std.math; 386 387 // Confirm that the check in Step D1 is valid and we 388 // haven't been sent here by mistake 389 assert((_alphaInverse * _toSelect) <= _available); 390 391 // Now it's safe to use the standard Algorithm D mechanism. 392 if (_toSelect > 1) 393 { 394 size_t s; 395 size_t qu1 = 1 + _available - _toSelect; 396 double x, y1; 397 398 assert(!_Vprime.isNaN); 399 400 while (true) 401 { 402 // Step D2: set values of x and u. 403 while (true) 404 { 405 x = _available * (1 - _Vprime); 406 s = cast(size_t) trunc(x); 407 408 if (s < qu1) 409 { 410 break; 411 } 412 413 _Vprime = newVprime(_toSelect); 414 } 415 416 double u = uniform!"()"(0.0, 1.0, _rng); 417 418 y1 = (u * (cast(double) _available) / qu1) ^^ (1.0/(_toSelect - 1)); 419 420 _Vprime = y1 * ((-x/_available)+1.0) * ( qu1/( (cast(double) qu1) - s ) ); 421 422 // Step D3: if _Vprime <= 1.0 our work is done and we return S. 423 // Otherwise ... 424 if (_Vprime > 1.0) 425 { 426 size_t top = _available - 1, limit; 427 double y2 = 1.0, bottom; 428 429 if (_toSelect > (s+1)) 430 { 431 bottom = _available - _toSelect; 432 limit = _available - s; 433 } 434 else 435 { 436 bottom = _available - (s+1); 437 limit = qu1; 438 } 439 440 foreach (size_t t; limit .. _available) 441 { 442 y2 *= top/bottom; 443 top--; 444 bottom--; 445 } 446 447 // Step D4: decide whether or not to accept the current value of S. 448 if (_available/(_available-x) < y1 * (y2 ^^ (1.0/(_toSelect-1)))) 449 { 450 // If it's not acceptable, we generate a new value of _Vprime 451 // and go back to the start of the for (;;) loop. 452 _Vprime = newVprime(_toSelect); 453 } 454 else 455 { 456 // If it's acceptable we generate a new value of _Vprime 457 // based on the remaining number of sample points needed, 458 // and return S. 459 _Vprime = newVprime(_toSelect-1); 460 return s; 461 } 462 } 463 else 464 { 465 // Return if condition D3 satisfied. 466 return s; 467 } 468 } 469 } 470 else 471 { 472 // If only one sample point remains to be taken ... 473 return cast(size_t) trunc(_available * _Vprime); 474 } 475 } 476 477 public: 478 static if (hasLength!Range) 479 { 480 this(Range input, size_t howMany, UniformRNG rng) 481 { 482 this(input, howMany, input.length, rng); 483 } 484 } 485 486 this(Range input, size_t howMany, size_t total, UniformRNG rng) 487 { 488 import std.exception, std.string : format; 489 _input = input; 490 _rng = rng; 491 _available = total; 492 _toSelect = howMany; 493 enforce(_toSelect <= _available, 494 format("Sample: cannot sample %s items when only %s are available", 495 _toSelect, _available)); 496 static if (hasLength!Range) 497 { 498 enforce(_available <= _input.length, 499 format("Sample: specified %s items as available when input contains only %s", 500 _available, _input.length)); 501 } 502 503 /* We can save ourselves a random variate by checking right 504 * at the beginning if we should use Algorithm A. 505 */ 506 if ((_alphaInverse * _toSelect) > _available) 507 { 508 _skip = Skip.A; 509 } 510 else 511 { 512 _skip = Skip.D; 513 _Vprime = newVprime(_toSelect); 514 } 515 prime(); 516 } 517 518 static if (isForwardRange!Range) 519 { 520 this(typeof(this) that) 521 { 522 this._available = that._available; 523 this._toSelect = that._toSelect; 524 this._index = that._index; 525 this._Vprime = that._Vprime; 526 this._input = that._input.save; 527 this._skip = that._skip; 528 this._rng = that._rng; 529 } 530 } 531 532 /// Range primitives. 533 bool empty() @property @safe const nothrow pure 534 { 535 return _toSelect == 0; 536 } 537 538 /// ditto 539 auto ref front() @property 540 in 541 { 542 /* Check if sample has been initialized and that 543 * there are still sample points left to generate 544 */ 545 assert(_skip != Skip.None); 546 assert(!empty); 547 } 548 body 549 { 550 return _input.front; 551 } 552 553 /// ditto 554 void popFront() 555 in 556 { 557 // Check that sample has been initialized 558 assert(_skip != Skip.None); 559 } 560 body 561 { 562 _input.popFront(); 563 --_available; 564 --_toSelect; 565 ++_index; 566 prime(); 567 } 568 569 /// ditto 570 static if (isForwardRange!Range && isForwardRange!UniformRNG) 571 { 572 typeof(this) save() @property 573 { 574 auto ret = new typeof(this)(this); 575 ret._rng = this._rng.save; 576 return ret; 577 } 578 } 579 580 /// ditto 581 size_t length() @property @safe const nothrow pure 582 { 583 return _toSelect; 584 } 585 586 /// Returns the _index of the visited record. 587 size_t index() 588 in 589 { 590 assert(_skip != Skip.None); 591 } 592 body 593 { 594 return _index; 595 } 596 597 } 598 599 /// ditto 600 auto sample(Range)(Range r, size_t n, size_t total) 601 if (isInputRange!Range) 602 { 603 return new Sample!(Range, Random)(r, n, total, rndGen); 604 } 605 606 /// ditto 607 auto sample(Range)(Range r, size_t n) 608 if (isInputRange!Range && hasLength!Range) 609 { 610 return new Sample!(Range, Random)(r, n, r.length, rndGen); 611 } 612 613 /// ditto 614 auto sample(Range, UniformRNG)(Range r, size_t n, size_t total, UniformRNG rng) 615 if (isInputRange!Range && isUniformRNG!UniformRNG) 616 { 617 return new Sample!(Range, UniformRNG)(r, n, total, rng); 618 } 619 620 /// ditto 621 auto sample(Range, UniformRNG)(Range r, size_t n, UniformRNG rng) 622 if (isInputRange!Range && hasLength!Range && isUniformRNG!UniformRNG) 623 { 624 return new Sample!(Range, UniformRNG)(r, n, r.length, rng); 625 } 626 627 /// ditto 628 alias randomSample = sample; 629 630 unittest 631 { 632 // For test purposes, an infinite input range 633 struct TestInputRange 634 { 635 private auto r = recurrence!"a[n-1] + 1"(0); 636 bool empty() @property const pure nothrow { return r.empty; } 637 auto front() @property pure nothrow { return r.front; } 638 void popFront() pure nothrow { r.popFront(); } 639 } 640 static assert(isInputRange!TestInputRange); 641 static assert(!isForwardRange!TestInputRange); 642 643 int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ]; 644 645 foreach (UniformRNG; UniformRNGTypes) 646 { 647 auto rng = new UniformRNG(unpredictableSeed); 648 /* First test the most general case: sample of input range, with and 649 * without a specified random number generator. 650 */ 651 static assert(isInputRange!(typeof(sample(TestInputRange(), 5, 10)))); 652 static assert(isInputRange!(typeof(sample(TestInputRange(), 5, 10, rng)))); 653 static assert(!isForwardRange!(typeof(sample(TestInputRange(), 5, 10)))); 654 static assert(!isForwardRange!(typeof(sample(TestInputRange(), 5, 10, rng)))); 655 // test case with range initialized by direct call to struct 656 { 657 auto s = new Sample!(TestInputRange, UniformRNG) 658 (TestInputRange(), 5, 10, rng); 659 static assert(isInputRange!(typeof(s))); 660 static assert(!isForwardRange!(typeof(s))); 661 assert(s.length == 5); 662 assert(s._available == 10 - s.index); 663 } 664 665 /* Now test the case of an input range with length. We ignore the cases 666 * already covered by the previous tests. 667 */ 668 static assert(isInputRange!(typeof(sample(TestInputRange().takeExactly(10), 5)))); 669 static assert(isInputRange!(typeof(sample(TestInputRange().takeExactly(10), 5, rng)))); 670 static assert(!isForwardRange!(typeof(sample(TestInputRange().takeExactly(10), 5)))); 671 static assert(!isForwardRange!(typeof(sample(TestInputRange().takeExactly(10), 5, rng)))); 672 // test case with range initialized by direct call to struct 673 { 674 auto s = new Sample!(typeof(TestInputRange().takeExactly(10)), UniformRNG) 675 (TestInputRange().takeExactly(10), 5, 10, rng); 676 static assert(isInputRange!(typeof(s))); 677 static assert(!isForwardRange!(typeof(s))); 678 assert(s.length == 5); 679 assert(s._available == 10 - s.index); 680 } 681 682 // Now test the case of providing a forward range as input. 683 static assert(isForwardRange!(typeof(sample(a, 5)))); 684 static if (isForwardRange!UniformRNG) 685 { 686 static assert(isForwardRange!(typeof(sample(a, 5, rng)))); 687 // ... and test with range initialized directly 688 { 689 auto s = new Sample!(int[], UniformRNG)(a, 5, rng); 690 static assert(isForwardRange!(typeof(s))); 691 assert(s.length == 5); 692 assert(s._available == a.length - s.index); 693 } 694 } 695 else 696 { 697 static assert(isInputRange!(typeof(sample(a, 5, rng)))); 698 static assert(!isForwardRange!(typeof(sample(a, 5, rng)))); 699 // ... and test with range initialized directly 700 { 701 auto s = new Sample!(int[], UniformRNG)(a, 5, rng); 702 static assert(isInputRange!(typeof(s))); 703 static assert(!isForwardRange!(typeof(s))); 704 assert(s.length == 5); 705 assert(s._available == a.length - s.index); 706 } 707 } 708 709 /* Check that sample will throw an error if we claim more 710 * items are available than there actually are, or if we try to 711 * sample more items than are available. */ 712 import std.exception; 713 assert(collectExceptionMsg(sample(a, 5, 15)) == "Sample: specified 15 items as available when input contains only 10"); 714 assert(collectExceptionMsg(sample(a, 15)) == "Sample: cannot sample 15 items when only 10 are available"); 715 assert(collectExceptionMsg(sample(a, 9, 8)) == "Sample: cannot sample 9 items when only 8 are available"); 716 assert(collectExceptionMsg(sample(TestInputRange(), 12, 11)) == "Sample: cannot sample 12 items when only 11 are available"); 717 718 /* Check that sampling algorithm never accidentally overruns the end of 719 * the input range. If input is an InputRange without .length, this 720 * relies on the user specifying the total number of available items 721 * correctly. 722 */ 723 { 724 uint i = 0; 725 foreach (e; sample(a, a.length)) 726 { 727 assert(e == i); 728 ++i; 729 } 730 assert(i == a.length); 731 732 i = 0; 733 foreach (e; sample(TestInputRange(), 17, 17)) 734 { 735 assert(e == i); 736 ++i; 737 } 738 assert(i == 17); 739 } 740 741 742 // Check length properties of random samples. 743 assert(sample(a, 5).length == 5); 744 assert(sample(a, 5, 10).length == 5); 745 assert(sample(a, 5, rng).length == 5); 746 assert(sample(a, 5, 10, rng).length == 5); 747 assert(sample(TestInputRange(), 5, 10).length == 5); 748 assert(sample(TestInputRange(), 5, 10, rng).length == 5); 749 750 // ... and emptiness! 751 assert(sample(a, 0).empty); 752 assert(sample(a, 0, 5).empty); 753 assert(sample(a, 0, rng).empty); 754 assert(sample(a, 0, 5, rng).empty); 755 assert(sample(TestInputRange(), 0, 10).empty); 756 assert(sample(TestInputRange(), 0, 10, rng).empty); 757 758 /* Test that the (lazy) evaluation of random samples works correctly. 759 * 760 * We cover 2 different cases: a sample where the ratio of sample points 761 * to total points is greater than the threshold for using Algorithm, and 762 * one where the ratio is small enough (< 1/13) for Algorithm D to be used. 763 * 764 * For each, we also cover the case with and without a specified RNG. 765 */ 766 { 767 uint i = 0; 768 769 // Small sample/source ratio, no specified RNG. 770 foreach (e; sample(cover(a), 5)) 771 { 772 ++i; 773 } 774 assert(i == 5); 775 776 // Small sample/source ratio, specified RNG. 777 i = 0; 778 foreach (e; sample(cover(a), 5, rng)) 779 { 780 ++i; 781 } 782 assert(i == 5); 783 784 // Large sample/source ratio, no specified RNG. 785 i = 0; 786 foreach (e; sample(TestInputRange(), 123, 123_456)) 787 { 788 ++i; 789 } 790 assert(i == 123); 791 792 // Large sample/source ratio, specified RNG. 793 i = 0; 794 foreach (e; sample(TestInputRange(), 123, 123_456, rng)) 795 { 796 ++i; 797 } 798 assert(i == 123); 799 800 /* Sample/source ratio large enough to start with Algorithm D, 801 * small enough to switch to Algorithm A. 802 */ 803 i = 0; 804 foreach (e; sample(TestInputRange(), 10, 131)) 805 { 806 ++i; 807 } 808 assert(i == 10); 809 } 810 811 // Test that the .index property works correctly 812 { 813 auto s1 = sample(TestInputRange(), 654, 654_321); 814 for (; !s1.empty; s1.popFront()) 815 { 816 assert(s1.front == s1.index); 817 } 818 819 auto s2 = sample(TestInputRange(), 654, 654_321, rng); 820 for (; !s2.empty; s2.popFront()) 821 { 822 assert(s2.front == s2.index); 823 } 824 825 /* Check that it also works if .index is called before .front. 826 * See: http://d.puremagic.com/issues/show_bug.cgi?id=10322 827 */ 828 auto s3 = sample(TestInputRange(), 654, 654_321); 829 for (; !s3.empty; s3.popFront()) 830 { 831 assert(s3.index == s3.front); 832 } 833 834 auto s4 = sample(TestInputRange(), 654, 654_321, rng); 835 for (; !s4.empty; s4.popFront()) 836 { 837 assert(s4.index == s4.front); 838 } 839 } 840 841 /* Test behaviour if .popFront() is called before sample is read. 842 * This is a rough-and-ready check that the statistical properties 843 * are in the ballpark -- not a proper validation of statistical 844 * quality! This incidentally also checks for reference-type 845 * initialization bugs, as the foreach() loop will operate on a 846 * copy of the popFronted (and hence initialized) sample. 847 */ 848 { 849 size_t count1, count99; 850 foreach(_; 0 .. 100_000) 851 { 852 auto s = sample(iota(100), 5); 853 s.popFront(); 854 foreach(e; s) 855 { 856 /* This is a sequential sampling process: 0 can only be 857 * the first sample point, so _can't_ be in the remainder 858 * of the sample after .popFront() is called. 859 */ 860 assert(e != 0); 861 862 if (e == 1) 863 { 864 ++count1; 865 } 866 else if (e == 99) 867 { 868 ++count99; 869 } 870 } 871 } 872 /* We already established that because this is a sequential sampling 873 * process, 0 can only be the _first_ sample point and not one of the 874 * sample points remaining after .popFront() is first called. By the 875 * same argument, 1 can only be in the remainder if it's the 2nd point 876 * of the whole sample, and hence if 0 was the first; probability of 0 877 * being first and 1 second is 5/100 * 4/99 (thank you, Algorithm S:-) 878 * and so the mean count of 1 should be about 202. 879 880 * Conversely, 99 can only be the _last_ sample point to be picked, 881 * so its probability of inclusion should be independent of the initial 882 * .popFront() and it should occur with frequency 5/100, hence its count 883 * should be about 5000. 884 * 885 * Ideally we would perform far more trials and have a much higher 886 * tolerance for this unittest, but the time required to do so is 887 * out of all proportion to all other Phobos unittests, so we have 888 * to put up with quite high variance in the outcome. The current 889 * test should at least highlight any extreme biases and could be 890 * backed up by a hardcore external random test suite. 891 */ 892 import std.string : format; 893 assert(count1 < 300, format("1: %s > 300.", count1)); 894 assert(4_700 < count99, format("99: %s <= 4700.", count99)); 895 assert(count99 < 5_300, format("99: %s >= 5300.", count99)); 896 } 897 898 /* Test that the save property works where input is a forward range, 899 * and Sample is using a (forward range) random number generator 900 * that is not rndGen. 901 */ 902 static if (isForwardRange!UniformRNG) 903 { 904 auto s1 = sample(a, 5, rng); 905 auto s2 = s1.save; 906 assert(s1.array() == s2.array()); 907 } 908 909 // Bugzilla 8314 910 { 911 UniformRNG gen = new UniformRNG; 912 913 auto sampleFirst(UniformRNG)(uint seed, UniformRNG gen) 914 if (isUniformRNG!UniformRNG) 915 { 916 gen.seed(seed); 917 return sample(a, 1, gen).front; 918 } 919 920 // Start from 1 because not all RNGs accept 0 as seed. 921 immutable fst = sampleFirst(1, gen); 922 uint n = 1; 923 while (sampleFirst(++n, gen) == fst && n < n.max) {} 924 assert(n < n.max); 925 } 926 } 927 } 928 929 /** 930 * Shuffles elements of $(D r) using $(D gen) as a shuffler. $(D r) must be 931 * a random-access range with length. If no RNG is specified, $(D rndGen) 932 * will be used. Returns the newly-shuffled range, and so is composable: 933 * 934 * Example: 935 * -------- 936 * // generates the array [0, 1, ..., 10], 937 * // shuffles it, and writes to console 938 * iota(10).array.shuffle.writeln; 939 * -------- 940 * 941 * A $(D randomShuffle) alias has been provided to ease migration from code 942 * written using $(PHOBOSXREF random, randomShuffle). 943 */ 944 auto shuffle(Range, UniformRNG)(Range r, UniformRNG gen) 945 if(isRandomAccessRange!Range && isUniformRNG!UniformRNG) 946 { 947 return partialShuffle!(Range, UniformRNG)(r, r.length, gen); 948 } 949 950 /// ditto 951 auto shuffle(Range)(Range r) 952 if(isRandomAccessRange!Range) 953 { 954 return shuffle(r, rndGen); 955 } 956 957 /// ditto 958 alias randomShuffle = shuffle; 959 960 unittest 961 { 962 import std.algorithm; 963 964 foreach(UniformRNG; UniformRNGTypes) 965 { 966 // Also tests partialShuffle indirectly. 967 auto a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 968 auto b = a.dup; 969 auto gen = new UniformRNG(unpredictableSeed); 970 shuffle(a, gen); 971 sort(a); 972 assert(a == b); 973 shuffle(a); 974 sort(a); 975 assert(a == b); 976 } 977 } 978 979 /** 980 * Partially shuffles the elements of $(D r) such that upon returning $(D r[0..n]) 981 * is a random subset of $(D r) and is randomly ordered. $(D r[n..r.length]) 982 * will contain the elements not in $(D r[0..n]). These will be in an undefined 983 * order, but will not be random in the sense that their order after 984 * $(D partialShuffle) returns will not be independent of their order before 985 * $(D partialShuffle) was called. 986 * 987 * $(D r) must be a random-access range with length. $(D n) must be less than 988 * or equal to $(D r.length). If no RNG is specified, $(D rndGen) will be used. 989 */ 990 auto partialShuffle(Range, UniformRNG)(Range r, in size_t n, UniformRNG gen) 991 if(isRandomAccessRange!Range && isUniformRNG!UniformRNG) 992 { 993 import std.algorithm, std.exception, hap.random.distribution; 994 enforce(n <= r.length, "n must be <= r.length for partialShuffle."); 995 foreach (i; 0 .. n) 996 { 997 swapAt(r, i, i + uniform(0, n - i, gen)); 998 } 999 return r; 1000 } 1001 1002 /// ditto 1003 auto partialShuffle(Range)(Range r, in size_t n) 1004 if(isRandomAccessRange!Range) 1005 { 1006 return partialShuffle(r, n, rndGen); 1007 } 1008 1009 unittest 1010 { 1011 import std.algorithm; 1012 1013 foreach(UniformRNG; UniformRNGTypes) 1014 { 1015 auto a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; 1016 auto b = a.dup; 1017 auto gen = new UniformRNG(unpredictableSeed); 1018 partialShuffle(a, 5, gen); 1019 assert(a[5 .. $] == b[5 .. $]); 1020 sort(a[0 .. 5]); 1021 assert(a[0 .. 5] == b[0 .. 5]); 1022 partialShuffle(a, 6); 1023 assert(a[6 .. $] == b[6 .. $]); 1024 sort(a[0 .. 6]); 1025 assert(a[0 .. 6] == b[0 .. 6]); 1026 } 1027 }