1 // Written in the D programming language.
2 
3 /**
4  * Implements algorithms that transform the output of uniform random
5  * number generators into other random behaviours, such as shuffling,
6  * sampling, and so on.  Typically these are implemented as range
7  * objects that wrap a provided RNG instance.
8  *
9  * As with the random number generators provided elsewhere in this
10  * package, the range objects implemented here are implemented as final
11  * classes to enforce reference semantics.  They also assume that the
12  * RNGs they make use of have reference type semantics.  User-supplied
13  * value-type RNGs may result in incorrect behaviour when used with
14  * these objects.
15  *
16  * Copyright: © 2008-2011 Andrei Alexandrescu,
17  *              2012-2014 Joseph Rushton Wakeling
18  *
19  * License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
20  *
21  * Authors: $(WEB erdani.org, Andrei Alexandrescu),
22  *          $(WEB braingam.es, Joseph Rushton Wakeling)
23  *
24  * Source: $(HAPSRC hap/random/_adaptor.d)
25  */
26 module hap.random.adaptor;
27 
28 import hap.random.generator, hap.random.traits;
29 
30 import std.range, std.traits;
31 
32 // Cover
33 /**
34  * Covers a given range $(D r) in a random manner, i.e. goes through each
35  * element of $(D r) once and only once, but in a random order.  $(D r)
36  * must be a random-access range with length.
37  *
38  * If no random number generator is passed to $(D cover), the thread-global
39  * RNG rndGen will be used as the source of randomness.
40  *
41  * Example:
42  * ----
43  * int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ];
44  * foreach (e; cover(a))
45  * {
46  *     writeln(e);
47  * }
48  *
49  * // using a specified random number generator
50  * auto gen = new Random(unpredictableSeed);
51  * foreach (e; cover(a, gen))
52  * {
53  *     writeln(e);
54  * }
55  * ----
56  *
57  * The alias $(D randomCover) is available to ease migration for code
58  * written using $(PHOBOSXREF random, randomCover).
59  */
60 final class Cover(Range, UniformRNG)
61     if (isRandomAccessRange!Range && isUniformRNG!UniformRNG)
62 {
63   private:
64     bool[] _chosen;
65     size_t _current;
66     size_t _alreadyChosen;
67     Range _input;
68     UniformRNG _rng;
69 
70   public:
71     this(Range input, UniformRNG rng)
72     {
73         _input = input;
74         _rng = rng;
75         _chosen.length = _input.length;
76         _alreadyChosen = 0;
77         popFront();
78     }
79 
80     this(typeof(this) that)
81     {
82         _chosen = that._chosen.dup;
83         _current = that._current;
84         _alreadyChosen = that._alreadyChosen;
85         _input = that._input.save;
86         _rng = that._rng;
87     }
88 
89     /// Range primitives.
90     bool empty() @property @safe const nothrow pure
91     {
92         return _alreadyChosen > _input.length;
93     }
94 
95     /// ditto
96     auto ref front() @property @safe const nothrow pure
97     in
98     {
99         assert(_alreadyChosen > 0);
100     }
101     body
102     {
103         return _input[_current];
104     }
105 
106     /// ditto
107     void popFront()
108     {
109         if (_alreadyChosen >= _input.length)
110         {
111             // No more elements
112             ++_alreadyChosen; // means we're done
113             return;
114         }
115 
116         size_t k = _input.length - _alreadyChosen;
117         size_t i;
118 
119         foreach (e; _input)
120         {
121             if (_chosen[i])
122             {
123                 ++i;
124                 continue;
125             }
126 
127             // Roll a dice with k faces
128             import hap.random.distribution;
129             auto chooseMe = uniform(0, k, _rng) == 0;
130             assert(k > 1 || chooseMe);
131 
132             if (chooseMe)
133             {
134                 _chosen[i] = true;
135                 _current = i;
136                 ++_alreadyChosen;
137                 return;
138             }
139 
140             --k;
141             ++i;
142         }
143     }
144 
145     /// ditto
146     static if (hasLength!Range)
147     {
148         size_t length() @property @safe const nothrow pure
149         in
150         {
151             assert(_alreadyChosen > 0);
152         }
153         body
154         {
155             return (1 + _input.length) - _alreadyChosen;
156         }
157     }
158 
159     /// ditto
160     static if (isForwardRange!UniformRNG)
161     {
162         typeof(this) save() @property
163         {
164             auto ret = new typeof(this)(this);
165             ret._rng = this._rng.save;
166             return ret;
167         }
168     }
169 
170 }
171 
172 /// ditto
173 auto cover(Range, UniformRNG)(Range r, UniformRNG rng)
174     if (isRandomAccessRange!Range && isUniformRNG!UniformRNG)
175 {
176     return new Cover!(Range, UniformRNG)(r, rng);
177 }
178 
179 /// ditto
180 auto cover(Range)(Range r)
181     if (isRandomAccessRange!Range)
182 {
183     return new Cover!(Range, Random)(r, rndGen);
184 }
185 
186 /// ditto
187 alias randomCover = cover;
188 
189 unittest
190 {
191     import std.algorithm, std.string, std.typetuple;
192 
193     int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8 ];
194 
195     foreach (UniformRNG; TypeTuple!(void, UniformRNGTypes))
196     {
197         static if (is(UniformRNG == void))
198         {
199             auto c = cover(a);
200             static assert(isInputRange!(typeof(c)));
201             static assert((isForwardRange!Random && isForwardRange!(typeof(c))) ||
202                           (!isForwardRange!Random && !isForwardRange!(typeof(c))));
203         }
204         else
205         {
206             auto rng = new UniformRNG(unpredictableSeed);
207             auto c = cover(a, rng);
208             static assert(isInputRange!(typeof(c)));
209             static assert((isForwardRange!UniformRNG && isForwardRange!(typeof(c))) ||
210                           (!isForwardRange!UniformRNG && !isForwardRange!(typeof(c))));
211         }
212 
213         auto c2 = c.save;
214 
215         int[] b = new int[9];
216         uint i;
217         foreach (e, e2; lockstep(c, c2))
218         {
219             b[i++] = e;
220             assert(e == e2);
221         }
222         sort(b);
223         assert(a == b, format("%s", b));
224     }
225 }
226 
227 // Sample
228 /**
229  * Selects a random subsample out of $(D r), containing exactly $(D n)
230  * elements. The order of elements is the same as in the original
231  * range. The total length of $(D r) must be known. If $(D total) is
232  * passed in, the total number of elements available to sample is
233  * considered to be $(D total). Otherwise, $(D Sample) uses
234  * $(D r.length).
235  *
236  * $(D Sample) implements Jeffrey Scott Vitter's Algorithm D
237  * (see Vitter $(WEB dx.doi.org/10.1145/358105.893, 1984), $(WEB
238  * dx.doi.org/10.1145/23002.23003, 1987)), which selects a sample
239  * of size $(D n) in O(n) steps and requiring O(n) random variates,
240  * regardless of the size of the data being sampled.  The exception
241  * to this is if traversing k elements on the input range is itself
242  * an O(k) operation (e.g. when sampling lines from an input file),
243  * in which case the sampling calculation will inevitably be of
244  * O(total).
245  *
246  * $(D Sample) will throw an error if $(D total) is verifiably less
247  * than the total number of elements available in the input, or if
248  * $(D n > total).
249  *
250  * If no random number generator is passed to $(D sample), the
251  * thread-local default RNG rndGen will be used as the source of
252  * randomness.
253  *
254  * Example:
255  * ----
256  * int[] arr = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ];
257  * // Print 5 random elements picked from arr
258  * foreach (e; sample(arr, 5))
259  * {
260  *     writeln(e);
261  * }
262  *
263  * // Print 5 random elements picked from arr,
264  * // using a specified random number generator
265  * auto gen = new Random(unpredictableSeed);
266  * foreach (e; sample(arr, 5, gen))
267  * {
268  *     writeln(e);
269  * }
270  * ----
271  *
272  * The alias $(D randomSample) is available to ease migration for
273  * code written using $(PHOBOSXREF random, randomSample).
274  */
275 final class Sample(Range, UniformRNG)
276     if (isInputRange!Range && isUniformRNG!UniformRNG)
277 {
278   private:
279     import hap.random.distribution;
280     enum ushort _alphaInverse = 13; // Vitter's recommended value.
281     enum Skip { None, A, D };
282     size_t _available, _toSelect, _index;
283     double _Vprime;
284     Range _input;
285     Skip _skip = Skip.None;
286     UniformRNG _rng;
287 
288     // Randomly reset the value of _Vprime.
289     double newVprime(size_t remaining)
290     {
291         return uniform!"()"(0.0, 1.0, _rng) ^^ (1.0 / remaining);
292     }
293 
294     void prime()
295     {
296         if (empty)
297         {
298             return;
299         }
300         assert(_available && _available >= _toSelect);
301         immutable size_t s = skip();
302         assert(s + _toSelect <= _available);
303         static if (hasLength!Range)
304         {
305             assert(s + _toSelect <= _input.length);
306         }
307         assert(!_input.empty);
308         _input.popFrontExactly(s);
309         _index += s;
310         _available -= s;
311         assert(_available > 0);
312     }
313 
314     size_t skip()
315     in
316     {
317         assert(_skip != Skip.None);
318     }
319     body
320     {
321         // Step D1: if the number of points still to select is greater
322         // than a certain proportion of the remaining data points, i.e.
323         // if n >= alpha * N where alpha = 1/13, we carry out the
324         // sampling with Algorithm A.
325         if (_skip == Skip.A)
326         {
327             return skipA();
328         }
329         else if ((_alphaInverse * _toSelect) > _available)
330         {
331             // We shouldn't get here unless the current selected
332             // algorithm is D.
333             assert(_skip == Skip.D);
334             _skip = Skip.A;
335             return skipA();
336         }
337         else
338         {
339             assert(_skip == Skip.D);
340             return skipD();
341         }
342     }
343 
344     /* Vitter's Algorithm A, used when the ratio of needed sample values
345      * to remaining data values is sufficiently large.
346      */
347     size_t skipA()
348     {
349         size_t s;
350         double v, quot, top;
351 
352         if (_toSelect==1)
353         {
354             s = uniform(0, _available, _rng);
355         }
356         else
357         {
358             top = _available - _toSelect;
359             quot = top / _available;
360             v = uniform!"()"(0.0, 1.0, _rng);
361 
362             while (quot > v)
363             {
364                 ++s;
365                 quot *= (top - s) / (_available - s);
366             }
367         }
368 
369         return s;
370     }
371 
372     /* Vitter's Algorithm D.  For an extensive description of the algorithm
373      * and its rationale, see:
374      *
375      *    * Vitter, J.S. (1984), "Faster methods for random sampling",
376      *      Commun. ACM 27(7): 703--718
377      *
378      *    * Vitter, J.S. (1987) "An efficient algorithm for sequential
379      *      random sampling", ACM Trans. Math. Softw. 13(1): 58-67.
380      *
381      * Variable names are chosen to match those in Vitter's paper.
382      */
383     size_t skipD()
384     {
385         import std.math;
386 
387         // Confirm that the check in Step D1 is valid and we
388         // haven't been sent here by mistake
389         assert((_alphaInverse * _toSelect) <= _available);
390 
391         // Now it's safe to use the standard Algorithm D mechanism.
392         if (_toSelect > 1)
393         {
394             size_t s;
395             size_t qu1 = 1 + _available - _toSelect;
396             double x, y1;
397 
398             assert(!_Vprime.isNaN);
399 
400             while (true)
401             {
402                 // Step D2: set values of x and u.
403                 while (true)
404                 {
405                     x = _available * (1 - _Vprime);
406                     s = cast(size_t) trunc(x);
407 
408                     if (s < qu1)
409                     {
410                         break;
411                     }
412 
413                     _Vprime = newVprime(_toSelect);
414                 }
415 
416                 double u = uniform!"()"(0.0, 1.0, _rng);
417 
418                 y1 = (u * (cast(double) _available) / qu1) ^^ (1.0/(_toSelect - 1));
419 
420                 _Vprime = y1 * ((-x/_available)+1.0) * ( qu1/( (cast(double) qu1) - s ) );
421 
422                 // Step D3: if _Vprime <= 1.0 our work is done and we return S.
423                 // Otherwise ...
424                 if (_Vprime > 1.0)
425                 {
426                     size_t top = _available - 1, limit;
427                     double y2 = 1.0, bottom;
428 
429                     if (_toSelect > (s+1))
430                     {
431                         bottom = _available - _toSelect;
432                         limit = _available - s;
433                     }
434                     else
435                     {
436                         bottom = _available - (s+1);
437                         limit = qu1;
438                     }
439 
440                     foreach (size_t t; limit .. _available)
441                     {
442                         y2 *= top/bottom;
443                         top--;
444                         bottom--;
445                     }
446 
447                     // Step D4: decide whether or not to accept the current value of S.
448                     if (_available/(_available-x) < y1 * (y2 ^^ (1.0/(_toSelect-1))))
449                     {
450                         // If it's not acceptable, we generate a new value of _Vprime
451                         // and go back to the start of the for (;;) loop.
452                         _Vprime = newVprime(_toSelect);
453                     }
454                     else
455                     {
456                         // If it's acceptable we generate a new value of _Vprime
457                         // based on the remaining number of sample points needed,
458                         // and return S.
459                         _Vprime = newVprime(_toSelect-1);
460                         return s;
461                     }
462                 }
463                 else
464                 {
465                     // Return if condition D3 satisfied.
466                     return s;
467                 }
468             }
469         }
470         else
471         {
472             // If only one sample point remains to be taken ...
473             return cast(size_t) trunc(_available * _Vprime);
474         }
475     }
476 
477   public:
478     static if (hasLength!Range)
479     {
480         this(Range input, size_t howMany, UniformRNG rng)
481         {
482             this(input, howMany, input.length, rng);
483         }
484     }
485 
486     this(Range input, size_t howMany, size_t total, UniformRNG rng)
487     {
488         import std.exception, std.string : format;
489         _input = input;
490         _rng = rng;
491         _available = total;
492         _toSelect = howMany;
493         enforce(_toSelect <= _available,
494                 format("Sample: cannot sample %s items when only %s are available",
495                        _toSelect, _available));
496         static if (hasLength!Range)
497         {
498             enforce(_available <= _input.length,
499                     format("Sample: specified %s items as available when input contains only %s",
500                            _available, _input.length));
501         }
502 
503         /* We can save ourselves a random variate by checking right
504          * at the beginning if we should use Algorithm A.
505          */
506         if ((_alphaInverse * _toSelect) > _available)
507         {
508             _skip = Skip.A;
509         }
510         else
511         {
512             _skip = Skip.D;
513             _Vprime = newVprime(_toSelect);
514         }
515         prime();
516     }
517 
518     static if (isForwardRange!Range)
519     {
520         this(typeof(this) that)
521         {
522             this._available = that._available;
523             this._toSelect = that._toSelect;
524             this._index = that._index;
525             this._Vprime = that._Vprime;
526             this._input = that._input.save;
527             this._skip = that._skip;
528             this._rng = that._rng;
529         }
530     }
531 
532     /// Range primitives.
533     bool empty() @property @safe const nothrow pure
534     {
535         return _toSelect == 0;
536     }
537 
538     /// ditto
539     auto ref front() @property
540     in
541     {
542         /* Check if sample has been initialized and that
543          * there are still sample points left to generate
544          */
545         assert(_skip != Skip.None);
546         assert(!empty);
547     }
548     body
549     {
550         return _input.front;
551     }
552 
553     /// ditto
554     void popFront()
555     in
556     {
557         // Check that sample has been initialized
558         assert(_skip != Skip.None);
559     }
560     body
561     {
562         _input.popFront();
563         --_available;
564         --_toSelect;
565         ++_index;
566         prime();
567     }
568 
569     /// ditto
570     static if (isForwardRange!Range && isForwardRange!UniformRNG)
571     {
572         typeof(this) save() @property
573         {
574             auto ret = new typeof(this)(this);
575             ret._rng = this._rng.save;
576             return ret;
577         }
578     }
579 
580     /// ditto
581     size_t length() @property @safe const nothrow pure
582     {
583         return _toSelect;
584     }
585 
586     /// Returns the _index of the visited record.
587     size_t index()
588     in
589     {
590         assert(_skip != Skip.None);
591     }
592     body
593     {
594         return _index;
595     }
596 
597 }
598 
599 /// ditto
600 auto sample(Range)(Range r, size_t n, size_t total)
601     if (isInputRange!Range)
602 {
603     return new Sample!(Range, Random)(r, n, total, rndGen);
604 }
605 
606 /// ditto
607 auto sample(Range)(Range r, size_t n)
608     if (isInputRange!Range && hasLength!Range)
609 {
610     return new Sample!(Range, Random)(r, n, r.length, rndGen);
611 }
612 
613 /// ditto
614 auto sample(Range, UniformRNG)(Range r, size_t n, size_t total, UniformRNG rng)
615     if (isInputRange!Range && isUniformRNG!UniformRNG)
616 {
617     return new Sample!(Range, UniformRNG)(r, n, total, rng);
618 }
619 
620 /// ditto
621 auto sample(Range, UniformRNG)(Range r, size_t n, UniformRNG rng)
622     if (isInputRange!Range && hasLength!Range && isUniformRNG!UniformRNG)
623 {
624     return new Sample!(Range, UniformRNG)(r, n, r.length, rng);
625 }
626 
627 /// ditto
628 alias randomSample = sample;
629 
630 unittest
631 {
632     // For test purposes, an infinite input range
633     struct TestInputRange
634     {
635         private auto r = recurrence!"a[n-1] + 1"(0);
636         bool empty() @property const pure nothrow { return r.empty; }
637         auto front() @property pure nothrow { return r.front; }
638         void popFront() pure nothrow { r.popFront(); }
639     }
640     static assert(isInputRange!TestInputRange);
641     static assert(!isForwardRange!TestInputRange);
642 
643     int[] a = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 ];
644 
645     foreach (UniformRNG; UniformRNGTypes)
646     {
647         auto rng = new UniformRNG(unpredictableSeed);
648         /* First test the most general case: sample of input range, with and
649          * without a specified random number generator.
650          */
651         static assert(isInputRange!(typeof(sample(TestInputRange(), 5, 10))));
652         static assert(isInputRange!(typeof(sample(TestInputRange(), 5, 10, rng))));
653         static assert(!isForwardRange!(typeof(sample(TestInputRange(), 5, 10))));
654         static assert(!isForwardRange!(typeof(sample(TestInputRange(), 5, 10, rng))));
655         // test case with range initialized by direct call to struct
656         {
657             auto s = new Sample!(TestInputRange, UniformRNG)
658                                 (TestInputRange(), 5, 10, rng);
659             static assert(isInputRange!(typeof(s)));
660             static assert(!isForwardRange!(typeof(s)));
661             assert(s.length == 5);
662             assert(s._available == 10 - s.index);
663         }
664 
665         /* Now test the case of an input range with length.  We ignore the cases
666          * already covered by the previous tests.
667          */
668         static assert(isInputRange!(typeof(sample(TestInputRange().takeExactly(10), 5))));
669         static assert(isInputRange!(typeof(sample(TestInputRange().takeExactly(10), 5, rng))));
670         static assert(!isForwardRange!(typeof(sample(TestInputRange().takeExactly(10), 5))));
671         static assert(!isForwardRange!(typeof(sample(TestInputRange().takeExactly(10), 5, rng))));
672         // test case with range initialized by direct call to struct
673         {
674             auto s = new Sample!(typeof(TestInputRange().takeExactly(10)), UniformRNG)
675                                 (TestInputRange().takeExactly(10), 5, 10, rng);
676             static assert(isInputRange!(typeof(s)));
677             static assert(!isForwardRange!(typeof(s)));
678             assert(s.length == 5);
679             assert(s._available == 10 - s.index);
680         }
681 
682         // Now test the case of providing a forward range as input.
683         static assert(isForwardRange!(typeof(sample(a, 5))));
684         static if (isForwardRange!UniformRNG)
685         {
686             static assert(isForwardRange!(typeof(sample(a, 5, rng))));
687             // ... and test with range initialized directly
688             {
689                 auto s = new Sample!(int[], UniformRNG)(a, 5, rng);
690                 static assert(isForwardRange!(typeof(s)));
691                 assert(s.length == 5);
692                 assert(s._available == a.length - s.index);
693             }
694         }
695         else
696         {
697             static assert(isInputRange!(typeof(sample(a, 5, rng))));
698             static assert(!isForwardRange!(typeof(sample(a, 5, rng))));
699             // ... and test with range initialized directly
700             {
701                 auto s = new Sample!(int[], UniformRNG)(a, 5, rng);
702                 static assert(isInputRange!(typeof(s)));
703                 static assert(!isForwardRange!(typeof(s)));
704                 assert(s.length == 5);
705                 assert(s._available == a.length - s.index);
706             }
707         }
708 
709         /* Check that sample will throw an error if we claim more
710          * items are available than there actually are, or if we try to
711          * sample more items than are available. */
712         import std.exception;
713         assert(collectExceptionMsg(sample(a, 5, 15)) == "Sample: specified 15 items as available when input contains only 10");
714         assert(collectExceptionMsg(sample(a, 15)) == "Sample: cannot sample 15 items when only 10 are available");
715         assert(collectExceptionMsg(sample(a, 9, 8)) == "Sample: cannot sample 9 items when only 8 are available");
716         assert(collectExceptionMsg(sample(TestInputRange(), 12, 11)) == "Sample: cannot sample 12 items when only 11 are available");
717 
718         /* Check that sampling algorithm never accidentally overruns the end of
719          * the input range.  If input is an InputRange without .length, this
720          * relies on the user specifying the total number of available items
721          * correctly.
722          */
723         {
724             uint i = 0;
725             foreach (e; sample(a, a.length))
726             {
727                 assert(e == i);
728                 ++i;
729             }
730             assert(i == a.length);
731 
732             i = 0;
733             foreach (e; sample(TestInputRange(), 17, 17))
734             {
735                 assert(e == i);
736                 ++i;
737             }
738             assert(i == 17);
739         }
740 
741 
742         // Check length properties of random samples.
743         assert(sample(a, 5).length == 5);
744         assert(sample(a, 5, 10).length == 5);
745         assert(sample(a, 5, rng).length == 5);
746         assert(sample(a, 5, 10, rng).length == 5);
747         assert(sample(TestInputRange(), 5, 10).length == 5);
748         assert(sample(TestInputRange(), 5, 10, rng).length == 5);
749 
750         // ... and emptiness!
751         assert(sample(a, 0).empty);
752         assert(sample(a, 0, 5).empty);
753         assert(sample(a, 0, rng).empty);
754         assert(sample(a, 0, 5, rng).empty);
755         assert(sample(TestInputRange(), 0, 10).empty);
756         assert(sample(TestInputRange(), 0, 10, rng).empty);
757 
758         /* Test that the (lazy) evaluation of random samples works correctly.
759          *
760          * We cover 2 different cases: a sample where the ratio of sample points
761          * to total points is greater than the threshold for using Algorithm, and
762          * one where the ratio is small enough (< 1/13) for Algorithm D to be used.
763          *
764          * For each, we also cover the case with and without a specified RNG.
765          */
766         {
767             uint i = 0;
768 
769             // Small sample/source ratio, no specified RNG.
770             foreach (e; sample(cover(a), 5))
771             {
772                 ++i;
773             }
774             assert(i == 5);
775 
776             // Small sample/source ratio, specified RNG.
777             i = 0;
778             foreach (e; sample(cover(a), 5, rng))
779             {
780                 ++i;
781             }
782             assert(i == 5);
783 
784             // Large sample/source ratio, no specified RNG.
785             i = 0;
786             foreach (e; sample(TestInputRange(), 123, 123_456))
787             {
788                 ++i;
789             }
790             assert(i == 123);
791 
792             // Large sample/source ratio, specified RNG.
793             i = 0;
794             foreach (e; sample(TestInputRange(), 123, 123_456, rng))
795             {
796                 ++i;
797             }
798             assert(i == 123);
799 
800             /* Sample/source ratio large enough to start with Algorithm D,
801              * small enough to switch to Algorithm A.
802              */
803             i = 0;
804             foreach (e; sample(TestInputRange(), 10, 131))
805             {
806                 ++i;
807             }
808             assert(i == 10);
809         }
810 
811         // Test that the .index property works correctly
812         {
813             auto s1 = sample(TestInputRange(), 654, 654_321);
814             for (; !s1.empty; s1.popFront())
815             {
816                 assert(s1.front == s1.index);
817             }
818 
819             auto s2 = sample(TestInputRange(), 654, 654_321, rng);
820             for (; !s2.empty; s2.popFront())
821             {
822                 assert(s2.front == s2.index);
823             }
824 
825             /* Check that it also works if .index is called before .front.
826              * See: http://d.puremagic.com/issues/show_bug.cgi?id=10322
827              */
828             auto s3 = sample(TestInputRange(), 654, 654_321);
829             for (; !s3.empty; s3.popFront())
830             {
831                 assert(s3.index == s3.front);
832             }
833 
834             auto s4 = sample(TestInputRange(), 654, 654_321, rng);
835             for (; !s4.empty; s4.popFront())
836             {
837                 assert(s4.index == s4.front);
838             }
839         }
840 
841         /* Test behaviour if .popFront() is called before sample is read.
842          * This is a rough-and-ready check that the statistical properties
843          * are in the ballpark -- not a proper validation of statistical
844          * quality!  This incidentally also checks for reference-type
845          * initialization bugs, as the foreach() loop will operate on a
846          * copy of the popFronted (and hence initialized) sample.
847          */
848         {
849             size_t count1, count99;
850             foreach(_; 0 .. 100_000)
851             {
852                 auto s = sample(iota(100), 5);
853                 s.popFront();
854                 foreach(e; s)
855                 {
856                     /* This is a sequential sampling process: 0 can only be
857                      * the first sample point, so _can't_ be in the remainder
858                      * of the sample after .popFront() is called.
859                      */
860                     assert(e != 0);
861 
862                     if (e == 1)
863                     {
864                         ++count1;
865                     }
866                     else if (e == 99)
867                     {
868                         ++count99;
869                     }
870                 }
871             }
872             /* We already established that because this is a sequential sampling
873              * process, 0 can only be the _first_ sample point and not one of the
874              * sample points remaining after .popFront() is first called.  By the
875              * same argument, 1 can only be in the remainder if it's the 2nd point
876              * of the whole sample, and hence if 0 was the first; probability of 0
877              * being first and 1 second is 5/100 * 4/99 (thank you, Algorithm S:-)
878              * and so the mean count of 1 should be about 202.
879 
880              * Conversely, 99 can only be the _last_ sample point to be picked,
881              * so its probability of inclusion should be independent of the initial
882              * .popFront() and it should occur with frequency 5/100, hence its count
883              * should be about 5000.
884              *
885              * Ideally we would perform far more trials and have a much higher
886              * tolerance for this unittest, but the time required to do so is
887              * out of all proportion to all other Phobos unittests, so we have
888              * to put up with quite high variance in the outcome.  The current
889              * test should at least highlight any extreme biases and could be
890              * backed up by a hardcore external random test suite.
891              */
892             import std.string : format;
893             assert(count1 < 300, format("1: %s > 300.", count1));
894             assert(4_700 < count99, format("99: %s <= 4700.", count99));
895             assert(count99 < 5_300, format("99: %s >= 5300.", count99));
896         }
897 
898         /* Test that the save property works where input is a forward range,
899          * and Sample is using a (forward range) random number generator
900          * that is not rndGen.
901          */
902         static if (isForwardRange!UniformRNG)
903         {
904             auto s1 = sample(a, 5, rng);
905             auto s2 = s1.save;
906             assert(s1.array() == s2.array());
907         }
908 
909         // Bugzilla 8314
910         {
911             UniformRNG gen = new UniformRNG;
912 
913             auto sampleFirst(UniformRNG)(uint seed, UniformRNG gen)
914                 if (isUniformRNG!UniformRNG)
915             {
916                 gen.seed(seed);
917                 return sample(a, 1, gen).front;
918             }
919 
920             // Start from 1 because not all RNGs accept 0 as seed.
921             immutable fst = sampleFirst(1, gen);
922             uint n = 1;
923             while (sampleFirst(++n, gen) == fst && n < n.max) {}
924             assert(n < n.max);
925         }
926     }
927 }
928 
929 /**
930  * Shuffles elements of $(D r) using $(D gen) as a shuffler. $(D r) must be
931  * a random-access range with length.  If no RNG is specified, $(D rndGen)
932  * will be used.  Returns the newly-shuffled range, and so is composable:
933  *
934  * Example:
935  * --------
936  * // generates the array [0, 1, ..., 10],
937  * // shuffles it, and writes to console
938  * iota(10).array.shuffle.writeln;
939  * --------
940  *
941  * A $(D randomShuffle) alias has been provided to ease migration from code
942  * written using $(PHOBOSXREF random, randomShuffle).
943  */
944 auto shuffle(Range, UniformRNG)(Range r, UniformRNG gen)
945     if(isRandomAccessRange!Range && isUniformRNG!UniformRNG)
946 {
947     return partialShuffle!(Range, UniformRNG)(r, r.length, gen);
948 }
949 
950 /// ditto
951 auto shuffle(Range)(Range r)
952     if(isRandomAccessRange!Range)
953 {
954     return shuffle(r, rndGen);
955 }
956 
957 /// ditto
958 alias randomShuffle = shuffle;
959 
960 unittest
961 {
962     import std.algorithm;
963 
964     foreach(UniformRNG; UniformRNGTypes)
965     {
966         // Also tests partialShuffle indirectly.
967         auto a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
968         auto b = a.dup;
969         auto gen = new UniformRNG(unpredictableSeed);
970         shuffle(a, gen);
971         sort(a);
972         assert(a == b);
973         shuffle(a);
974         sort(a);
975         assert(a == b);
976     }
977 }
978 
979 /**
980  * Partially shuffles the elements of $(D r) such that upon returning $(D r[0..n])
981  * is a random subset of $(D r) and is randomly ordered.  $(D r[n..r.length])
982  * will contain the elements not in $(D r[0..n]).  These will be in an undefined
983  * order, but will not be random in the sense that their order after
984  * $(D partialShuffle) returns will not be independent of their order before
985  * $(D partialShuffle) was called.
986  *
987  * $(D r) must be a random-access range with length.  $(D n) must be less than
988  * or equal to $(D r.length).  If no RNG is specified, $(D rndGen) will be used.
989  */
990 auto partialShuffle(Range, UniformRNG)(Range r, in size_t n, UniformRNG gen)
991     if(isRandomAccessRange!Range && isUniformRNG!UniformRNG)
992 {
993     import std.algorithm, std.exception, hap.random.distribution;
994     enforce(n <= r.length, "n must be <= r.length for partialShuffle.");
995     foreach (i; 0 .. n)
996     {
997         swapAt(r, i, i + uniform(0, n - i, gen));
998     }
999     return r;
1000 }
1001 
1002 /// ditto
1003 auto partialShuffle(Range)(Range r, in size_t n)
1004     if(isRandomAccessRange!Range)
1005 {
1006     return partialShuffle(r, n, rndGen);
1007 }
1008 
1009 unittest
1010 {
1011     import std.algorithm;
1012 
1013     foreach(UniformRNG; UniformRNGTypes)
1014     {
1015         auto a = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9];
1016         auto b = a.dup;
1017         auto gen = new UniformRNG(unpredictableSeed);
1018         partialShuffle(a, 5, gen);
1019         assert(a[5 .. $] == b[5 .. $]);
1020         sort(a[0 .. 5]);
1021         assert(a[0 .. 5] == b[0 .. 5]);
1022         partialShuffle(a, 6);
1023         assert(a[6 .. $] == b[6 .. $]);
1024         sort(a[0 .. 6]);
1025         assert(a[0 .. 6] == b[0 .. 6]);
1026     }
1027 }