-- -- RANDOM -- Test random() and allies -- -- Tests in this file may have a small probability of failure, -- since we are dealing with randomness. Try to keep the failure -- risk for any one test case under 1e-9. -- -- There should be no duplicates in 1000 random() values. -- (Assuming 52 random bits in the float8 results, we could -- take as many as 3000 values and still have less than 1e-9 chance -- of failure, per https://en.wikipedia.org/wiki/Birthday_problem) SELECT r, count(*) FROM (SELECT random() r FROM generate_series(1, 1000)) ss GROUP BY r HAVING count(*) > 1; r | count ---+------- (0 rows) -- The range should be [0, 1). We can expect that at least one out of 2000 -- random values is in the lowest or highest 1% of the range with failure -- probability less than about 1e-9. SELECT count(*) FILTER (WHERE r < 0 OR r >= 1) AS out_of_range, (count(*) FILTER (WHERE r < 0.01)) > 0 AS has_small, (count(*) FILTER (WHERE r > 0.99)) > 0 AS has_large FROM (SELECT random() r FROM generate_series(1, 2000)) ss; out_of_range | has_small | has_large --------------+-----------+----------- 0 | t | t (1 row) -- Check for uniform distribution using the Kolmogorov-Smirnov test. CREATE FUNCTION ks_test_uniform_random() RETURNS boolean AS $$ DECLARE n int := 1000; -- Number of samples c float8 := 1.94947; -- Critical value for 99.9% confidence ok boolean; BEGIN ok := ( WITH samples AS ( SELECT random() r FROM generate_series(1, n) ORDER BY 1 ), indexed_samples AS ( SELECT (row_number() OVER())-1.0 i, r FROM samples ) SELECT max(abs(i/n-r)) < c / sqrt(n) FROM indexed_samples ); RETURN ok; END $$ LANGUAGE plpgsql; -- As written, ks_test_uniform_random() returns true about 99.9% -- of the time. To get down to a roughly 1e-9 test failure rate, -- just run it 3 times and accept if any one of them passes. SELECT ks_test_uniform_random() OR ks_test_uniform_random() OR ks_test_uniform_random() AS uniform; uniform --------- t (1 row) -- now test random_normal() -- As above, there should be no duplicates in 1000 random_normal() values. SELECT r, count(*) FROM (SELECT random_normal() r FROM generate_series(1, 1000)) ss GROUP BY r HAVING count(*) > 1; r | count ---+------- (0 rows) -- ... unless we force the range (standard deviation) to zero. -- This is a good place to check that the mean input does something, too. SELECT r, count(*) FROM (SELECT random_normal(10, 0) r FROM generate_series(1, 100)) ss GROUP BY r; r | count ----+------- 10 | 100 (1 row) SELECT r, count(*) FROM (SELECT random_normal(-10, 0) r FROM generate_series(1, 100)) ss GROUP BY r; r | count -----+------- -10 | 100 (1 row) -- Check standard normal distribution using the Kolmogorov-Smirnov test. CREATE FUNCTION ks_test_normal_random() RETURNS boolean AS $$ DECLARE n int := 1000; -- Number of samples c float8 := 1.94947; -- Critical value for 99.9% confidence ok boolean; BEGIN ok := ( WITH samples AS ( SELECT random_normal() r FROM generate_series(1, n) ORDER BY 1 ), indexed_samples AS ( SELECT (row_number() OVER())-1.0 i, r FROM samples ) SELECT max(abs((1+erf(r/sqrt(2)))/2 - i/n)) < c / sqrt(n) FROM indexed_samples ); RETURN ok; END $$ LANGUAGE plpgsql; -- As above, ks_test_normal_random() returns true about 99.9% -- of the time, so try it 3 times and accept if any test passes. SELECT ks_test_normal_random() OR ks_test_normal_random() OR ks_test_normal_random() AS standard_normal; standard_normal ----------------- t (1 row) -- setseed() should produce a reproducible series of random() values. SELECT setseed(0.5); setseed --------- (1 row) SELECT random() FROM generate_series(1, 10); random --------------------- 0.9851677175347999 0.825301858027981 0.12974610012450416 0.16356291958601088 0.6476186144084 0.8822771983038762 0.1404566845227775 0.15619865764623442 0.5145227426983392 0.7712969548127826 (10 rows) -- Likewise for random_normal(); however, since its implementation relies -- on libm functions that have different roundoff behaviors on different -- machines, we have to round off the results a bit to get consistent output. SET extra_float_digits = -1; SELECT random_normal() FROM generate_series(1, 10); random_normal ------------------- 0.20853464493838 0.26453024054096 -0.60675246790043 0.82579942785265 1.7011161173536 -0.22344546371619 0.249712419191 -1.2494722990669 0.12562715204368 0.47539161454401 (10 rows) SELECT random_normal(mean => 1, stddev => 0.1) r FROM generate_series(1, 10); r ------------------ 1.0060597281173 1.09685453015 1.0286920613201 0.90947567671234 0.98372476313426 0.93934454957762 1.1871350020636 0.96225768429293 0.91444120680041 0.96403105557543 (10 rows)