Creating a Uniform, Normal and Benford Law’s Distribution from Random Numbers in SQL Server 0

Creating test data we often utilise random numbers, within SQL Server we can use the RAND() function or NEWID().

This quick post shows you how to create three different distributions based on the set {1..9} – Uniform (evenly distributed), Normal (distributed about the mean) and Benford – distribution follows Benfords Law of Log10( 1 + ( 1 / digit ) ).

Using RAND()

ProbDistributionUniNorBenRAND

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
--
--	Create a table of digits 1 - 9
--
declare @digits table (
	numb			tinyint not null unique,
	occurrance_uniform	int not null default( 0 ),
	occurrance_normal	int not null default( 0 ),
	occurrance_benford	int not null default( 0 )
	);
 
insert @digits ( numb )
	select top 9 row_number() over( order by object_id ) as numb
	from sys.objects;
 
--
--	Create distribution from 100,000 iterations
--
 
set nocount on;
 
declare @i int = 0;
 
while @i <= 100000
begin
    --
    --	Uniform distribution
    --
    update @digits	
    	set occurrance_uniform = occurrance_uniform + 1
    where numb = cast( 10 * rand() as tinyint )
 
    --
    --	Normal distribution around the mean
    --	(created from the average of 4 independant variables)
    --
    update @digits	
    	set occurrance_normal = occurrance_normal + 1
    where numb = ( cast( ceiling( 10 * rand() ) as tinyint )
    	    + cast( ceiling( 10 * rand() ) as tinyint ) 
    	    + cast( ceiling( 10 * rand() ) as tinyint ) 
    	    + cast( ceiling( 10 * rand() ) as tinyint ) 
    		 ) / 4
    set @i = @i + 1
 
end
 
--	
--	Results, including create 
--
select *, sum( occurrance_benford ) over() 
from (
	select  numb, 
		occurrance_uniform, 
		cast( occurrance_uniform as decimal( 15, 3 ) ) 
                            / sum( occurrance_uniform ) over() as p_uniform,
		occurrance_normal, 
		cast( occurrance_normal as decimal( 15, 3 ) ) 
                            / sum( occurrance_normal ) over() as p_normal,
		--	Create distribution according to Benfords Law
		ceiling( sum( occurrance_normal ) over() 
                            * log10( 1. + ( 1./numb) ) ) as occurrance_benford,
		log10( 1. + ( 1./numb) ) as p_benford
	from @digits
	) as d
order by numb

Using NEWID()

ProbDistributionUniNorBenNEWID

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
--
--	Create a table of digits 1 - 9
--
declare @digits table (
	numb			tinyint not null unique,
	occurrance_uniform	int not null default( 0 ),
	occurrance_normal	int not null default( 0 ),
	occurrance_benford	int not null default( 0 )
	);
 
insert @digits ( numb )
	select top 9 row_number() over( order by object_id ) as numb
	from sys.objects;
 
--
--	Create distribution from 100,000 iterations
--
 
set nocount on;
 
declare @i int = 0;
 
while @i <= 100000
begin
    --
    --	Uniform distribution
    --
    update @digits	
    	set occurrance_uniform = occurrance_uniform + 1
    where numb = ( abs( cast( cast( newid() as binary(16) ) as bigint ) ) % 9 ) + 1
 
    --
    --	Normal distribution around the mean
    --	(created from the average of 4 independant variables)
    --
    update @digits	
    	set occurrance_normal = occurrance_normal + 1
    where numb = ( ( abs( cast( cast( newid() as binary(16) ) as bigint ) ) % 9 ) + 1
    	   + ( abs( cast( cast( newid() as binary(16) ) as bigint ) ) % 9 ) + 1
    	   + ( abs( cast( cast( newid() as binary(16) ) as bigint ) ) % 9 ) + 1
    	   + ( abs( cast( cast( newid() as binary(16) ) as bigint ) ) % 9 ) + 1
    		 ) / 4
    set @i = @i + 1
 
end
 
--	
--	Results, including create 
--
select *, sum( occurrance_benford ) over() 
from (
	select  numb, 
		occurrance_uniform, 
		cast( occurrance_uniform as decimal( 15, 3 ) ) 
                            / sum( occurrance_uniform ) over() as p_uniform,
		occurrance_normal, 
		cast( occurrance_normal as decimal( 15, 3 ) ) 
                            / sum( occurrance_normal ) over() as p_normal,
		--	Create distribution according to Benfords Law
		ceiling( sum( occurrance_normal ) over() 
                            * log10( 1. + ( 1./numb) ) ) as occurrance_benford,
		log10( 1. + ( 1./numb) ) as p_benford
	from @digits
	) as d
order by numb