When parent is assigned randomly from the previously inserted rows, there is no control over the tree height (number of levels) and the way levels populated, which may not be desired in some scenarios.
It may be more convenient to populate tree with a data level by level.
Auxiliary table valued function is taken to generate numbers sequence using Itzik's cross joined CTE method (see e.g. here about it)
create function ftItziksCJCTE
(
@cnt int
)
returns table as
return
(
WITH
E00(N) AS (SELECT 1 UNION ALL SELECT 1),
E02(N) AS (SELECT 1 FROM E00 a, E00 b),
E04(N) AS (SELECT 1 FROM E02 a, E02 b),
E08(N) AS (SELECT 1 FROM E04 a, E04 b),
E16(N) AS (SELECT 1 FROM E08 a, E08 b),
E32(N) AS (SELECT 1 FROM E16 a, E16 b),
E(N) AS (SELECT ROW_NUMBER() OVER (ORDER BY N) FROM E32)
select N from E where N <= @cnt
)
Simple table to control elements distribution in the tree:
create table #TreeLevels
(
LevelNo int identity(1, 1) not NULL,
MinElements int not NULL,
MaxElements int not NULL,
primary key clustered (LevelNo)
)
Sample distribution:
insert into #TreeLevels values (7, 10)
insert into #TreeLevels values (70, 100)
insert into #TreeLevels values (700, 1000)
Will give us something like 7 to 10 elements with ParentID = NULL, each of them having something like 70 to 100 elements, etc. With total number of elements 343000 to 1000000
Or other distribution:
insert into #TreeLevels values (1, 1)
insert into #TreeLevels values (9, 15)
insert into #TreeLevels values (10, 12)
insert into #TreeLevels values (9, 15)
insert into #TreeLevels values (10, 12)
insert into #TreeLevels values (9, 15)
insert into #TreeLevels values (10, 12)
Meaning there will be single root element with something between 9 and 15 child elements, each of them having something like 10 to 12 elements, etc.
Then tree can be populated level by level:
declare @levelNo int, @eMin int, @eMax int
create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))
set @levelNo = 1
while 1=1
begin
select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
if @@ROWCOUNT = 0
break
if @levelNo = 1
begin
insert into TestTree (ParentID)
output inserted.ID into #Inserted (ID)
select NULL from ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0))
end
else
begin
if exists (select 1 from #Inserted)
begin
insert into TestTree (ParentID)
output inserted.ID into #Inserted2 (ID)
select
I.ID
from
#Inserted I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
truncate table #Inserted
end
else
begin
insert into TestTree (ParentID)
output inserted.ID into #Inserted (ID)
select
I.ID
from
#Inserted2 I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
truncate table #Inserted2
end
end
set @levelNo = @levelNo + 1
end
However, there is no control on the exact number of elements the tree will contain and leaf nodes are on the last level only. It would be good to have additional parameter controlling level population (percent of nodes on the same level which will have children).
create table #TreeLevels
(
LevelNo int identity(1, 1) not NULL,
MinElements int not NULL,
MaxElements int not NULL,
PopulatedPct float NULL,
primary key clustered (LevelNo)
)
Sample distribution:
insert into #TreeLevels values (1, 1, NULL)
insert into #TreeLevels values (9, 15, NULL)
insert into #TreeLevels values (10, 12, NULL)
insert into #TreeLevels values (9, 15, 80)
insert into #TreeLevels values (10, 12, 65)
insert into #TreeLevels values (9, 15, 35)
insert into #TreeLevels values (10, 12, NULL)
NULL for a PopulatedPct percent is treated as 100%. PopulatedPct controls next level population and should be taken from previous level during cycle. Also it has no meaning for the last row in the #TreeLevels hence.
Now we can cycle trough levels taking PopulatedPct into account.
declare @levelNo int, @eMin int, @eMax int
create table #Inserted (ID int not NULL, primary key nonclustered (ID))
create table #Inserted2 (ID int not NULL, primary key nonclustered (ID))
set @levelNo = 1
while 1=1
begin
select @eMin = MinElements, @eMax = MaxElements from #TreeLevels where LevelNo = @levelNo
if @@ROWCOUNT = 0
break
if @levelNo = 1
begin
insert into TestTree (ParentID)
output inserted.ID into #Inserted (ID)
select NULL from ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0))
end
else
begin
declare @pct float
select @pct = PopulatedPct from #TreeLevels where LevelNo = @levelNo - 1
if exists (select 1 from #Inserted)
begin
if (@pct is NULL)
insert into TestTree (ParentID)
output inserted.ID into #Inserted2 (ID)
select
I.ID
from
#Inserted I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
else
insert into TestTree (ParentID)
output inserted.ID into #Inserted2 (ID)
select
I.ID
from
(select top (@pct) PERCENT ID from #Inserted order by rand(checksum(newid()))) I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
truncate table #Inserted
end
else
begin
if (@pct is NULL)
insert into TestTree (ParentID)
output inserted.ID into #Inserted (ID)
select
I.ID
from
#Inserted2 I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
else
insert into TestTree (ParentID)
output inserted.ID into #Inserted (ID)
select
I.ID
from
(select top (@pct) PERCENT ID from #Inserted2 order by rand(checksum(newid()))) I
cross apply ftItziksCJCTE(round(rand(checksum(newid())) * (@eMax - @eMin) + @eMin, 0)) F
truncate table #Inserted2
end
end
set @levelNo = @levelNo + 1
end
Still there is no control over the exact number of elements, but better control over the tree shape is gained.