I have a version for you that will allow any number of factors and tree depth (with only slight modifications necessary to demo more). I don't know what the performance will be like, but it is potentially good if appropriate indexes are added.
First we load the raw data:
-- Raw tree output, one printed line of the tree per row.
CREATE TABLE dbo.WekaTree (
    -- Line number of the tree printout. The parsing query relies on this
    -- ordering to find each leaf's ancestors, and the final probe query joins
    -- on it, so it must be present and unique.
    ID int NOT NULL CONSTRAINT PK_WekaTree PRIMARY KEY,
    -- The printed line itself: one '| ' prefix per nesting level, followed by
    -- "<factor> <operator> <value>" and, on leaf lines, " : <prediction ...>".
    Ruleset varchar(70) NOT NULL
);
-- Sample tree: 20 printed lines, numbered in print order.
-- Lines that carry " : ..." after the comparison are leaves.
INSERT INTO dbo.WekaTree (ID, Ruleset)
SELECT
    Src.ID,
    Src.Ruleset
FROM (VALUES
    ( 1, 'fac_a < 64'),
    ( 2, '| fac_d < 71.5'),
    ( 3, '| | fac_a < 49.5'),
    ( 4, '| | | fac_d < 23.5 : 19.44 (13/43.71) [13/77.47]'),
    ( 5, '| | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]'),
    ( 6, '| | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]'),
    ( 7, '| fac_d >= 71.5 : 33.6 (25/53.05) [15/47.35]'),
    ( 8, 'fac_a >= 64'),
    ( 9, '| fac_d < 83.5'),
    (10, '| | fac_a < 91'),
    (11, '| | | fac_e < 93.5'),
    (12, '| | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]'),
    (13, '| | | | fac_d >= 45'),
    (14, '| | | | | fac_e < 21.5 : 44.1 (5/16.58) [2/21.39]'),
    (15, '| | | | | fac_e >= 21.5'),
    (16, '| | | | | | fac_a < 77.5 : 33.45 (4/2.89) [1/0.03]'),
    (17, '| | | | | | fac_a >= 77.5 : 39.46 (7/10.21) [1/11.69]'),
    (18, '| | | fac_e >= 93.5 : 45.97 (2/8.03) [1/107.71]'),
    (19, '| | fac_a >= 91 : 42.26 (9/9.57) [4/69.03]'),
    (20, '| fac_d >= 83.5 : 47.1 (9/30.24) [6/40.15]')
) AS Src (ID, Ruleset)
;
Then we parse this into a dbo.Rulesets table that encodes the tree in the form needed for a data-probing query:
-- Parse dbo.WekaTree into dbo.Rulesets: for every leaf line, one row per
-- (Factor, Operator) holding the tightest bound found on the path from the
-- root down to that leaf.
WITH Two AS (SELECT 1 AS One UNION ALL SELECT 1),
Four AS (SELECT 1 AS One FROM Two AS T1 CROSS JOIN Two AS T2),
Sixteen AS (SELECT 1 AS One FROM Four AS F1 CROSS JOIN Four AS F2),
-- Candidate depth levels 1..256 (16 * 16 rows).
Levels AS (
    SELECT Row_Number() OVER (ORDER BY (SELECT 1)) AS Num
    FROM Sixteen AS S1 CROSS JOIN Sixteen AS S2
),
-- Depth = number of '|' markers + 1; Data = the line with its '| ' prefixes removed.
Stripped AS (
    SELECT
        T.ID,
        T.Ruleset,
        Len(T.Ruleset) - Len(Replace(T.Ruleset, '|', '')) + 1 AS Depth,
        Replace(T.Ruleset, '| ', '') AS Data
    FROM dbo.WekaTree AS T
),
-- Split "<factor> <operator> <value>[ : <remainder>]" one space-delimited token at a time.
Parsed AS (
    SELECT
        S.ID,
        S.Ruleset,
        S.Depth,
        Tok1.Factor,
        Tok2.Operator,
        Tok3.Value,
        Tok3.Remainder
    FROM Stripped AS S
    CROSS APPLY (
        SELECT
            Left(S.Data, CharIndex(' ', S.Data) - 1) AS Factor,
            Substring(S.Data, CharIndex(' ', S.Data) + 1, 8000) AS OperatorString
    ) AS Tok1
    CROSS APPLY (
        SELECT
            Left(Tok1.OperatorString, CharIndex(' ', Tok1.OperatorString) - 1) AS Operator,
            Substring(Tok1.OperatorString, CharIndex(' ', Tok1.OperatorString) + 1, 8000) AS ValueString
    ) AS Tok2
    CROSS APPLY (
        SELECT
            -- The appended ' ' lets CharIndex find an end position on non-leaf
            -- lines, where the value is the last token.
            Convert(decimal(10,2), Left(Tok2.ValueString, CharIndex(' ', Tok2.ValueString + ' ') - 1)) AS Value,
            -- + 3 skips the " : " separator; yields '' on non-leaf lines.
            Substring(Tok2.ValueString, CharIndex(' ', Tok2.ValueString + ' ') + 3, 8000) AS Remainder
    ) AS Tok3
)
SELECT
    Leaf.ID,
    Leaf.Remainder,
    Bound.Factor,
    Bound.Operator,
    Bound.Value
INTO dbo.Rulesets
FROM Parsed AS Leaf
OUTER APPLY (
    SELECT
        Anc.Factor,
        Anc.Operator,
        -- Multiplying by +1 / -1 around Min() gives the smallest value for '<'
        -- and the largest value for '>=' in a single expression.
        Min(Anc.Value * Sgn.Multiplier) * Sgn.Multiplier AS Value
    FROM Levels AS Lv
    -- For each depth level up to the leaf's own, take the closest line at that
    -- depth at or before the leaf.
    CROSS APPLY (
        SELECT TOP 1
            P.Factor,
            P.Operator,
            P.Value
        FROM Parsed AS P
        WHERE P.Depth = Lv.Num
            AND P.ID <= Leaf.ID
        ORDER BY P.ID DESC
    ) AS Anc
    CROSS APPLY (
        SELECT 1 WHERE Anc.Operator = '<'
        UNION ALL SELECT -1 WHERE Anc.Operator = '>='
    ) AS Sgn (Multiplier)
    WHERE Lv.Num <= Leaf.Depth
    GROUP BY
        Anc.Factor,
        Anc.Operator,
        Sgn.Multiplier
) AS Bound
-- Only leaf lines carry a remainder after the comparison.
WHERE Leaf.Remainder <> ''
ORDER BY
    Leaf.ID,
    Bound.Factor,
    Bound.Operator
;
Here's what the resulting data looks like (only leaf node IDs are needed and present):
ID Remainder Factor Operator Value
---- --------------------------- ------ -------- ---------------------------------------
4 19.44 (13/43.71) [13/77.47] fac_a < 49.5
4 19.44 (13/43.71) [13/77.47] fac_d < 23.5
5 24.25 (32/23.65) [16/49.15] fac_a < 49.5
5 24.25 (32/23.65) [16/49.15] fac_d < 71.5
5 24.25 (32/23.65) [16/49.15] fac_d >= 23.5
6 30.8 (10/17.68) [5/22.44] fac_a < 64.0
6 30.8 (10/17.68) [5/22.44] fac_a >= 49.5
6 30.8 (10/17.68) [5/22.44] fac_d < 71.5
7 33.6 (25/53.05) [15/47.35] fac_a < 64.0
7 33.6 (25/53.05) [15/47.35] fac_d >= 71.5
12 31.9 (16/23.25) [3/64.14] fac_a < 91.0
12 31.9 (16/23.25) [3/64.14] fac_a >= 64.0
12 31.9 (16/23.25) [3/64.14] fac_d < 45.0
12 31.9 (16/23.25) [3/64.14] fac_e < 93.5
14 44.1 (5/16.58) [2/21.39] fac_a < 91.0
14 44.1 (5/16.58) [2/21.39] fac_a >= 64.0
14 44.1 (5/16.58) [2/21.39] fac_d < 83.5
14 44.1 (5/16.58) [2/21.39] fac_d >= 45.0
14 44.1 (5/16.58) [2/21.39] fac_e < 21.5
16 33.45 (4/2.89) [1/0.03] fac_a < 77.5
16 33.45 (4/2.89) [1/0.03] fac_a >= 64.0
16 33.45 (4/2.89) [1/0.03] fac_d < 83.5
16 33.45 (4/2.89) [1/0.03] fac_d >= 45.0
16 33.45 (4/2.89) [1/0.03] fac_e < 93.5
16 33.45 (4/2.89) [1/0.03] fac_e >= 21.5
17 39.46 (7/10.21) [1/11.69] fac_a < 91.0
17 39.46 (7/10.21) [1/11.69] fac_a >= 77.5
17 39.46 (7/10.21) [1/11.69] fac_d < 83.5
17 39.46 (7/10.21) [1/11.69] fac_d >= 45.0
17 39.46 (7/10.21) [1/11.69] fac_e < 93.5
17 39.46 (7/10.21) [1/11.69] fac_e >= 21.5
18 45.97 (2/8.03) [1/107.71] fac_a < 91.0
18 45.97 (2/8.03) [1/107.71] fac_a >= 64.0
18 45.97 (2/8.03) [1/107.71] fac_d < 83.5
18 45.97 (2/8.03) [1/107.71] fac_e >= 93.5
19 42.26 (9/9.57) [4/69.03] fac_a >= 91.0
19 42.26 (9/9.57) [4/69.03] fac_d < 83.5
20 47.1 (9/30.24) [6/40.15] fac_a >= 64.0
20 47.1 (9/30.24) [6/40.15] fac_d >= 83.5
And I've created some fake sample probe data. Note that here, the factors are in rows, not in columns. If you have fac_a through fac_z and then fac_aa through fac_zz, you're still in business.
-- Build dbo.LookupData: fake probe data with one row per (probe number, factor).
-- Each probe number 0..1023 is read as a five-digit base-4 number; each digit
-- selects one of the values 25/50/75/100 for one factor.
WITH Two AS (SELECT 1 AS One UNION ALL SELECT 1),
Four AS (SELECT 1 AS One FROM Two AS T1 CROSS JOIN Two AS T2),
Sixteen AS (SELECT 1 AS One FROM Four AS F1 CROSS JOIN Four AS F2),
-- 4 * 16 * 16 = 1024 rows, numbered from 0.
Probes AS (
    SELECT Row_Number() OVER (ORDER BY (SELECT 1)) - 1 AS Num
    FROM Four AS F
    CROSS JOIN Sixteen AS S1
    CROSS JOIN Sixteen AS S2
)
SELECT
    P.Num,
    Fac.Factor,
    Lvl.Value
INTO dbo.LookupData
FROM Probes AS P
-- Mult is the base-4 place value of the digit that drives this factor.
CROSS JOIN (VALUES
    (1, 'fac_a'), (4, 'fac_b'), (16, 'fac_c'), (64, 'fac_d'), (256, 'fac_e')
) AS Fac (Mult, Factor)
-- Map the digit (0..3) to the probe value.
INNER JOIN (VALUES
    (0, 25), (1, 50), (2, 75), (3, 100)
) AS Lvl (Pattern, Value)
    ON (P.Num / Fac.Mult) % 4 = Lvl.Pattern
WHERE P.Num <= 1023
;
Example probe data:
Num Factor Value
------ ------ -----------
0 fac_a 25
0 fac_b 25
0 fac_c 25
0 fac_d 25
0 fac_e 25
1 fac_a 50
1 fac_b 25
1 fac_c 25
1 fac_d 25
1 fac_e 25
2 fac_a 75
2 fac_b 25
2 fac_c 25
2 fac_d 25
2 fac_e 25
...
1021 fac_a 50
1021 fac_b 100
1021 fac_c 100
1021 fac_d 100
1021 fac_e 100
1022 fac_a 75
1022 fac_b 100
1022 fac_c 100
1022 fac_d 100
1022 fac_e 100
1023 fac_a 100
1023 fac_b 100
1023 fac_c 100
1023 fac_d 100
1023 fac_e 100
Finally, here's the query that shows the innermost ID row from the Weka Tree that matches the conditions of the probe row. Please keep in mind that I have not created suitable indexes here, and you should do so. The probe data uses the values 25, 50, 75, and 100 for each of the factors, which creates every possible combination:
-- For every probe (Num) in dbo.LookupData, find the leaf of the tree whose
-- rules the probe satisfies, and show the probe's rows next to that leaf's
-- original line from dbo.WekaTree.
WITH LeafFactors AS (
    -- How many distinct factors each leaf tests. A probe can only match a
    -- leaf if it supplies a value for every one of them.
    SELECT
        ID,
        FactorCount = Count(DISTINCT Factor)
    FROM
        dbo.Rulesets
    GROUP BY
        ID
), Matches AS (
    SELECT
        L.Num,
        R.ID
    FROM
        dbo.LookupData L
        INNER JOIN dbo.Rulesets R
            ON L.Factor = R.Factor
        INNER JOIN LeafFactors F
            ON R.ID = F.ID
    GROUP BY
        L.Num,
        R.ID,
        F.FactorCount
    HAVING
        -- Every rule that was compared must hold (a single failure drags Min to 0)...
        Min(CASE WHEN (
            R.Operator = '<'
            AND L.Value < R.Value
        ) OR (
            R.Operator = '>='
            AND L.Value >= R.Value
        ) THEN 1 ELSE 0 END) = 1
        -- ...and every factor the leaf tests must actually have been compared.
        -- Without this check, a probe that lacks a factor skips that factor's
        -- rules in the inner join and could match a leaf it does not belong to.
        AND Count(DISTINCT R.Factor) = F.FactorCount
)
SELECT
    L.Num,
    L.Factor,
    L.Value,
    W.ID,
    W.Ruleset
FROM
    dbo.LookupData L
    INNER JOIN Matches M
        ON L.Num = M.Num
    LEFT JOIN dbo.WekaTree W
        ON M.ID = W.ID
ORDER BY
    L.Num,
    L.Factor -- tie-breaker so rows within one probe come back in a stable order
;
Example results:
Num Factor Value ID Ruleset
--- ------ ----- -- -------------------------------------------------------
0 fac_a 25 5 | | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]
0 fac_b 25 5 | | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]
0 fac_c 25 5 | | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]
0 fac_d 25 5 | | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]
0 fac_e 25 5 | | | fac_d >= 23.5 : 24.25 (32/23.65) [16/49.15]
1 fac_a 50 6 | | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]
1 fac_b 25 6 | | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]
1 fac_c 25 6 | | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]
1 fac_d 25 6 | | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]
1 fac_e 25 6 | | fac_a >= 49.5 : 30.8 (10/17.68) [5/22.44]
2 fac_a 75 12 | | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]
2 fac_b 25 12 | | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]
2 fac_c 25 12 | | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]
2 fac_d 25 12 | | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]
2 fac_e 25 12 | | | | fac_d < 45 : 31.9 (16/23.25) [3/64.14]
Please feel free to ask any questions you like--I'd be happy to help you get this working in a test against your own data. I can't promise instant response but I generally do check for activity on SO daily so would at least be able to respond within a day or two in most cases.