Question

There has to be a better way to do this.

I have a table in which I flagged all the unique records like this:

    -- Flag one row per StrippedName as the "parent" by pointing its ParenId at its own TransID.
    WITH
    CTE( TransId, RowNumber )
    AS
    (
        SELECT  TransID ,
                -- Number the rows by TransID so the row numbered 1 is the same on every run;
                -- ordering by the partition column itself leaves that choice arbitrary.
                -- NOTE(review): assumes TransID is unique in dbo.Members -- confirm.
                ROW_NUMBER() OVER ( PARTITION BY StrippedName ORDER BY TransID )
        FROM    dbo.Members
    )
    UPDATE  dbo.Members
    SET     ParenId = TransID
    WHERE   TransID IN ( SELECT TransId FROM CTE WHERE RowNumber = 1 );

Now I would like to update all the duplicate records (where RowNumber > 1) so that they point to the ParentId of their unique record. Right now I'm using a UDF, which works, but it takes a very long time. Here is the UDF:

-- Looks up the parent TransId for a stripped business name.
--   @TransId          value returned when no flagged row is found
--   @strippedBusName  name to match against dbo.Members.StrippedBusName
-- Returns the TransId of a dbo.Members row with that name whose ParenId is set,
-- or @TransId when no such row exists.
CREATE FUNCTION dbo.getParentTransId ( @TransId INT, @strippedBusName VARCHAR(200) )
RETURNS INT
AS
BEGIN
    DECLARE @flaggedTransId INT

    -- Stays NULL when no row matches.
    SELECT  @flaggedTransId = m.TransId
    FROM    dbo.Members AS m
    WHERE   m.ParenId IS NOT NULL
      AND   m.StrippedBusName = @strippedBusName

    RETURN COALESCE( @flaggedTransId, @TransId )
END
GO

And here is the update statement I'm currently using:

-- Recompute ParenId for every row of dbo.Members (no WHERE clause on purpose);
-- the UDF is called with each row's own TransID and StrippedBusName.
UPDATE  dbo.Members
SET     ParenId = dbo.getParentTransId ( TransID, StrippedBusName )
GO

Is there a way to do the same update without using the UDF?

Was it helpful?

Solution 2

I guess this should work:

-- Set every row's ParenId to the TransId of the flagged row (ParenId IS NOT NULL)
-- that shares its StrippedBusName, or to its own TransId when there is none.
-- T-SQL does not accept an alias directly after UPDATE <table>; the alias is
-- declared in the FROM clause and used as the update target.
UPDATE  dt
SET     dt.ParenId = COALESCE(
            ( SELECT  parent.TransId
              FROM    dbo.Members AS parent
              WHERE   parent.StrippedBusName = dt.StrippedBusName
                AND   parent.ParenId IS NOT NULL ),
            dt.TransId )
FROM    dbo.Members AS dt

I am not sure, but the following approach should be faster:

-- Same update written as a self-join: dt1 is the already-flagged row
-- (ParenId IS NOT NULL) with the same StrippedBusName. The LEFT JOIN keeps rows
-- that have no flagged match, and COALESCE then falls back to the row's own TransId.
-- The alias declared in FROM is the update target (UPDATE <table> <alias> is not valid T-SQL).
UPDATE  dt
SET     dt.ParenId = COALESCE( dt1.TransId, dt.TransId )
FROM    dbo.Members AS dt
LEFT JOIN dbo.Members AS dt1
    ON  dt.StrippedBusName = dt1.StrippedBusName
    AND dt1.ParenId IS NOT NULL

OTHER TIPS

Here's a test script that shows @gotqn's answer works:

-- create table
-- Schema-qualified so it matches the dbo.DupUpdateTest references used by the
-- INSERT, UPDATE and SELECT statements in the rest of this script.
CREATE TABLE dbo.DupUpdateTest
(
    Id      INT NOT NULL PRIMARY KEY IDENTITY( 1, 1 ),  -- surrogate key, generated automatically
    TransId INT NOT NULL,
    OriginalName    VARCHAR(MAX),
    StrippedName    VARCHAR(MAX),                       -- value the duplicates are matched on
    ParentId    INT NULL                                -- NULL until the update below fills it in
)
GO

-- insert data
-- Column lists are explicit so the inserts do not depend on the table's column order;
-- Id is omitted because it is generated by IDENTITY.
-- Three groups with duplicates (name1, name2, name55) and two singletons (name100, name108).
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (2, 'name 1', 'name1', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (3, 'name 1', 'name1', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (6, 'name 1', 'name1', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (8, 'name 2', 'name2', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (11, 'name 2', 'name2', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (15, 'name 55', 'name55', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (23, 'name 55', 'name55', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (99, 'name 100', 'name100', NULL)
INSERT  INTO dbo.DupUpdateTest ( TransId, OriginalName, StrippedName, ParentId ) VALUES (122, 'name 108', 'name108', NULL)

-- update parent id match
-- Step 1: flag one row per StrippedName (the one with the lowest TransId) as its
-- own parent. Without this step no row has a ParentId for the join below to find.
UPDATE  dt
SET     dt.ParentId = dt.TransId
FROM    dbo.DupUpdateTest AS dt
WHERE   dt.TransId = ( SELECT  MIN( first_row.TransId )
                       FROM    dbo.DupUpdateTest AS first_row
                       WHERE   first_row.StrippedName = dt.StrippedName );

-- Step 2: point every row at the flagged parent of its StrippedName group.
-- The ParentId IS NOT NULL filter limits dt1 to the single flagged row per group;
-- joining on StrippedName alone would match several rows and leave the value
-- written to ParentId arbitrary.
UPDATE  dt
SET     dt.ParentId = COALESCE( dt1.TransId, dt.TransId ) -- Returns the first nonnull expression among its arguments
FROM    dbo.DupUpdateTest AS dt
LEFT JOIN dbo.DupUpdateTest AS dt1
    ON  dt.StrippedName = dt1.StrippedName
    AND dt1.ParentId IS NOT NULL;

-- view results
-- Explicit columns and an ORDER BY keep the output stable from run to run.
SELECT  Id, TransId, OriginalName, StrippedName, ParentId
FROM    dbo.DupUpdateTest
ORDER BY Id
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top