Query to find duplicate records and update them with a sequence number
-
02-03-2021 - |
Question
I am trying to find all records that have duplicate data and update each of them with a sequence number.
Let's say I have this data:
+-------------------------------------------+
| first_name | last_name | sequence_number |
+-------------------------------------------+
| John | Smith | NULL |
| John | Smith | NULL |
| Jane | Doe | NULL |
| Jane | Doe | NULL |
| Jane | Doe | NULL |
| Rock | Lee | NULL |
+-------------------------------------------+
My expected output is this:
+-------------------------------------------+
| first_name | last_name | sequence_number |
+-------------------------------------------+
| John | Smith | 1 |
| John | Smith | 2 |
| Jane | Doe | 1 |
| Jane | Doe | 2 |
| Jane | Doe | 3 |
| Rock | Lee | NULL |
+-------------------------------------------+
Solution
-- Demo table: one row per person. All three columns are nullable;
-- sequence_number is the column the later merge step writes to.
create table data (
    first_name      varchar2(20 char),
    last_name       varchar2(20 char),
    sequence_number number(10, 0)  -- scale 0: whole numbers only
);
-- Seed the sample rows. Columns are listed explicitly so the inserts do not
-- depend on the table's physical column order; sequence_number starts as null.
insert into data (first_name, last_name, sequence_number) values ('John', 'Smith', null);
insert into data (first_name, last_name, sequence_number) values ('John', 'Smith', null);
insert into data (first_name, last_name, sequence_number) values ('Jane', 'Doe', null);
insert into data (first_name, last_name, sequence_number) values ('Jane', 'Doe', null);
insert into data (first_name, last_name, sequence_number) values ('Jane', 'Doe', null);
insert into data (first_name, last_name, sequence_number) values ('Rock', 'Lee', null);
commit;
-- Show the starting state of the table.
select first_name, last_name, sequence_number from data;
FIRST_NAME LAST_NAME SEQUENCE_NUMBER
-------------------- -------------------- ---------------
John Smith
John Smith
Jane Doe
Jane Doe
Jane Doe
Rock Lee
--
-- Preview only (no data is changed): list every row whose
-- (first_name, last_name) pair occurs more than once, together with its
-- position inside that group. rowid is used as the ordering key because the
-- table has no other unique column.
with duplicated_names as (
    -- name pairs that appear on more than one row
    select first_name, last_name
    from data
    group by first_name, last_name
    having count(*) > 1
)
select
    d.first_name,
    d.last_name,
    d.rowid,
    row_number() over (
        partition by d.first_name, d.last_name
        order by d.rowid
    ) as sequence_number
from data d
where (d.first_name, d.last_name) in (
    select dn.first_name, dn.last_name
    from duplicated_names dn
)
;
FIRST_NAME LAST_NAME ROWID SEQUENCE_NUMBER
-------------------- -------------------- ------------------ ---------------
Jane Doe AAAJ7hAAEAAAAFjAAC 1
Jane Doe AAAJ7hAAEAAAAFjAAD 2
Jane Doe AAAJ7hAAEAAAAFjAAE 3
John Smith AAAJ7hAAEAAAAFjAAA 1
John Smith AAAJ7hAAEAAAAFjAAB 2
--
-- Assign a sequence number to every row that belongs to a duplicated
-- (first_name, last_name) group; rows with a unique name pair are filtered
-- out of the source and keep a null sequence_number.
--
-- The source reads data once and uses analytic functions for both the
-- position within the group (row_number) and the group size (count).
-- partition by places null names in the same group, so duplicates that have
-- a null first_name or last_name are numbered as well; a tuple IN comparison
-- against a group-by subquery would never match such rows.
--
-- rowid is aliased inside the inline view so the outer query and the ON
-- clause refer to an ordinary column rather than the pseudocolumn of a view.
merge into data
using (
    select
        rid,
        sequence_number
    from (
        select
            rowid as rid,
            row_number() over (partition by first_name, last_name order by rowid) as sequence_number,
            count(*) over (partition by first_name, last_name) as group_size
        from data
    )
    where group_size > 1
) data_with_sequences
on (data.rowid = data_with_sequences.rid)
when matched then
    update set data.sequence_number = data_with_sequences.sequence_number;
5 rows merged.
--
-- Make the assigned sequence numbers permanent, then show the final state.
-- Columns are named explicitly instead of using select *.
commit;
select first_name, last_name, sequence_number from data;
FIRST_NAME LAST_NAME SEQUENCE_NUMBER
-------------------- -------------------- ---------------
John Smith 1
John Smith 2
Jane Doe 1
Jane Doe 2
Jane Doe 3
Rock Lee
Licensed under: CC-BY-SA with attribution
Not affiliated with dba.stackexchange