Developers new to SQL often expect that it should be easy to write a query to delete duplicate records, but that is not the case. Here is a stored procedure you can use to delete duplicates. There are two stored procedures here and you need both, both one just calls the other. After compiling the stored procedures, you can use the following to delete all the duplicate records from a table:
SPINTERNAL_DELETEDUPLICATES ‘mytable’
(
@table_name VARCHAR(32),
@Column_List VARCHAR(8000) OUT
)
AS
BEGIN
DECLARE @Column_ID INT,
@Column_Name VARCHAR(128)
SELECT @COlumn_List = ”
–partial query from http://vyaskn.tripod.com/code/generate_inserts.txt
SELECT @Column_ID = MIN(ORDINAL_POSITION)
FROM INFORMATION_SCHEMA.COLUMNS (NOLOCK)
WHERE TABLE_NAME = @table_name
–Loop through all the columns of the table, to get the column names and their data types
WHILE @Column_ID IS NOT NULL
BEGIN
SELECT @Column_Name = QUOTENAME(COLUMN_NAME)
FROM INFORMATION_SCHEMA.COLUMNS (NOLOCK)
WHERE ORDINAL_POSITION = @Column_ID AND
TABLE_NAME = @table_name
SET @Column_List = @Column_List + @Column_Name + ‘,’
SKIP_LOOP: –The label used in GOTO
SELECT @Column_ID = MIN(ORDINAL_POSITION)
FROM INFORMATION_SCHEMA.COLUMNS (NOLOCK)
WHERE TABLE_NAME = @table_name AND
ORDINAL_POSITION > @Column_ID
END
SET @Column_List = LEFT(@Column_List,LEN(@Column_List) – 1)
END
CREATE PROCEDURE SPINTERNAL_DELETEDUPLICATES
(
@table_name VARCHAR(32)
)
AS
BEGIN
–http://www.simple-talk.com/sql/t-sql-programming/removing-duplicates-from-a-table-in-sql-server/
DECLARE @columns VARCHAR(8000)
EXEC SPINTERNAL_LISTALLCOLUMNS @table_name, @columns OUT
DECLARE @SQL VARCHAR(8000)
SELECT @SQL = ‘with numbered as ( select ‘ + @columns + ‘, row_number() over
(partition by ‘ + @columns + ‘ order by ‘ + @columns + ‘) as nr
from ‘ + @table_name + ‘) DELETE FROM numbered where nr > 1’
EXECUTE( @SQL)
END