Delete duplicate records

Delete Duplicate Records using SQL

By duplicate record I mean that every field in one record is identical to every field in a different record, i.e. a duplicate is where there is no way of telling two or more records apart. If you just need to remove records which are similar (i.e. one or more fields are identical but there are one or more fields which are different) then instead refer to how to delete similar records.

To check that you have duplicate records in your table do the following:

select count(*) from MyTable

and

select distinct * from MyTable

unfortunately SQL does not permit a select count(distinct). You have duplicate records if the number of records returned by the second query is less than the number of records returned by the first.

Unfortunately there is no way in SQL to delete one of these duplicates without deleting all of them. They are identical after all, so there is no SQL query that you could put together which could distinguish between them.

What you can do is to copy all the distinct records into a new table:

select distinct *  into NewTable  from MyTable

This query will create a new table (NewTable in my example) containing all the records in the original table but without any records being duplicated. It will therefore preserve a single copy of those records which were duplicated.

Because this query creates (and populates) a new table, it will fail if the table already exists.

Delete Similar Records

To delete similar records, i.e. where the records are not the same but one field is the same and only one copy needs to be preserved, try the following SQL:

delete T1  from MyTable T1, MyTable T2  where T1.dupField = T2.dupField  and T1.uniqueField > T2.uniqueField

This will delete all records from the table MyTable which have the same value for the field dupField, leaving that record which has the lowest value in uniqueField.

The above SQL works on SQL-Server and MySQL but not in MS-Access. For MS-Access try:

delete from MyTable 
where uniqueField not in  (select min(uniqueField) from MyTable T2
 where T2.dupField=MyTable.dupField)

This syntax also works in SQLServer (but not in MySQL) but may not be as efficient for large tables.

using System;
using System.Data;
using System.Configuration;
using System.Collections;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using App.Common;

public partial class modules_Kit_paging : System.Web.UI.UserControl
{
private int totalCount = 0, pageSize = 15, pageIndex = 1, pageCount = 0, range = 7;
private string applicationKey = null;

    /// <summary>
    /// 应用程序标识符
    /// </summary>
    public string ApplicationKey
    {
        set { applicationKey = value; }
    }

    /// <summary>
    /// 设置记录总计数
    /// </summary>
    public int TotalCount
    {
        set { totalCount = value; }
    }

    /// <summary>
    /// 设置当前页记录条数
    /// </summary>
    public int PageCount
    {
        set { pageCount = value; }
    }

    /// <summary>
    /// 设置页显示数
    /// </summary>
    public int PageSize
    {
        get { return pageSize; }
        set { pageSize = value; }
    }

    /// <summary>
    /// 获取和设置当前页索引数
    /// </summary>
    //public int PageIndex
    //{
    //    get {
    //        if (ViewState["pageIndex"] != null)
    //            pageIndex = (int)ViewState["pageIndex"];
    //        else
    //            pageIndex = 0;

    //        return pageIndex;
    //    }
    //    set
    //    {
    //        pageIndex = value;
    //        Toolkit.PageIndex=value;
    //        ViewState["pageIndex"] = value;
    //    }
    //}

    /// <summary>
    /// 设置页显示序列范围
    /// </summary>
    public int Range
    {
        set { range = value; }
    }

    protected void Page_Load(object sender, EventArgs e)
    {
        if (IsPostBack)
        {
            if (int.TryParse(tbGoto.Text, out pageIndex))
            {

                CacheKit.SetPageIndex(applicationKey, pageIndex);
                Response.Redirect(HttpContext.Current.Request.RawUrl);
            }
        }

        lbTotalRs.Text = totalCount.ToString();
        pageIndex = CacheKit.PageIndex(applicationKey);
        if (totalCount > 0) Bind();

}

    private void Bind()
    {
        //设置记录开始数
        int startRec = pageSize * (pageIndex - 1) + 1;

//'设置记录结尾数
int stopRec = pageSize + startRec - 1;

bool isPrev = false, isNext = false;

        //本页记录数
        lbStartRec.Text = startRec.ToString();
        lbStopRec.Text = stopRec.ToString();

        int arg = 1;
        //设置向前翻页
        if (pageIndex > 0)
        {
            isPrev = true;
            arg = pageIndex;
            lkbtnPrev.CommandArgument = arg.ToString();
        }
        else
        {
            isPrev = false;
        }

lkbtnPrev.Enabled = isPrev;

//设置后翻页

        if (totalCount > stopRec)
        {
            isNext = true;
            arg = pageIndex;
            lkbtnNext.CommandArgument = arg.ToString();
        }
        else
        {
            isNext = false;
        }

lkbtnNext.Enabled = isNext;

//当前页

arg = pageIndex;
lbPageIndex.Text = arg.ToString();

ArrayList al = new ArrayList();

int dx1 = ((startRec) / (pageSize * range)) * pageSize * range + 1;
int dx2 = 0;

        if ((dx1 + pageSize * range - 1) > totalCount)
            dx2 = (totalCount / pageSize) * pageSize + 1;
        else
            dx2 = dx1 + pageSize * range - 1;

int x = 1, y = 1;

        while (x < totalCount + 1)
        {
            if (x >= dx1 && x < dx2)
            {
                al.Add(y);
                x = x + pageSize;
                y = y + 1;

            }
            else
            {
                al.Add(y);

                x = x + range * pageSize;
                y = y + range;
            }
        }

dlPagingNumber.DataSource = al;
dlPagingNumber.DataBind();

}

    protected void lkbtnPrev_Click(object sender, EventArgs e)
    {
        if (int.TryParse(lkbtnPrev.CommandArgument, out pageIndex))
        {
            CacheKit.SetPageIndex(applicationKey, pageIndex);
            Response.Redirect(HttpContext.Current.Request.RawUrl);
        }
    }
    protected void lkbtnNext_Click(object sender, EventArgs e)
    {
        if (int.TryParse(lkbtnNext.CommandArgument, out pageIndex))
        {
            CacheKit.SetPageIndex(applicationKey, pageIndex);
            Response.Redirect(HttpContext.Current.Request.RawUrl);
        }
    }

    protected void lbPagingNumber_Click(object sender, EventArgs e)
    {
        LinkButton lb = (LinkButton)sender;
        if (int.TryParse(lb.CommandArgument, out pageIndex))
        {
            CacheKit.SetPageIndex(applicationKey, pageIndex );
            Response.Redirect(HttpContext.Current.Request.RawUrl);
        }
    }
}

(责任编辑：IT)

搜索

热门标签:

Delete duplicate records

Delete Duplicate Records using SQL

Delete Similar Records