2

I apologize if already exist a similar question, but if yes, I not found.

I'm new to programming in VBA and still do not know much of it, now I'm trying to run a function that will verify if in a column "B" are repeated velores and if exist will check in a column "C" where the highest value, copying the lowest to another table and deleting it.

The code already does all this however need to run in tables of 65 000 lines and it takes a long time, never got for running these tables, because even when I run in tables with 5000 or 10000 lines takes approximately 6 to 15 minutes.

My question is if there is any way to optimize the cycle that I'm using, it will be better to use a For Each or maintain the Do While Loop?

Here is the code I am using:

Function Copy()

    Worksheets("Sheet1").Range("A1:AQ1").Copy _
    Destination:=Worksheets("Sheet2").Range("A1")

    Dim lRow As Long
    Dim lRow2 As Long
    Dim Row As Long
    Dim countA As Long
    Dim countB As Long
    Dim t As Double

    lRow = 5000
    Row = 2
    countA = 0
    countB = 0

    Application.ScreenUpdating = False 
    ViewMode = ActiveWindow.View
    ActiveWindow.View = xlNormalView
    Application.EnableEvents = False
    Application.DisplayStatusBar = False

    ActiveSheet.DisplayPageBreaks = False
    lRow2 = lRow - 1
    t = Timer

     Do While lRow > 2


            If (Cells.Item(lRow, "B") <> Cells.Item(lRow2, "B")) Then 

                lRow = lRow - 1
                lRow2 = lRow - 1

            Else

                If (Cells.Item(lRow, "C") > Cells.Item(lRow2, "C")) Then 

                    Sheets("Sheet1").Rows(lRow2).Copy Sheets("Sheet2").Rows(Row)
                    Rows(lRow2).Delete 
                    lRow = lRow - 1
                    Row = Row + 1
                    countA = countA + 1


                Else

                    Sheets("Sheet1").Rows(lRow).Copy Sheets("Sheet2").Rows(Row)
                    Rows(lRow).Delete 
                    lRow = lRow - 1
                    Row = Row + 1
                    countB = countB + 1

                End If

                lRow2 = lRow2 - 1

           End If

    Loop

    Application.DisplayStatusBar = True
    ActiveWindow.View = ViewMode
    Application.ScreenUpdating = False 
    MsgBox "A = " & countA & " B = " & countB & "Time (minutes): " & (Timer - t) / 60

End Function
4
  • Are the tables a maximum of 65,000 rows or could they be larger than 65,536 rows? Commented Oct 27, 2015 at 18:15
  • It should be mentioned that the above routine was designed to work on sorted data. Commented Oct 27, 2015 at 21:27
  • The tables could be larger than 65,536 rows. Commented Oct 28, 2015 at 9:03
  • I've tested my solution below on 75K rows. Commented Oct 28, 2015 at 9:11

3 Answers 3

1

As long as you've entered the VBA environment for a solution, there seems little point in not continuing that avenue toward the best route possible. The following uses a pair of Scripting.Dictionaries to build two sets of data from the original matrix in Sheet1. In addition to the main sub procedure, there are two short 'helper' functions that breach the 65536 barrier that Application.Index and Application.Transpose suffer from. These are necessary to peel out a row from a large two-dimensioned array and flip the orientation of the results while simultaneously splitting the stored records.

Sub Keep_Highest_BC()
    Dim d As Long, dHIGHs As Object, dDUPEs As Object
    Dim v As Long, vTMPs() As Variant, iCOLs As Long

    Debug.Print Timer
    'On Error GoTo bm_Safe_Exit
    Set dHIGHs = CreateObject("Scripting.Dictionary")
    Set dDUPEs = CreateObject("Scripting.Dictionary")

    With Worksheets("Sheet1")
        iCOLs = .Columns("AQ").Column
        .Cells(1, 1).Resize(2, iCOLs).Copy _
          Destination:=Worksheets("Sheet2").Cells(1, 1)
        With .Cells(2, 1).Resize(.Cells(Rows.Count, 2).End(xlUp).Row - 1, iCOLs)
            vTMPs = .Value2
        End With
    End With

    For v = LBound(vTMPs, 1) To UBound(vTMPs, 1)
        If dHIGHs.exists(vTMPs(v, 2)) Then
            If CDbl(Split(dHIGHs.Item(vTMPs(v, 2)), ChrW(8203))(2)) < vTMPs(v, 3) Then
                dDUPEs.Add Key:=vTMPs(v, 2) & v, Item:=dHIGHs.Item(vTMPs(v, 2))
                dHIGHs.Item(vTMPs(v, 2)) = joinAtoAQ(vTMPs, v)
            Else
                dDUPEs.Add Key:=vTMPs(v, 2) & v, Item:=joinAtoAQ(vTMPs, v)
            End If
        Else
            dHIGHs.Add Key:=vTMPs(v, 2), Item:=joinAtoAQ(vTMPs, v)
        End If
    Next v

    With Worksheets("Sheet1")
        With .Cells(2, 1).Resize(.Cells(Rows.Count, 2).End(xlUp).Row - 1, iCOLs)
            .ClearContents
            With .Resize(dHIGHs.Count, iCOLs)
                .Value = transposeSplitLargeItemArray(dHIGHs.items)
            End With
        End With
    End With

    With Worksheets("Sheet2")
        With .Cells(1, 1).CurrentRegion.Offset(1, 0)
            .ClearContents
            With .Resize(dDUPEs.Count, iCOLs)
                .Value = transposeSplitLargeItemArray(dDUPEs.items)
                .Rows(1).Copy
                .PasteSpecial Paste:=xlPasteFormats
                Application.CutCopyMode = False
            End With
        End With
    End With

bm_Safe_Exit:
    dHIGHs.RemoveAll: Set dHIGHs = Nothing
    dDUPEs.RemoveAll: Set dDUPEs = Nothing

    Debug.Print Timer
End Sub

Function joinAtoAQ(vTMP As Variant, ndx As Long)
    Dim sTMP As String, v As Long

    For v = LBound(vTMP, 2) To UBound(vTMP, 2)
        sTMP = sTMP & vTMP(ndx, v) & ChrW(8203)
    Next v
    joinAtoAQ = Left$(sTMP, Len(sTMP) - 1)
End Function

Function transposeSplitLargeItemArray(vITMs As Variant)
    Dim v As Long, w As Long, vTMPs As Variant, vITM As Variant

    ReDim vTMPs(LBound(vITMs) To UBound(vITMs), LBound(vITMs) To UBound(Split(vITMs(LBound(vITMs)), ChrW(8203))))
    For v = LBound(vITMs) To UBound(vITMs)
        vITM = Split(vITMs(v), ChrW(8203))
        For w = LBound(vITM) To UBound(vITM)
            vTMPs(v, w) = vITM(w)
        Next w
    Next v

    transposeSplitLargeItemArray = vTMPs
End Function

Once the two dictionaries have been filled with maximum values and duplicate lesser values, the arrays are returned to the two worksheets en masse and subsequently split back into the 43 columns. One final effort is made to restore the original formatting from Sheet1 into Sheet2's data area.

I tested this on 75,000 rows of columns A through column AQ containing random sample data first with predominantly duplicate values in column B and then with roughly half duplicate values in column B. The first single pass was processed in 13.19 seconds; the second in 14.22. While your own results will depend on the machine you are running it on, I would expect a significant improvement over your original code. Post your own timed results (start and stop in seconds within the VBE's Immediate window, Ctrl+G) into the comments if you can.

Sign up to request clarification or add additional context in comments.

8 Comments

btw, the data does not have to be sorted to use the above sub procedure.
Thanks for the help. I tested twice and the best time I take was 11 second. I just had a problem with it, which was unformat me two whole columns and I tested before in a table of 10,000 lines and would not allow run.
a) the values being transferred are raw .Value2 values. The formatting is retrieved from the first line of Sheet1. b) I've tested it on a number of row samples. If you would like me to check the 10K data that does not run, you will have to post a redacted data sample somewhere that I can download.
I don't know what happen early but it already work with the 10k rows. But the unformatted problem keeps. Well i cant say is unformatted because the cells are formatted but the data in that columns are changed, I have dates and time in that columns in (for example: 29-09-2015 13:04:34) and in the end of the program run they stay like this in the table ###########, and like this in the formula bar 42276544837963. It could be a problem on my table or is something in the code that need be change for could support date information ??
What happens if you apply a number format like dd-mmm-yyyy hh:mm:ss to that column? What happens if you change .Value2 to .Value in the code?
|
1

Everything i could think of has already been mentioned above, however this code snippet might help someone out, it's the least you could do to make a macro faster (in case no interaction is required during runtime of the macro)

Run Optimize(True) at the start of your code, Optimize(False) at the end.

'Toggles unnecessary excel features
Sub Optimize(start As Boolean) 
    On Error Resume Next
    With Application
        .ScreenUpdating = Not (start)
        .DisplayStatusBar = Not (start)
        .EnableEvents = Not (start)
        If start Then
            .Calculation = xlCalculationManual
        Else
            .Calculation = xlCalculationAutomatic
        End If
    End With
    On Error GoTo 0
End Sub

Comments

0

Typically it's faster to perform a single delete at the end of the loop.

Untested:

Function Copy()

    Dim shtSrc As Worksheet, shtDest As Worksheet
    Dim lRow As Long, Row As Long, viewmode
    Dim countA As Long, countB As Long
    Dim t As Double, rw As Range, rngDel As Range

    lRow = 5000
    Row = 2
    countA = 0
    countB = 0

    Set shtSrc = Worksheets("Sheet1")
    Set shtDest = Worksheets("Sheet2")

    shtSrc.Range("A1:AQ1").Copy Destination:=shtDest.Range("A1")

    Application.ScreenUpdating = False
    viewmode = ActiveWindow.View
    ActiveWindow.View = xlNormalView
    Application.EnableEvents = False
    Application.DisplayStatusBar = False

    ActiveSheet.DisplayPageBreaks = False

    t = Timer

     Do While lRow > 2

            Set rw = shtSrc.Rows(lRow)

            If (rw.Cells(2) = rw.Cells(2).Offset(-1, 0)) Then

                If (rw.Cells(3) > rw.Cells(3).Offset(-1, 0)) Then
                    rw.Offset(-1, 0).Copy shtDest.Rows(Row)
                    AddToRange rngDel, rw.Offset(-1, 0)
                    countA = countA + 1
                Else
                    rw.Copy shtDest.Rows(Row)
                    AddToRange rngDel, rw
                    countB = countB + 1
                End If

                Row = Row + 1

           End If

           lRow = lRow - 1

    Loop

    'anything to delete?
    If Not rngDel Is Nothing Then
        rngDel.Delete
    End If

    Application.DisplayStatusBar = True
    ActiveWindow.View = viewmode
    Application.ScreenUpdating = False
    MsgBox "A = " & countA & " B = " & countB & "Time (minutes): " & (Timer - t) / 60

End Function

'utility sub for building up a range
Sub AddToRange(rngTot, rng)
    If rngTot Is Nothing Then
        Set rngTot = rng
    Else
        Set rngTot = Application.Union(rng, rngTot)
    End If
End Sub

1 Comment

I tried running the code, but when it comes the part of deleting, he gaves a error: "Run-time error 1004 " "Application-defined or object-defined error"

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.