2

I have used code from this website to pull data from site:

Option Explicit
Public Sub GetInfo()
    Dim sResponse As String, i As Long, Html As New HTMLDocument
    Dim prices As Object, info As Object
    Application.ScreenUpdating = False
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://bazashifer.ru/proflist-profnastil", False
        .send
        sResponse = .responseText
    End With
    With Html
        .body.innerHTML = sResponse
        Set info = .querySelectorAll("div.views-field.views-field-title")
        Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
    End With
    With Worksheets(2)
        For i = 0 To info.Length - 1
            .Cells(i + 1, 1) = info(i).innerText
            .Cells(i + 1, 2) = prices(i).innerText
        Next i
    End With
    Application.ScreenUpdating = True
End Sub

The code above works just as intended. I implemented code to take multiply links ( link 1, link 2, link 3 ) :

Option Explicit
Public Sub GetInfoAll()
Dim wsSheet As Worksheet, Rows As Long, http As New XMLHTTP60, Html As New HTMLDocument, links As Variant, link As Variant
Dim prices As Object, info As Object, i As Long, sResponse As String
Set wsSheet = Sheets(1)
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).row
links = wsSheet.Range("A1:A" & Rows)
    With http
    For Each link In links
        .Open "GET", link, False
        .send
         sResponse = .responseText
        With Html
        .body.innerHTML = sResponse
        Set info = .querySelectorAll("div.views-field.views-field-title")
        Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
    End With
    With Worksheets(2)
        For i = 0 To info.Length - 1
            .Cells(i + 1, 1) = info(i).innerText
            .Cells(i + 1, 2) = prices(i).innerText
        Next i
    End With
   Next link
 End With
End Sub

The above code works and should pull data into columns, but for the next link the code re-writes the data. Any help would be great. Thanks

3 Answers 3

3
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).row

You need to have something like this during your output loop on Sheet 2 because you cant hard code the number of results.

Edit here's what I really meant about the output

Public Sub GetInfoAll()
Dim wsSheet As Worksheet, Rows As Long, http As New XMLHTTP60, Html As New HTMLDocument, links As Variant, link As Variant
Dim prices As Object, info As Object, i As Long, sResponse As String, offsetRows As Long

Dim wb As Workbook
Set wb = Application.Workbooks("Book1")
Set wsSheet = wb.Sheets(1)
Rows = wsSheet.Cells(wsSheet.Rows.Count, "A").End(xlUp).Row
links = wsSheet.Range("A1:A" & Rows)
    With http
    For Each link In links
        .Open "GET", link, False
        .send
         sResponse = .responseText
        With Html
        .body.innerHTML = sResponse
        Set info = .querySelectorAll("div.views-field.views-field-title")
        Set prices = .querySelectorAll("div.views-field.views-field-field-cena-tovara")
    End With
    With wb.Worksheets(2)
        For i = 0 To info.Length - 1
            offsetRows = 0
            offsetRows = wb.Worksheets(2).Cells(wb.Worksheets(2).Rows.Count, "A").End(xlUp).Row + 1
            .Cells(offsetRows, 1) = info(i).innerText
            .Cells(offsetRows, 2) = prices(i).innerText
        Next i
    End With
   Next link
 End With
End Sub
Sign up to request clarification or add additional context in comments.

3 Comments

Isn't the rows variable only used to determine the range in which all the links are found? This takes place on Sheets(1), which isn't being changed within the loop, so I'm not sure it has to updated within the loop.
and ideally you also need to check the column A for blank values, if your query selector gets the price but not the name. this could cause the output table to shift. Something to consider.
\m/_( ^ u < )_\m/
2

I think it is ideal to make use of container and then loop through it to parse the desired content. Consider the following an example. You can always append the rest to suit your need.

Public Sub GetInfo()
    Dim Html As New HTMLDocument, Htmldoc As New HTMLDocument
    Dim Wb As Workbook, ws As Worksheet, R&, I&
    Dim link As Variant, linklist As Variant

    Set Wb = ThisWorkbook
    Set ws = Wb.Worksheets("output")

    linklist = Array( _
        "https://bazashifer.ru/armatura-stekloplastikovaya", _
        "https://bazashifer.ru/truby-0", _
        "https://bazashifer.ru/setka-stekloplastikovaya" _
       )

    For Each link In linklist
        With CreateObject("MSXML2.XMLHTTP")
            .Open "GET", link, False
            .send
            Html.body.innerHTML = .responseText
        End With

        With Html.querySelectorAll(".view-content > .views-row")
            For I = 0 To .Length - 1
                Htmldoc.body.innerHTML = .item(I).outerHTML
                R = R + 1: ws.Cells(R, 1) = Htmldoc.querySelector(".views-field-title a").innerText
                ws.Cells(R, 2) = Htmldoc.querySelector("[class*='cena-tovara'] > .field-content").innerText
            Next I
        End With
    Next link
End Sub

1 Comment

Totally agree with parsing and improving the actual extraction of data. I like your implementation.
1

I think the problem is that your columns aren't updated for each link.

    For i = 0 To info.Length - 1
        .Cells(i + 1, 1) = info(i).innerText
        .Cells(i + 1, 2) = prices(i).innerText
    Next i

In this part you write everything to the first and second column. This should be updated everytime you move to a new link.

So maybe add a 'colcount' variable which updates just before you move to the next link?

something like this:

Infocol = 1
Pricecol = 2
For Each link In links

....

.Cells(i + 1, Infocol) = info(i).innerText
.Cells(i + 1, Priceol) = prices(i).innerText


....

Infocol = infocol + 2
Pricecol = Pricecol + 2
Next link

You go +2 so you don't overwrite your price column with your new info.

1 Comment

It needs to be dynamic though, since it looks like he wants to make a big table of the output prices. This is why he needs to add a relative Row number, and re-calculate it during the loop. See the above answer modified for an example.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.