2

I want to extract data from a URL.

I want data in an Excel column.

Title Name (Image 1 and 2)
Street Address
AddressLocality
postalcode
adressregion
addresscountry

Image 1
enter image description here

Image 2
enter image description here

Here is my code.

url = Sheets("ExtData").Range("A" & N).Value
With CreateObject("MSXML2.XMLHTTP")
    .Open "GET", url, False
    .setRequestHeader "If-Modified-Since", "Sat, 1 Jan 2000 00:00:00 GMT"
    .send
    sResponse = StrConv(.responseBody, vbUnicode)
End With

Set html = New HTMLDocument
Dim titles As Object, addresses As Object, storesTextToDecipher As Object
With html
    .body.innerHTML = sResponse

    Set titles = .querySelectorAll(".jcn [title]")
    Set addresses = .querySelectorAll(".desk-add.jaddt")

    Til = titles.Item(i).outerHTML
    Add = addresses.Item(i).innerText
    Sheets("ExtData").Range("B" & N) = .getElementsByClassName("Title").Item(0)
    Sheets("ExtData").Range("C" & N) = .getElementById("comp_add").outerHTML
    Range("A" & N + 1).Select
End With

1 Answer 1

2

You can use a css class selector

Option Explicit
Public Sub GetInfo()
    Dim html As HTMLDocument
    Set html = New HTMLDocument                  '<  VBE > Tools > References > Microsoft Scripting Runtime
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.justdial.com/Agartala/Abhay-Varieties-Nor-Banamalipu/9999PX381-X381-141028162716-U1Z5_BZDET", False
        .send
        html.body.innerHTML = .responseText
    End With
    Debug.Print html.querySelector(".ph_hdr").innerText
End Sub

If you want separate lines then use

Dim items() As String, i As Long
items = Split(html.querySelector(".ph_hdr").innerText, ", ")
For i = LBound(items) To UBound(items)
    If items(i) <> vbNullString Then
        Activesheet.Cells(i + 1, 1) = items(i)
    End If
Next

An odd situation is that normally I would grab all the script tags with json using a css selector of script\[type='application/ld+json'\] and loop that looking for the info1. However, despite the info being present when I look at a text file write out, the moment I use a DOM parser I cannot find that info. So, much as I hate to advocate regex with html, here is a regex solution:

Option Explicit
Public Sub GetInfo()
    Dim html As HTMLDocument, s As String, re As Object
    Set re = CreateObject("vbscript.regexp")
    Set html = New HTMLDocument                  '<  VBE > Tools > References > Microsoft Scripting Runtime
    With CreateObject("MSXML2.XMLHTTP")
        .Open "GET", "https://www.justdial.com/Agartala/Abhay-Varieties-Nor-Banamalipu/9999PX381-X381-141028162716-U1Z5_BZDET", False
        .send
        s = .responseText
        html.body.innerHTML = s
        Debug.Print html.querySelector(".fn").innerText
        Debug.Print Trim$(Replace$(GetString(re, s, "title>(.*)<"), Chr$(34), vbNullString))
        Debug.Print Trim$(Replace$(GetString(re, s, "streetAddress"":(.*"")"), Chr$(34), vbNullString))
        Debug.Print Trim$(Replace$(GetString(re, s, "addressLocality"":(.*"")"), Chr$(34), vbNullString))
        Debug.Print Trim$(Replace$(GetString(re, s, "postalCode"":(.*"")"), Chr$(34), vbNullString))
        Debug.Print Trim$(Replace$(GetString(re, s, "addressRegion"":(.*"")"), Chr$(34), vbNullString))
        Debug.Print Trim$(Replace$(GetString(re, s, "addressCountry"":(.*"")"), Chr$(34), vbNullString))
    End With
End Sub

Public Function GetString(ByVal re As Object, ByVal inputString As String, ByVal pattern As String) As Variant
    Dim matches As Object

    With re
        .Global = True
        .MultiLine = True
        .IgnoreCase = True
        .pattern = pattern
        If .test(inputString) Then
            Set matches = .Execute(inputString)
            GetString = matches(0).SubMatches(0)
            Exit Function
        End If
    End With
    GetString = "No match"
End Function
  1. I would use Instr looking for "address":
Sign up to request clarification or add additional context in comments.

4 Comments

thanks Qharr Sir your solutions is perfect working .... Debug.Print html.querySelector(".fn").innerText there is one this I want for title is Abhay Varieties, but I want the full line of title its look like "Abhay Varieties - Cosmetic Dealers in Agartala - Justdial " please sir can you see in my Image I make red line box in first Image for tite
I will add in later tonight for you
please sir How I can extract full title from URL
I have added it in. Have you run the latest edit above? Bottom version

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.