0
<table cellspacing="0" rules="all" border="1" id="MainContent_grdUsers2" style="border-style:None;width:100%;border-collapse:collapse;">
                    <tbody><tr class="listHeader">
                        <th scope="col" style="width:11%;">Name</th><th scope="col" style="width:12%;">Password</th><th scope="col" style="width:16%;">Rights</th><th scope="col" style="width:10%;">Bureaus</th><th scope="col" style="width:15%;">FullName</th><th scope="col" style="width:16%;">Email</th><th scope="col" style="width:12%;">Status</th><th scope="col" style="width:12%;">Logon Tries</th>
                    </tr><tr>
                        <td>user1</td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl02$txtManageUsersPassword" type="text" maxlength="50" id="MainContent_grdUsers2_txtManageUsersPassword_0" style="width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;" autocomplete="off">
                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl02$ddlManageUsersRights" id="MainContent_grdUsers2_ddlManageUsersRights_0" style="width:95%;">
                            <option value="User">User</option>
                            <option selected="selected" value="Supervisor">Supervisor</option>
                            <option value="Administrator">Administrator</option>
                            <option value="Child Supervisor">Child Supervisor</option>

                        </select>

                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl02$ddlManageUsersBureaus" id="MainContent_grdUsers2_ddlManageUsersBureaus_0" style="width:95%;">
                            <option value="255">High</option>
                            <option selected="selected" value="128">Medium</option>
                            <option value="0">Low</option>

                        </select>

                                                </td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl02$txtManageUsersFullName" type="text" value="First1 Last1" maxlength="50" id="MainContent_grdUsers2_txtManageUsersFullName_0" style="width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;" autocomplete="off">
                                                </td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl02$txtManageUsersEmail" type="text" value="[email protected]" maxlength="50" id="MainContent_grdUsers2_txtManageUsersEmail_0" style="width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;" autocomplete="off">
                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl02$ddlManageUsersStatus" id="MainContent_grdUsers2_ddlManageUsersStatus_0" style="width:95%;">
                            <option value="Active">Active</option>
                            <option selected="selected" value="Inactive">Inactive</option>
                            <option value="Terminated">Terminated</option>

                        </select>

                                                </td><td align="center">                                                    
                                                    <input name="ctl00$MainContent$grdUsers2$ctl02$txtManageUsersLogonTries" type="text" value="0" maxlength="1" id="MainContent_grdUsers2_txtManageUsersLogonTries_0" style="width:95%;">
                                                </td>
                    </tr><tr style="background-color:#CED6E7;">
                        <td>user2</td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl03$txtManageUsersPassword" type="text" maxlength="50" id="MainContent_grdUsers2_txtManageUsersPassword_1" style="background-color: rgb(206, 214, 231); width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%;" autocomplete="off">
                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl03$ddlManageUsersRights" id="MainContent_grdUsers2_ddlManageUsersRights_1" style="background-color:#CED6E7;width:95%;">
                            <option value="User">User</option>
                            <option selected="selected" value="Supervisor">Supervisor</option>
                            <option value="Administrator">Administrator</option>
                            <option value="Child Supervisor">Child Supervisor</option>

                        </select>

                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl03$ddlManageUsersBureaus" id="MainContent_grdUsers2_ddlManageUsersBureaus_1" style="background-color:#CED6E7;width:95%;">
                            <option value="255">High</option>
                            <option selected="selected" value="128">Medium</option>
                            <option value="0">Low</option>

                        </select>

                                                </td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl03$txtManageUsersFullName" type="text" value="First2 Last2" maxlength="50" id="MainContent_grdUsers2_txtManageUsersFullName_1" style="background-color: rgb(206, 214, 231); width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;" autocomplete="off">
                                                </td><td align="center">
                                                    <input name="ctl00$MainContent$grdUsers2$ctl03$txtManageUsersEmail" type="text" value="[email protected]" maxlength="50" id="MainContent_grdUsers2_txtManageUsersEmail_1" style="background-color: rgb(206, 214, 231); width: 95%; background-image: url(&quot;data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAASCAYAAABSO15qAAAAAXNSR0IArs4c6QAAAUBJREFUOBGVVE2ORUAQLvIS4gwzEysHkHgnkMiEc4zEJXCMNwtWTmDh3UGcYoaFhZUFCzFVnu4wIaiE+vvq6+6qTgthGH6O4/jA7x1OiCAIPwj7CoLgSXDxSjEVzAt9k01CBKdWfsFf/2WNuEwc2YqigKZpK9glAlVVwTTNbQJZlnlCkiTAZnF/mePB2biRdhwHdF2HJEmgaRrwPA+qqoI4jle5/8XkXzrCFoHg+/5ICdpm13UTho7Q9/0WnsfwiL/ouHwHrJgQR8WEwVG+oXpMPaDAkdzvd7AsC8qyhCiKJjiRnCKwbRsMw9hcQ5zv9maSBeu6hjRNYRgGFuKaCNwjkjzPoSiK1d1gDDecQobOBwswzabD/D3Np7AHOIrvNpHmPI+Kc2RZBm3bcp8wuwSIot7QQ0PznoR6wYSK0Xb/AGVLcWwc7Ng3AAAAAElFTkSuQmCC&quot;); background-repeat: no-repeat; background-attachment: scroll; background-size: 16px 18px; background-position: 98% 50%; cursor: auto;" autocomplete="off">
                                                </td><td align="center">
                                                    <select name="ctl00$MainContent$grdUsers2$ctl03$ddlManageUsersStatus" id="MainContent_grdUsers2_ddlManageUsersStatus_1" style="background-color:#CED6E7;width:95%;">
                            <option selected="selected" value="Active">Active</option>
                            <option value="Inactive">Inactive</option>
                            <option value="Terminated">Terminated</option>

                        </select>

                                                </td><td align="center">                                                    
                                                    <input name="ctl00$MainContent$grdUsers2$ctl03$txtManageUsersLogonTries" type="text" value="0" maxlength="1" id="MainContent_grdUsers2_txtManageUsersLogonTries_1" style="background-color:#CED6E7;width:95%;">
</td>
</tr>
</tbody>
</table>

I am trying to scrape a table whose cells contain plain text, dropdowns (selected options), and text inputs (values). The result should look like this: user1 | Supervisor | Medium | First1 Last1 | [email protected] | Inactive

user2 | Supervisor | Medium | First2 Last2 | [email protected] | Active

The output is intended to be written to a CSV file. So far I have:

# Column titles: the stripped text of every <th> in the header row
# (the <tr> with class "listHeader").
headers = [c.get_text(strip=True) for c in soup.find('tr', attrs={'class':'listHeader'}).findAll('th')]

# Attempt 1: read the selected value of a dropdown.
# soup.find() returns a single element (the table), so this loop walks that
# element's direct children instead of a list of rows. column3 is overwritten
# on every pass, so only one value is left once the loop finishes.
# (Asker's note: find_all doesn't work here, it just grabs one.)
for table in soup.find('table', attrs={'id':'MainContent_grdUsers2'}):
        try:
            # value attribute of the first <option selected="selected"> found under this child
            column3=(table.find("option", attrs={"selected": "selected"}).get('value')) 
        except:
            # Bare except: any failure on this child (e.g. no matching option) is silently skipped.
            continue

# Attempt 2: read an input's value.
# Same iteration as above, but the lookup is by a hard-coded id ending in "_0",
# so it can only ever match the email input of the first data row.
for table in soup.find('table', attrs={'id':'MainContent_grdUsers2'}):
        try:
            column6=(table.find("input", attrs={"id": "MainContent_grdUsers2_txtManageUsersEmail_0"}).get('value')) 
        except:
            # Bare except: children without that input are silently skipped.
            continue

I can go in and grab the cells I want individually, but there are around 100 rows in this table, and I can't figure out how to grab them all at once because the cells contain not just text but also input values and selected dropdown options. Is there a way to do this with BeautifulSoup? I tried briefly with pandas and lxml, but I have never used those before.

Updated code:

# Column titles: the stripped text of every <th> in the header row.
headers = [c.get_text(strip=True) for c in soup.find('tr', attrs={'class':'listHeader'}).findAll('th')]
table = soup.find('table', attrs={'id':'MainContent_grdUsers2'})
data = []

# Skip the first <tr> (the header row); every remaining <tr> is one user record.
for tr in table.find_all('tr')[1:]:
    td = tr.find_all('td')
    try:
        # A statement such as `if ...: continue` cannot appear inside a list
        # literal, so the input values are read up front. An <input> with no
        # value attribute (a blank cell) makes .get('value') return None;
        # substitute '' so the record is kept and ' '.join() receives only strings.
        full_name = td[4].find('input').get('value') or ''
        email = td[5].find('input').get('value') or ''
        data.append([
            td[0].getText(),                                          # Name (plain text)
            td[2].find('option', {'selected':'selected'}).getText(),  # Rights
            td[3].find('option', {'selected':'selected'}).getText(),  # Bureaus
            full_name,                                                # FullName
            email,                                                    # Email
            td[6].find('option', {'selected':'selected'}).getText(),  # Status
        ])
    except (AttributeError, IndexError) as ex:
        # AttributeError: an expected <option>/<input> was not found (find() returned None).
        # IndexError: the row has fewer cells than expected.
        #print(ex)  ## you can uncomment this line for debugging ##
        continue

for row in data:
    print(' '.join(row))

1 Answer 1

1

Given the HTML you provided, this should work:

def _selected_text(cell):
    """Return the text of the selected <option> inside ``cell``.

    Returns '' when the cell has no option marked as selected, so a single
    blank dropdown does not discard the whole record.
    """
    # selected=True matches any <option> that carries the attribute,
    # whether it is written as selected="selected" or as a bare `selected`.
    option = cell.find('option', selected=True)
    return option.get_text(strip=True) if option is not None else ''


def _input_value(cell):
    """Return the ``value`` attribute of the first <input> inside ``cell``.

    Returns '' when there is no <input> or it has no value attribute
    (which is how a blank text box appears in the markup).
    """
    field = cell.find('input')
    if field is None:
        return ''
    return field.get('value') or ''


# Column titles from the header row; None when the page has no such row.
header_row = soup.find('tr', attrs={'class': 'listHeader'})
if header_row is not None:
    headers = [th.get_text(strip=True) for th in header_row.find_all('th')]
else:
    headers = None

table = soup.find('table', attrs={'id': 'MainContent_grdUsers2'})
data = []

# If the table is missing, leave `data` empty instead of raising AttributeError.
rows = table.find_all('tr') if table is not None else []
for tr in rows:
    td = tr.find_all('td')
    # The header row contains only <th> cells, and a malformed row may be
    # short; both are skipped here rather than by a blanket try/except.
    if len(td) < 7:
        continue
    data.append([
        td[0].get_text(strip=True),  # Name (plain text)
        _selected_text(td[2]),       # Rights
        _selected_text(td[3]),       # Bureaus
        _input_value(td[4]),         # FullName
        _input_value(td[5]),         # Email
        _selected_text(td[6]),       # Status
    ])

# Every field is a string (possibly empty), so rows can be joined directly
# or handed to csv.writer(...).writerows(data) for CSV output.
for row in data:
    print(' '.join(row))

Output:

user1 Supervisor Medium First1 Last1 [email protected] Inactive
user2 Supervisor Medium First2 Last2 [email protected] Active
Sign up to request clarification or add additional context in comments.

9 Comments

That makes sense, but I'm getting a NoneType error: td[2].find('option', {'selected':'selected'}).getText(), AttributeError: 'NoneType' object has no attribute 'getText'
Can you give me the url or the entire html table ?
It is too large to include in the question, is there any other way I can clarify for you?
I updated the code to catch all exceptions, try again .
This works now. Thank you. It does however throw an error if it comes across a blank cell. How can I fix this? Something like: if 'value' == ' ' continue ?
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.