I am scraping a webpage and need to return data from a <table> in JSON format.
There's an enclosing <table class="grid_table"> that nests multiple <table class="mydetails"> (In the code below I only pasted 2).
From these nested tables I want to return a JSON array with the cell data. There's always 4 cells with different class Names. The first is a datestring and the other three are strings within a <div>. Below I put my desired output in JSON.
I am not familiar with javascript, so this is all I have come up with until now:
var t = document.querySelectorAll('table.mydetails');
t.forEach(function(table) {
datas = table.querySelectorAll("td");
datas.forEach(function(cell) {
var divs = cell.querySelectorAll('div');
divs.forEach(function(div) {
console.log(div.textContent);
});
});
});
I am sure there's better ways to do this and I would appreciate if someone could give me a little help here.
This is my webpage:
<html>
<table class="grid_table">
<tbody>
<tr class="grid_row_1 grid_row_pr_2021-11-01" role="row">
<td role="gridcell">
<div class="classCell">
<div class="classMultiLine">
<table class="mydetails">
<tr class="">
<td class="td-e">01.11.2021</td>
<td class="td-d Target">
<div class="">Foo1</div>
</td>
<td class="td-d Mine">
<div class="">Bar1</div>
</td>
<td class="td-d His">
<div class="">FooBar1</div>
</td>
</tr>
</table>
</div>
<div class="classMultiLine">empty</div>
<div class="classMultiLine">empty</div>
</div>
</td>
</tr>
<tr class="grid_row_2 grid_row_pr_2021-11-02" role="row">
<td role="gridcell">
<div class="classCell">
<div class="classMultiLine">
<table class="mydetails">
<tr class="">
<td class="td-e">02.11.2021</td>
<td class="td-d Target">
<div class="">Foo2</div>
</td>
<td class="td-d Mine">
<div class="">Bar2</div>
</td>
<td class="td-d Hi">
<div class="">FooBar2</div>
</td>
</tr>
</table>
</div>
<div class="classMultiLine">empty</div>
<div class="classMultiLine">empty</div>
</div>
</td>
</tr>
</tbody>
</table>
<script type="text/javascript">
This is my desired output:
[
{
"date": "01.11.2021",
"Target": "Foo1",
"Mine": "Bar1",
"His": "FooBar1"
},
{
"date": "02.11.2021",
"Target": "Foo2",
"Mine": "Bar2",
"His": "FooBar2"
}
]