-1

I have this Apps Script / Cheerio function that successfully scrapes the data I want from the URL. The site displays only 25 entries per page at this URL. Additional entries are on subsequent pages, which I can reach by increasing the number at the end of the URL (count=25, count=50, etc.). How can I loop this function so that it scrapes multiple pages into one table?

/**
 * Scrapes the first page of the Yahoo Fantasy Hockey player list and writes
 * the result to the sheet named "Sheet1" of the active spreadsheet.
 *
 * For every table row, the text of each element matched by the selectors
 * below is appended to that row's output, in selector order. Rows in which
 * nothing matched (for example header rows that contain no matching `<td>`)
 * are skipped, and shorter rows are padded with empty strings so that the
 * written range is rectangular, which `Range.setValues` requires.
 *
 * The sheet is only cleared once there is data to write, so a failed scrape
 * does not wipe the previous import.
 *
 * @returns {void}
 * @throws {Error} If no rows could be scraped, or if "Sheet1" does not exist.
 */
function yahooSkImport() {
  const url = 'https://hockey.fantasysports.yahoo.com/hockey/121356/players?status=ALL&eteam=ALL&fteam=NONE&pos=P&cut_type=33&stat1=S_S_2025&myteam=0&sort=PTS&sdir=1&count=0';
  const html = UrlFetchApp.fetch(url).getContentText();
  const $ = Cheerio.load(html);

  // Selectors are evaluated relative to each <tr>, in output-column order.
  const selectors = [
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > a', // Player Name
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > span[class="D-b"] > span', // Team - Pos
    'td:nth-of-type(10)', // % Ros
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > span > span[class="Pstart-sm"]', // Status
  ];

  const tableData = [];
  $('table').find('tr').each((rowIndex, row) => {
    const cellData = [];
    selectors.forEach((selector) => {
      $(row).find(selector).each((cellIndex, cell) => {
        cellData.push($(cell).text());
      });
    });
    // A row with no matches carries no player data; writing it would also
    // make the range width zero when it happens to be the first row.
    if (cellData.length > 0) {
      tableData.push(cellData);
    }
  });

  if (tableData.length === 0) {
    throw new Error('No player rows were found on the page; the sheet was left unchanged.');
  }

  // Rows can differ in length (e.g. a player without a status), so pad every
  // row to the widest one before writing.
  const width = tableData.reduce((max, row) => Math.max(max, row.length), 0);
  const values = tableData.map((row) => [...row, ...new Array(width - row.length).fill('')]);

  const sheet = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('Sheet1');
  if (sheet === null) {
    throw new Error('Sheet "Sheet1" was not found in the active spreadsheet.');
  }

  sheet.clear();
  // The sheet was just cleared, so the data always starts at A1.
  sheet.getRange(1, 1, values.length, width).setValues(values);
}
2

1 Answer 1

1

Use a loop to iterate an index over 0, 25, 50... and string concatenation to add the index to the URL, like this:

/**
 * Scrapes the Yahoo Fantasy Hockey player list page by page and writes the
 * combined table to the sheet named "Sheet1" of the active spreadsheet.
 *
 * Pages are requested by appending an offset (0, 25, 50, ...) to the `count`
 * query parameter at the end of the URL. Each table row yields exactly one
 * value per selector in `specs` (the text of the first match, or '' when
 * nothing matches), so every output row has the same width.
 *
 * Paging stops at the first of:
 *   - a response whose HTTP status is not 200,
 *   - a page that yields no player rows,
 *   - a page identical to the previous one,
 *   - the `maxPages` safety cap.
 *
 * If nothing was scraped the sheet is left untouched and a toast reports it.
 *
 * @returns {void}
 * @throws {Error} If the active spreadsheet has no sheet named "Sheet1".
 */
function yahooSkImport() {
  const urlBase = 'https://hockey.fantasysports.yahoo.com/hockey/121356/players?status=ALL&eteam=ALL&fteam=NONE&pos=P&cut_type=33&stat1=S_S_2025&myteam=0&sort=PTS&sdir=1&count=';
  const pageSize = 25;
  // Hard upper bound so that a site which keeps answering 200 cannot keep the
  // script fetching until it hits the Apps Script execution time limit.
  const maxPages = 100;
  const specs = [
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > a', // Player Name
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > span[class="D-b"] > span', // Team - Pos
    'td:nth-of-type(10)', // % Ros
    'td:nth-of-type(3) > div > div > div:nth-of-type(2) > div > div > div > span > span[class="Pstart-sm"]', // Status
  ];

  const data = [];
  let previousSignature = null;
  for (let page = 0; page < maxPages; page += 1) {
    // Without muteHttpExceptions, fetch() throws on HTTP errors and the status
    // check below could never observe a non-200 response.
    const response = UrlFetchApp.fetch(`${urlBase}${page * pageSize}`, { muteHttpExceptions: true });
    if (response.getResponseCode() !== 200) {
      break;
    }

    const $ = Cheerio.load(response.getContentText());
    const pageRows = [];
    $('table').find('tr').each((rowIndex, row) => {
      // .text() is what extracts the string; .each() would return the Cheerio
      // selection itself, which cannot be written to a sheet.
      const values = specs.map((spec) => $(row).find(spec).first().text());
      // Skip rows with no player data, such as header rows.
      if (values.some((value) => value.trim() !== '')) {
        pageRows.push(values);
      }
    });

    if (pageRows.length === 0) {
      break;
    }
    // NOTE(review): guards against the site repeating its last page for
    // out-of-range offsets; whether Yahoo actually does this is unverified.
    const signature = JSON.stringify(pageRows);
    if (signature === previousSignature) {
      break;
    }
    previousSignature = signature;
    data.push(...pageRows);
  }

  const ss = SpreadsheetApp.getActive();
  if (data.length === 0) {
    ss.toast('No rows were imported; Sheet1 was left unchanged.');
    return;
  }

  const sheet = ss.getSheetByName('Sheet1');
  if (sheet === null) {
    throw new Error('Sheet "Sheet1" was not found in the active spreadsheet.');
  }
  sheet.clear().getRange(1, 1, data.length, specs.length).setValues(data);
  ss.toast(`Done. Imported ${data.length} rows.`);
}

See Apps Script at Stack Overflow and Clean Code JavaScript.

Sign up to request clarification or add additional context in comments.

Comments

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.