I have a scraper built with Puppeteer and Node.js. When I change the headless mode from false to true, the bot gets detected. I need it to work in headless mode (true), since the deployment will be on Railway. Here is the website I need to scrape.
This is part of my code:
// Module setup: Puppeteer, the stealth and anonymize-UA plugins, and a fingerprint generator.
// NOTE(review): `.use()` below is the plugin-registration API of `puppeteer-extra`;
// the plain `puppeteer` package is not expected to expose it, so this import
// probably needs to be `puppeteer-extra` — confirm before relying on the plugins.
const puppeteer = require('puppeteer');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const AnonymizeUA = require('puppeteer-extra-plugin-anonymize-ua');
const { FingerprintGenerator } = require('fingerprint-generator');
// Register both plugins at module load, i.e. before checkAdidas launches any browser.
puppeteer.use(StealthPlugin());
puppeteer.use(AnonymizeUA());
// Shared generator restricted to desktop Chrome (minVersion 114) on Windows.
const fingerprintGenerator = new FingerprintGenerator({
devices: ['desktop'],
browsers: [{ name: 'chrome', minVersion: 114 }],
operatingSystems: ['windows'],
});
// Upper bound for the attempt loop in checkAdidas.
const MAX_RETRIES = 3;
// Presumably the pause between attempts in milliseconds — its use is not visible in this excerpt.
const RETRY_DELAY = 5000;
const checkAdidas = async (urlConfig) => {
const { name, url, sizes } = urlConfig;
const fingerprint = fingerprintGenerator.getFingerprint({
locales: ['en-US', 'en'],
screen: { width: 1920, height: 1080 },
});
let browser;
let retryCount = 0;
while (retryCount < MAX_RETRIES) {
try {
console.log(`Pokus ${retryCount + 1}/${MAX_RETRIES} pre URL: ${url}`);
browser = await puppeteer.launch({
headless: 'new',
args: [
`--proxy-server=${proxyWebShare.host}:${proxyWebShare.port}`,
"--no-sandbox",
"--disable-setuid-sandbox",
],
ignoreHTTPSErrors: true,
});
const [page] = await browser.pages();
await page.authenticate({
username: proxyWebShare.user,
password: proxyWebShare.pass,
});
await page.setUserAgent(fingerprint.fingerprint.navigator.userAgent);
await page.setViewport({
width: fingerprint.fingerprint.screen.width,
height: fingerprint.fingerprint.screen.height,
deviceScaleFactor: fingerprint.fingerprint.screen.deviceScaleFactor ?? 1,
});
await page.setExtraHTTPHeaders({
'Accept-Language': 'en-US,en;q=0.9',
'Accept-Encoding': 'gzip, deflate, br',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1'
});
await page.goto(url, {
waitUntil: ['networkidle0', 'domcontentloaded'],
timeout: 80000,
referer: 'https://www.adidas.cz/',
});
How can I proceed?