Puppeteer-Infinite-Scroller provides a simple and efficient solution for scraping data loaded through infinite scrolling on web pages using Puppeteer.
You can install the package using npm:
npm install puppeteer-infinite-scroller
Import the puppeteerInfiniteScroller function from the package and use it to scrape data from infinite scrolling web pages.
const puppeteer = require("puppeteer");
const puppeteerInfiniteScroller = require('puppeteer-infinite-scroller');

// Example: scrape items from an infinite-scrolling page using a CSS selector.
(async () => {
  const pageUrl = "https://infiniteajaxscroll.com/examples/blocks/";
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1200,
      height: 800,
    });
    await page.goto(pageUrl);
    // Make sure the first batch of items is present before scrolling starts.
    await page.waitForSelector(".blocks .blocks__block");

    const options = {
      scrollDelay: 1000, // Milliseconds between scrolls
      itemCount: 50, // Number of items to scrape
      selector: '.blocks .blocks__block', // CSS selector for items
      // OR
      // pageFunction: () => { /* Custom page function for scraping */ }
    };

    const scrapedData = await puppeteerInfiniteScroller(page, options);
    console.log(scrapedData);
  } finally {
    // Close the browser even when navigation or scraping throws, so the
    // launched browser process is never left running.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});
The following options can be configured when using the puppeteerInfiniteScroller function:
- scrollDelay (optional): Milliseconds between scrolls. Default is 1000ms.
- itemCount (optional): Number of items to scrape. Default is 10.
- selector (optional): CSS selector for the items. Either this or pageFunction must be provided.
- pageFunction (optional): Custom function for scraping data from the page. Either this or selector must be provided.
const puppeteer = require("puppeteer");
const puppeteerInfiniteScroller = require('puppeteer-infinite-scroller');

// Example: scrape items from an infinite-scrolling page using a custom
// page function instead of a CSS selector.
(async () => {
  const pageUrl = "https://infiniteajaxscroll.com/examples/blocks/";
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();
    await page.setViewport({
      width: 1200,
      height: 800,
    });
    await page.goto(pageUrl);
    // Make sure the first batch of items is present before scrolling starts.
    await page.waitForSelector(".blocks .blocks__block");

    /**
     * Collects the class, id and tag name of every block currently in the DOM.
     * It reads from `document`, so it is meant to be handed to the scroller as
     * `pageFunction` rather than called directly from Node.
     *
     * @returns {Array<{class: ?string, id: ?string, tagName: string}>}
     *   One entry per element matching ".blocks .blocks__block".
     */
    function extractElements() {
      return Array.from(
        document.querySelectorAll(".blocks .blocks__block"),
        (element) => ({
          class: element.getAttribute("class"),
          id: element.getAttribute("id"),
          tagName: element.tagName,
        }),
      );
    }

    const options = {
      scrollDelay: 1000, // Milliseconds between scrolls
      itemCount: 50, // Number of items to scrape
      pageFunction: extractElements,
    };

    const scrapedData = await puppeteerInfiniteScroller(page, options);
    console.log(scrapedData);
  } finally {
    // Close the browser even when navigation or scraping throws, so the
    // launched browser process is never left running.
    await browser.close();
  }
})().catch((error) => {
  // Report the failure and signal it through the exit code instead of
  // leaving the promise rejection unhandled.
  console.error(error);
  process.exitCode = 1;
});
License
This project is licensed under the MIT License - see the LICENSE file for details.