//This module is available on npm.
// $ npm install -S sitemap-generator
//Usage
const SitemapGenerator = require('sitemap-generator');

// Options handed to the generator; query-string stripping is switched off here.
const crawlOptions = { stripQuerystring: false };

// Build the generator for the target site.
const generator = SitemapGenerator('http://example.com', crawlOptions);

// Handler for the 'done' event.
const handleDone = () => {
  // sitemaps created
};
generator.on('done', handleDone);

// Kick off the crawl.
generator.start();
//This can be useful to ignore certain pages so that they are not added to the sitemap.
const crawler = generator.getCrawler();

// Fetch condition: reports `true` to the callback only when the queued item's
// path does NOT match the pattern, and `false` when it does.
const allowUnlessMatched = (queueItem, referrerQueueItem, callback) => {
  const matchResult = queueItem.path.match(/myregex/);
  callback(null, matchResult === null);
};

crawler.addFetchCondition(allowUnlessMatched);
//This can be useful to add static URLs to the sitemap:
// Grab the crawler and the sitemap object from the generator.
const crawler = generator.getCrawler();
const sitemap = generator.getSitemap();
// Add static URL on crawl init.
crawler.on('crawlstart', () => {
  sitemap.addURL('/my/static/url');
});
//There are a couple of options to adjust the sitemap output. In addition to the options listed below, the options of the underlying crawler can also be changed. For a complete list, please check its official documentation.
// Example generator configuration showing the sitemap output options.
const generator = SitemapGenerator('http://example.com', {
  maxDepth: 0,
  filepath: './sitemap.xml',
  maxEntriesPerFile: 50000,
  stripQuerystring: true,
});
//Example:
// don't forget to:
// npm i http-proxy-agent https-proxy-agent
// Route crawler traffic through a proxy by attaching HTTP and HTTPS agents.
// NOTE(review): this assumes each package exports its agent constructor as the
// module itself; some major versions may expose it as a named export instead —
// confirm against the installed versions.
const HttpProxyAgent = require("http-proxy-agent");
const HttpsProxyAgent = require("https-proxy-agent");
// Both agents point at the same proxy endpoint.
const proxyAddress = 'http://localhost:1234';
const httpProxyAgent = new HttpProxyAgent(proxyAddress);
const httpsProxyAgent = new HttpsProxyAgent(proxyAddress);
// NOTE(review): `options` is not declared in this snippet; presumably it is the
// options object later passed to SitemapGenerator — confirm.
options.httpAgent = httpProxyAgent;
options.httpsAgent = httpsProxyAgent;
//Example:
// Create a generator with an `ignore` predicate.
// NOTE(review): `<pattern>` is a placeholder restored from a garbled snippet —
// confirm against the package docs and substitute the real expression.
const generator = SitemapGenerator(url, {
  ignore: (url) => {
    // Prevent URLs from being added that contain `<pattern>`.
    // No `g` flag: a single yes/no test does not need global matching.
    return /<pattern>/.test(url);
  },
});
//Example:
// NOTE(review): presumably an example value for an array-valued generator
// option (the descending numbers look like priorities) — confirm against the
// package documentation.
[1.0, 0.8, 0.6, 0.4, 0.2, 0]
// Triggered when the crawler successfully added a resource to the sitemap. Passes the URL as argument.
generator.on('add', (url) => {
  // log url
});
// Triggered when the crawler has finished and the sitemap is created.
generator.on('done', () => {
  // sitemaps created
});
// Emitted if there was an error while fetching a URL. Passes an object with the HTTP status code, a message and the URL as argument.
generator.on('error', (error) => {
  console.log(error);
  // => { code: 404, message: 'Not found.', url: 'http://example.com/foo' }
});
// Triggered if a URL matches a disallow rule in the robots.txt file or a meta robots noindex is present. The URL will not be added to the sitemap. Passes the ignored URL as argument.
generator.on('ignore', (url) => {
  // log ignored url
});
Social Plugin