Parsing an XML Sitemap in JavaScript

This post shows you how to parse an XML Sitemap in JavaScript using a built-in XML Parser.

xml-sitemap-parsing.js
// sitemap file
var sitemapFile = 'sitemap.xml';

// load the sitemap, then print loc / changefreq / priority of every <url> entry
getXMLSitemapObject(sitemapFile, function(sitemapObject) {

    // text of the first <tagName> descendant of parent, or null when there is none
    var readChildText = function(parent, tagName) {
        var matches = parent.getElementsByTagName(tagName);
        return matches.length > 0 ? matches[0].textContent : null;
    };

    // print "label: value", skipping tags the entry does not contain
    var printProperty = function(label, value) {
        if (value !== null) {
            console.log(label + ': ' + value);
        }
    };

    // retrieve properties from the sitemap object
    var urls = sitemapObject.getElementsByTagName('url');

    for (var i = 0; i < urls.length; i++) {
        var urlElement = urls[i];

        // <changefreq> and <priority> are optional in the sitemap protocol, so
        // indexing [0].textContent directly would throw on entries without them
        printProperty('url', readChildText(urlElement, 'loc'));
        printProperty('changefreq', readChildText(urlElement, 'changefreq'));
        printProperty('priority', readChildText(urlElement, 'priority'));

        console.log('----');
    }
});

// get sitemap content and parse it to Document Object Model
//
// sitemapFile   - URL of the sitemap, requested with an asynchronous GET
// callback      - called with the parsed XML DOM object on an HTTP 200 response
// errorCallback - optional; called with an Error when the request finishes
//                 with any other status or when parsing throws. Without it
//                 the failure is written to console.error instead of being
//                 silently dropped.
function getXMLSitemapObject(sitemapFile, callback, errorCallback) {
    var reportFailure = function(error) {
        if (typeof errorCallback === 'function') {
            errorCallback(error);
        } else {
            console.error(error);
        }
    };

    var xhttp = new XMLHttpRequest();
    xhttp.onreadystatechange = function() {
        // readyState 4 means the request has finished, successfully or not
        if (this.readyState !== 4) {
            return;
        }

        if (this.status !== 200) {
            reportFailure(new Error('Failed to load ' + sitemapFile + ' (HTTP status ' + this.status + ')'));
            return;
        }

        // keep the callback outside the try so its own errors are not
        // misreported as parse failures
        var sitemapObject;
        try {
            sitemapObject = parseXMLSitemap(this.responseText);
        } catch (error) {
            reportFailure(error);
            return;
        }
        callback(sitemapObject);
    };
    xhttp.open('GET', sitemapFile, true);
    xhttp.send();
}

// parse a text string into an XML DOM object
//
// sitemapContent - XML source text of the sitemap
// returns the XML document produced by DOMParser
// throws Error when the text is not well-formed XML
function parseXMLSitemap(sitemapContent) {
    var parser = new DOMParser();
    var xmlDoc = parser.parseFromString(sitemapContent, 'text/xml');

    // DOMParser does not throw on malformed XML; it returns a document that
    // contains a <parsererror> element, so check for it explicitly
    var parseErrors = xmlDoc.getElementsByTagName('parsererror');
    if (parseErrors.length > 0) {
        throw new Error('Invalid sitemap XML: ' + parseErrors[0].textContent);
    }

    return xmlDoc;
}

In the above code, we first implement the following two functions:

  • parseXMLSitemap: function that parses XML sitemap content to an XML DOM object
  • getXMLSitemapObject: function that fetches the XML sitemap content with an Ajax HTTP GET request, then uses parseXMLSitemap to parse the content into an XML DOM object

Finally, we iterate over the url elements of the XML DOM object, retrieve the necessary properties (loc, changefreq, and priority), and print their values to the console.

 

Output:

url: https://bytenota.com/
changefreq: always
priority: 1.0
----
url: https://bytenota.com/jquery-detect-if-a-checkbox-is-checked-or-unchecked/
changefreq: daily
priority: 0.8
----
url: https://bytenota.com/parsing-an-xml-sitemap-in-php/
changefreq: daily
priority: 0.7
----
url: https://bytenota.com/aspnet-get-ie-browser-version-using-csharp/
changefreq: daily
priority: 0.7
----
url: https://bytenota.com/javascript-save-objects-in-html5-session-storage/
changefreq: daily
priority: 0.7

Leave a Reply

avatar