How to Convert an HTML String to JavaScript DOM

The traditional method of parsing a string into a DOM fragment is to set the string as the innerHTML value of an empty <div> element, then iterate through the <div>'s child nodes, appending them to a DocumentFragment instance. Like this:

// Parse an HTML string into a DocumentFragment.
//
// The string is handed to the browser's parser by assigning it to the
// innerHTML of a detached <div>; every node the parser produced is then
// moved, in order, out of the <div> and into a new DocumentFragment.
//
// str     - the HTML markup to parse.
// returns - a DocumentFragment holding the parsed nodes.
function parseHTML(str) {
    var holder = document.createElement('DIV'),
        fragment,
        node;

    holder.innerHTML = str;
    fragment = document.createDocumentFragment();

    // appendChild() moves the node, so the holder's first child is a
    // different node on every pass until the holder is empty.
    for (node = holder.firstChild; node; node = holder.firstChild) {
        fragment.appendChild(node);
    }
    return fragment;
}

This works beautifully in all modern browsers all the way back to Internet Explorer 8 and Firefox 3.5.

There is one problem. Incomplete HTML snippets such as <tr><td>text</td></tr> and <li>text</li> get auto-corrected. Because the <tr> and <li> elements require specific containers, the invalid markup is simply ignored and these two snippets get parsed as the string "text".

Partial views need special handling.

HTML5's new <template> element solves the problem. It's designed specifically for in-browser templating, so it works just as we expect it to. We retrieve the generated DOM via a special property called content, which behaves just like a DocumentFragment instance.

// Parse an HTML string using a native <template> element.
//
// str     - the HTML markup to parse.
// returns - the template's "content" (the parsed DOM) when the browser
//           exposes it; undefined otherwise, because no fallback has
//           been written yet.
function parseHTML(str) {
    var tpl = document.createElement('TEMPLATE');

    // Feature test: without a "content" property there is nothing
    // to parse into.
    if (!tpl.content) {
        // TODO: Fallback for other browsers
        return;
    }

    tpl.innerHTML = str;
    return tpl.content;
}

Native templating is well supported, with the exception of Internet Explorer and some legacy mobile browsers that are still in common usage. We need a fallback for these browsers.

One solution that I investigated was to use the DOMParser API to parse the string:

// Parse an HTML string into a DocumentFragment using DOMParser.
//
// DOMParser always builds a complete document, so the nodes described by
// the string end up inside the generated <head> and/or <body>. Those nodes
// are moved into the fragment; the <html>/<head>/<body> scaffolding itself
// is left behind, so the result has the same shape as the other parseHTML
// implementations.
//
// str     - the HTML markup to parse.
// returns - a DocumentFragment holding the parsed nodes, head content
//           first, then body content.
function parseHTML(str) {
    var domparser, doc, docfrag, parents, parent, i;
    domparser = new DOMParser();
    doc = domparser.parseFromString(str, 'text/html');
    docfrag = document.createDocumentFragment();
    parents = [doc.head, doc.body];
    for (i = 0; i < parents.length; i++) {
        parent = parents[i];
        // appendChild() moves the node out of the parsed document, so
        // firstChild advances on every pass.
        while (parent && parent.firstChild) {
            docfrag.appendChild(parent.firstChild);
        }
    }
    return docfrag;
}

Parsing the string as "text/html" has the same unfortunate side effect on partial HTML snippets as the old innerHTML trick. I tried parsing strings as XML ("application/xml") instead. This avoids the HTML auto-correction problem, but the XML parser expects well-formed markup, and Internet Explorer and Edge are particularly fussy about this, throwing errors when HTML strings contain constructs that are not well-formed XML, such as boolean attributes with no explicit values. Clearly, this is not practical.

Anyway, the DOMParser API does not provide the full coverage of legacy browsers that I need. As of January 2016, I need my JavaScript libraries to continue to support Internet Explorer 9 and Android 4.0+.

For the time being I'm going to have to fall back on the old innerHTML trick. To handle common partial view patterns, I will peek at the beginning of the HTML string looking for problem tags such as <tr> and <li>, and wrap the original string in the appropriate container as required.

This is what I came up with. Here's the full and final version of my parseHTML function, heavily commented for your convenience:

// parseHTML(str) -> DocumentFragment
//
// Parse an HTML string, which may be a partial view such as
// "<tr><td>text</td></tr>" or "<li>text</li>", into a DocumentFragment.
//
// str     - the HTML markup to parse. Non-string values are converted to a
//           string (null becomes the empty string), which is also what
//           assigning them to innerHTML does.
// returns - a DocumentFragment holding the parsed nodes. Where native
//           templating is available this is the <template>'s own content.
var parseHTML = (function () {

    // Wrappers for elements that the HTML parser drops or rewrites when
    // they are not nested inside their standard containers. "open" and
    // "close" are wrapped around the template string, and "depth" is the
    // number of wrapper elements between the scratch <div> and the
    // parsed nodes.
    //
    // The wrapped markup is parsed with ONE innerHTML assignment on a
    // <div>. Assigning innerHTML directly on <table>, <tbody>, <tr> or
    // <select> elements is avoided because legacy Internet Explorer
    // treats it as read-only or mishandles it on those elements.

    var inTable = {
        open: '<table>',
        close: '</table>',
        depth: 1
    };
    var inTableBody = {
        open: '<table><tbody>',
        close: '</tbody></table>',
        depth: 2
    };
    var inTableRow = {
        open: '<table><tbody><tr>',
        close: '</tr></tbody></table>',
        depth: 3
    };
    var inColGroup = {
        open: '<table><colgroup>',
        close: '</colgroup></table>',
        depth: 2
    };
    var inList = {
        open: '<ul>',
        close: '</ul>',
        depth: 1
    };
    var inDefinitionList = {
        open: '<dl>',
        close: '</dl>',
        depth: 1
    };

    // A "multiple" select is used so that the browser does not
    // automatically mark the first parsed <option> as selected.
    var inSelect = {
        open: '<select multiple="multiple">',
        close: '</select>',
        depth: 1
    };

    // Keyed by lower-case tag name. <thead>, <tbody>, <tfoot>, <caption>
    // and <colgroup> must be direct children of <table>: a <tbody>
    // wrapper would make the parser discard them.
    var specials = {
        td: inTableRow,
        th: inTableRow,
        tr: inTableBody,
        thead: inTable,
        tbody: inTable,
        tfoot: inTable,
        caption: inTable,
        colgroup: inTable,
        col: inColGroup,
        li: inList,
        dd: inDefinitionList,
        dt: inDefinitionList,
        optgroup: inSelect,
        option: inSelect
    };

    var hasOwn = Object.prototype.hasOwnProperty;

    return function (str) {
        var container, docfrag, html, i, root, special, tagName, tags;

        // Use native templating where available:

        container = document.createElement('TEMPLATE');
        if (container.content) {
            container.innerHTML = str;
            return container.content;
        }

        // Fallback for Internet Explorer, early editions of Edge,
        // and Android < 4.4:

        // Normalise the input the way an innerHTML assignment would, so
        // the tag sniffing below cannot throw on non-string values:

        html = (str === null) ? '' : String(str);

        // See if the template string starts with a "<tag", and check if
        // that tag is one of our specials. Only own properties count, so
        // a tag named e.g. "constructor" or "toString" is not mistaken
        // for a special through Object.prototype:

        tags = html.match(/^\s*<([^>\s\/]+)/);
        tagName = tags ? tags[1].toLowerCase() : '';
        special = hasOwn.call(specials, tagName) ? specials[tagName] : null;

        // Parse everything through a scratch <div>. The "root" is the
        // element that contains the DOM represented by the original
        // template string: the <div> itself for ordinary markup, or the
        // innermost wrapper element for specials.

        container = document.createElement('DIV');
        root = container;
        if (special) {
            container.innerHTML = special.open + html + special.close;
            for (i = 0; i < special.depth; i++) {
                root = root.firstChild;
            }
        } else {
            container.innerHTML = html;
        }

        // Move the root's child nodes to an empty DocumentFragment
        // instance. appendChild() removes each node from the root, so
        // firstChild advances on every pass:

        docfrag = document.createDocumentFragment();
        while (root.firstChild) {
            docfrag.appendChild(root.firstChild);
        }
        return docfrag;
    };

}());