Question

I want to remove all the html tags except <br> or <br/> tags from a string using javascript. I have seen many questions like this but their answers will remove all the html tags including <br> and <br/> tags.

Does anyone knows a regex to do this?

Was it helpful?

Solution 2

Try This

 function remove_tags(html)
 {
   var html = html.replace("<br>","||br||");  
   var tmp = document.createElement("DIV");
   tmp.innerHTML = html;
   html = tmp.textContent||tmp.innerText;
   return html.replace("||br||","<br>");  
 }

OTHER TIPS

Use a negative lookahead (by using a regex such as /<(?!br\s*\/?)[^>]+>/g):

var html = 'this is my <b>string</b> and it\'s pretty cool<br />isn\'t it?<br>Yep, it is. <strong>More HTML tags</strong>';
html = html.replace(/<(?!br\s*\/?)[^>]+>/g, '');

console.log(html); 
//this is my string and it's pretty cool<br />isn't it?<br>Yep, it is. More HTML tags

Demo

I've worked on the last suggestion to develop a function removing all or just keeping some tags

function strip_tags( _html /*you can put each single tag per argument*/ )
{
    var _tags = [], _tag = "" ;
    for( var _a = 1 ; _a < arguments.length ; _a++ )
    {
        _tag = arguments[_a].replace( /<|>/g, '' ).trim() ;
        if ( arguments[_a].length > 0 ) _tags.push( _tag, "/"+_tag );
    }

    if ( !( typeof _html == "string" ) && !( _html instanceof String ) ) return "" ;
    else if ( _tags.length == 0 ) return _html.replace( /<(\s*\/?)[^>]+>/g, "" ) ;
    else
    {
        var _re = new RegExp( "<(?!("+_tags.join("|")+")\s*\/?)[^>]+>", "g" );
        return _html.replace( _re, '' );
    }
}

var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>" ;
document.write( "This is the original html code including some tags<br>" );
document.write( _html + "<br><br>" ); // original html code
document.write( "Now we remove all tags (plain text)<br>" );
document.write( strip_tags( _html ) + "<br><br>" ); // remove all tags
document.write( "Only the bold tag is kept<br>" );
document.write( strip_tags( _html, "b" ) + "<br><br>" ); // keep <b> only
document.write( "Only the underline tag is kept<br>" );
document.write( strip_tags( _html, "u" ) + "<br><br>" ); // keep <u> only
document.write( "Only the italic tag is kept<br>" );
document.write( strip_tags( _html, "<i>" ) + "<br><br>" ); // keep <i> only
document.write( "Keeping both italic and underline<br>" );
document.write( strip_tags( _html, "i", "u" ) ); // keep both <i> and <u>

This is an old, but still high-ranked question, so I thought I'd offer a more general ES6 solution.

This solution will strip all but the excluded tags, and also simplify those tags to remove attributes.

This is particularly useful if you want to handle Paste events, and simplify the HTML.

It also strips HTML comments as sometimes copy/paste includes <!--StartFragment--> etc.

  function strip_tags(html, ...args) {
    return html.replace(/<(\/?)(\w+)[^>]*\/?>/g, (_, endMark, tag) => {
      return args.includes(tag) ? '<' + endMark + tag + '>' :'';
    }).replace(/<!--.*?-->/g, '');
  }

Rather than building a complicated it just does a replace and then checks the tags, returning either the simple start/end tag or an empty string.

Usage example:

// Strip all except basic formatting and paragraphs and breaks.
const h = strip_tags(html, 'b', 'i', 'u', 'p', 'br');

This is what I am using to handle paste events for a simple HTML editor. It's not perfect, as it doesn't handle strange situations like a ">" embedded in a tag attribute, but that seems like an unlikely scenario.

Hope it's of use to someone. Improvements welcomed!

To expand h2ooooooo Answer to include leading spaces and be case insenctive you could use

/<(?!\s*br\s*\/?)[^>]+>/gi

Base on h2ooooooo answer correct regex for your problem is:

<((?!\/?br\s?\/?>)\s*)[^>]+>

This solution works even for cases mentioned by Wolfie and Olivier.

Here's Demo

I've adapted Sandro Rosa's function to address the issue mentioned by Nikita:

function strip_tags( _html /*you can put each single tag per argument*/ )
{
    var _tags = [], _tag = "";
    for ( var _a = 1 ; _a < arguments.length ; _a++ ) {
        _tag = arguments[_a].replace(/[<>\/]/g, '').trim();
        if ( arguments[_a].length > 0 ) _tags.push( _tag );
    }

    if ( !( typeof _html == "string" ) && !( _html instanceof String ) ) return "";
    else if ( _tags.length == 0 ) return _html.replace( /<\s*\/?[^>]+>/g, "" );
    else
    {
        var _re = new RegExp( "<(?!\\s*\\/?(" + _tags.join("|") + ")\\s*\\/?>)[^>]*>", "g" );
        return _html.replace( _re, '' );
    }
}

var _html = "<b>Just</b> some <i>tags</i> and text to test <u>this code</u>" ;
console.log( "This is the original html code including some tags" );
console.log( _html ); // original html code
console.log( "Now we remove all tags (plain text)" );
console.log( strip_tags( _html ) ); // remove all tags
console.log( "Only the bold tag is kept" );
console.log( strip_tags( _html, "b" ) ); // keep <b> only
console.log( "Only the underline tag is kept" );
console.log( strip_tags( _html, "u" ) ); // keep <u> only
console.log( "Only the italic tag is kept" );
console.log( strip_tags( _html, "<i>" ) ); // keep <i> only
console.log( "Keeping both italic and underline" );
console.log( strip_tags( _html, "i", "u" ) ); // keep both <i> and <u>

_html = "this is my <b>string</b> and it's pretty cool<br />isn't it?<br>Yep, it is.<strong>More HTML tags</strong><span></span><bol>" ;
console.log( "Keeping just the bold tag" );
console.log( strip_tags( _html, "b" ) ); // keep just the <b>, not the <br> or <bol>

your point is very good, then the whole approach shall be changed, because there is no way to combine regex boundaries and negative lookahead. Thus I rewrote the whole function and it seems to work fine with the examples proposed. I also extended the removal of tags with attributes embedded.

Basically, this is just one brutal approach: the input text is pre-scanned and all tags are taken first. If there is no match between the input arguments and the initial archive, they will be removed throughout all the input text. I also commented some lines, which could be useful to debugging.

<SCRIPT LANGUAGE="javascript" TYPE="text/javascript">
function strip_tags( _html /*you can put each single tag per argument*/ )
{
    var _tags = [], _tag = "" ;
    for( var _a = 1 ; _a < arguments.length ; _a++ )
    {
        _tag = arguments[_a].replace( /<|>/g, '' ).trim() ;
        if ( arguments[_a].length > 0 ) _tags.push( _tag );
    }
    
    _tags = [...new Set( _tags )]; // unique values

    //console.log( "KEEP THESE TAGS:", _tags.length == 0 ? "delete all" : _tags.join( ", " ) );

    if ( !( typeof _html == "string" ) && !( _html instanceof String ) ) return "" ;
    else if ( _tags.length == 0 ) return _html.replace( /<(\s*\/?)[^>]+>/g, "" ) ;
    else
    {
        //console.log( "in:", _html );
        var _all_tags_prescan = _html.match( /\<[A-Za-z]+/g ), _re;
            _all_tags_prescan = [...new Set( _all_tags_prescan )]; // unique values
        //console.log( "all tags prescan:", _all_tags_prescan );
            //cleaning
        _all_tags_prescan.forEach( function( _tag, _i ){
            _all_tags_prescan[ _i ] = _tag.replace( /[\<\>]/g, '' );
        } );

        //console.log( "all tags prescan (clean):", _all_tags_prescan );
        //console.log( "input tags (clean):", _tags );
            
        for( var _t = 0 ; _t < _all_tags_prescan.length; _t++ )
        {
            //console.log( _all_tags_prescan[_t], _tags.includes( _tags[_t] ) );
            if ( _tags.includes( _all_tags_prescan[_t] ) ) continue;
            _re = new RegExp( "<"+_all_tags_prescan[_t]+"\\s*\\w*\=*\"[\\w\:\;\#]*\">", "g" );
            //console.log( _re, _html.match( _re ) );
            _html = _html.replace( _re, '' );

            if ( _all_tags_prescan[_t][0] != "/" ) _all_tags_prescan[_t] = "\/?" + _all_tags_prescan[_t];
            _re = new RegExp( "<"+_all_tags_prescan[_t]+">", "g" );
            //console.log( _re, _html.match( _re ) );
            _html = _html.replace( _re, '' );
        }

        return _html;
    }
}

var _html = "<b>Just</b> some <i STYLE=\"color:#323232;\">tags</i> and <pre>text</pre> to <b>test</b> <u>this code</u>" ;
console.log( "This is the original html code including some tag" );
console.log( _html ); // original html code
console.log( "GOAL: remove all tags (plain text)" );
console.log( strip_tags( _html ) ); // remove all tags
console.log( "GOAL: only the bold tag is kept" );
console.log( strip_tags( _html, "b" ) ); // keep <b> only
console.log( "GOAL: only the bold and the underline tags are kept" );
console.log( strip_tags( _html, "b", "i" ) ); // keep <b> and <i>
console.log( "GOAL: only the italic tag is kept" );
console.log( strip_tags( _html, "i" ) ); // keep <i>
</SCRIPT>
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top