JavaScript: remove \n or \t from HTML content, except inside <pre> tags

In JavaScript, how can I remove line breaks and tabs (\n or \t) from an HTML string, except inside <pre> tags?

I am using this code to delete a line:

htmlString.replace(/[\n\t]+/g,"");

      

However, it also removes the \n and \t characters inside <pre> tags. How can I fix that?

+3


source to share


2 answers


You can start by matching the text to be cleaned, which can only be one of:

  • The text from the beginning of the string to the next <pre> start tag.
  • The text from a </pre> end tag to the next <pre> start tag.
  • The text from a </pre> end tag to the end of the string.
  • The text from the beginning of the string to the end of the string (when the string contains no <pre> elements).

which can be described in regex as:

/(?:^|<\/pre>)[^]*?(?:<pre>|$)/g

      

where [^] matches any character, including newlines, and *? is a non-greedy quantifier that matches as few characters as possible.


Each match is a piece of text that needs cleaning, so we strip the newlines and tabs from it with the regular expression /[\n\t]+/g.




Example:

var htmlString = "<body>\n\t<p>\n\t\tLorem\tEpsum\n\t</p>\n\t<pre>\n\t\tHello, World!\n\t</pre>\n\n\t<pre>\n\t\tThis\n\t\tis\n\t\tawesome\n\t</pre>\n\n\n</body>";

var preview = document.getElementById("preview");
preview.textContent = htmlString;

/**
 * Removes newlines and tabs from an HTML string everywhere except inside
 * <pre> elements.
 *
 * Each regex match is a stretch of text lying outside any <pre>: it starts at
 * the beginning of the string or at a </pre> end tag and runs lazily up to the
 * next <pre> start tag or the end of the string.
 *
 * @param {string} html - HTML source to clean.
 * @returns {string} The same HTML with \n and \t removed outside <pre>.
 */
function stripOutsidePre(html) {
    // The start tag is matched with a lookahead so that it may carry
    // attributes (<pre class="...">) without being consumed: whitespace inside
    // the tag itself is therefore never rewritten. The `i` flag accepts <PRE>.
    return html.replace(/(?:^|<\/pre\s*>)[^]*?(?=<pre(?:\s[^>]*)?>|$)/gi, function(outside) {
        return outside.replace(/[\n\t]+/g, "");
    });
}

document.getElementById("remove").onclick = function() {
    preview.textContent = stripOutsidePre(htmlString);
};
      

/* Give every <pre> element a pale yellow background. */
pre {
    background: #fffbec;
}
      

<button id="remove">Remove</button>
The pre bellow is just used to show the string, it is not THE PRE.
<pre id="preview"></pre>
      

Run code



Example Regex101 .

+1


source


You can use TreeWalker to select all text nodes and apply your regex to those nodes only:



//
// Fallback for Element.prototype.closest, adapted from
// https://developer.mozilla.org/en-US/docs/Web/API/Element/closest
//
// Installed only when the browser exposes Element but has no native closest().
// Starting at the element itself and moving up through parentElement, it
// returns the first element found among the document-wide matches for
// selector `s`, or null once the ancestors run out.
//
if (window.Element && !Element.prototype.closest) {
  Element.prototype.closest = function (s) {
      var candidates = (this.document || this.ownerDocument).querySelectorAll(s);
      var current = this;
      var idx;
      do {
          // Scan the candidate list backwards for the element under test;
          // idx ends at its position, or at -1 when it is not a candidate.
          idx = candidates.length - 1;
          while (idx >= 0 && candidates.item(idx) !== current) {
              idx -= 1;
          }
      } while (idx < 0 && (current = current.parentElement));
      return current;
  };
}


// On click, strip newlines and tabs from every text node under <body>,
// leaving text that sits inside a <pre> element untouched.
document.getElementById("remove").addEventListener('click',  function(e) {
    //
    // traverse the DOM, visiting text nodes only
    //
    var walker = document.createTreeWalker(
            document.body,
            NodeFilter.SHOW_TEXT,
            null,
            false
    );

    var node;
    while ((node = walker.nextNode())) {
        // closest() yields null when neither the parent element nor any of its
        // ancestors is a <pre>; only those text nodes may be cleaned. (The
        // previous `!= null` test was inverted and cleaned only <pre> content.)
        if (node.parentElement.closest('PRE') === null) {
            node.textContent = node.textContent.replace(/[\n\t]+/g, "");
        }
    }
});
      

/* Give every <pre> element a pale yellow background. */
pre {
    background: #fffbec;
}
      

<button id="remove">Remove</button><br>



<pre>
    this is a pre tag

    with    tab
</pre>

<pre class="language-cpp">
  <code>
    void main() {
      printf("Hello");
    }
  </code>
</pre>

<p>
    first word

    new       end</p>
      

Run code


+2


source







All Articles