Overlapping regex matches with replace method

563 Views Asked by At

Regex newbie here! Let's say I have two arrays of phrases and I am searching through a text that says:

let text = "This is a cool story.";
I am looking for these phrases in the text.

ArrBlue = ["cool story"] 
ArrGreen = ["This is a cool"] 

I want to highlight the phrases from each array with the corresponding color, so every phrase in ArrBlue is rendered in blue and every phrase in ArrGreen in green. I created two new RegExp objects like so:

let regexBlue = new RegExp(ArrBlue.join('|'), "ig");
let regexGreen = new RegExp(ArrGreen.join('|'), "ig");

Then I use these new variables to replace the text, wrapping each matched expression in a span tag like so:

let newText = text.replace(regexBlue, "<span class='blue'>$&</span>")    
.replace(regexGreen, "<span class='green'>$&</span>")

The issue that i am having is I want my html to look like so..

<span class="blue">This is a <span class="green">cool story</span></span>

But what I'm actually getting is:

This is a <span class="green">cool story</span>

Here's a quick snippet to better illustrate my situation.

// Phrases to wrap in the "blue" and "green" highlight classes, respectively.
let blueListArray = ["This is a cool"];
let greenListArray = ["cool story"];

// On form submission: suppress the browser's default submit action, clear
// any previously rendered output, then highlight the textarea's current text.
$("#myform").submit((submitEvent) => {
  submitEvent.preventDefault();
  $("#results").html("");
  highlightText($("textarea#textEntered").val());
});

function highlightText(text){

let regexGreen = new RegExp(greenListArray.join('[,!?.]?|'), "ig");
let regexBlue = new RegExp(blueListArray.join('[,!?.]?|') + "ig");

let newText = text.replace(regexGreen, "<span class='green'>$&</span>")
.replace(regexBlue, "<span class='blue'>$&</span>");

$('#results').html(newText);
}
/* Highlight style applied to spans carrying the "blue" class. */
.blue{
  background-color: red; /* NOTE(review): the "blue" class paints a red background; confirm this is intended */
  font: red; /* NOTE(review): `red` is not a valid value for the `font` shorthand, so this declaration is ignored; `color` was presumably meant, confirm */
}
.green{
  background-color: green;
}


.greyHighlight:hover{
  background-color: grey;
  color: white;
  
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
 <form id="myform">
      <fieldset>
        <textarea name='textEntered' id='textEntered' />This is a cool story.</textarea>
        <button type='submit' class="bttn">Enter</button>
      </fieldset>
    </form>
   
    <div class="results-container"> <span class="results-title">Highlighted Text:</span> <div id="results"> </div> <br> </div>


<div class="wantedResults">
Results I wish to have <br>
<span class="blue">This is a</span><span class="green">cool story</span> 
</div>

3

There are 3 best solutions below

1
Vignesh Raja On BEST ANSWER

This works as a basic implementation. Phrases that overlap more than twice will not be handled properly, and whitespace still needs to be handled.

// Phrase/class pairs: `str` is the phrase to look for and `color` is the
// CSS class put on the <span> that wraps it.
var arr = [
  { str: "cool story", color: "green" },
  { str: "This is a cool", color: "blue" },
  { str: "two best friends", color: "red" },
  { str: "about two best", color: "yellow" },
];

// On form submission: suppress the browser's default submit action, empty
// the results container, then highlight the textarea's current text.
$("#myform").submit((submitEvent) => {
  submitEvent.preventDefault();
  $("#results").html("");
  highlightText($("textarea#textEntered").val());
});

/**
 * Highlights the phrases listed in `arr` inside `text` and appends the
 * resulting markup to `#results`.
 *
 * Entries are processed in array order. For each entry, any tags that an
 * earlier iteration inserted between the words of the phrase are first moved
 * out of the way so the phrase is contiguous again, and then the first
 * literal occurrence of the phrase is wrapped in
 * `<span class='<color>'>…</span>`.
 *
 * @param {string} text - Raw text to highlight.
 */
function highlightText(text)
{
    for(var index=0;index<arr.length;index++)
    {
        // Build a pattern from the phrase's words in which each space may be
        // replaced by optional whitespace and optional tag markup
        // (`\s*(<.*>)*\s*`). A string passed to match() is converted to a
        // RegExp without flags, so the result holds the first match plus one
        // capture group per gap between words.
        var matches=text.match(arr[index].str.split(" ").join("\\s*(<.*>)*\\s*"));
        if(matches)
        {
            // matches[0] is the whole matched region; matches[1..] are the
            // tag fragments captured between words (undefined when a gap
            // contained no tag, hence the `continue`).
            for(var i=1;i<matches.length;i++)
            {
                if(!matches[i]) continue;
                if(matches[i].indexOf("/")==-1)
                {
                    // No "/" in the fragment: treated as an opening tag and
                    // moved to the END of the matched region.
                    text = text.replace(matches[0],matches[0].replace(matches[i],"")+matches[i]);
                }
                else
                {
                
                    // Fragment contains "/": treated as a closing tag and
                    // moved to the FRONT of the matched region.
                    text = text.replace(matches[0],matches[i]+matches[0].replace(matches[i],""));
                }
            }
        }
        // String pattern: only the first literal occurrence of the phrase is
        // replaced; `$&` in the replacement stands for the matched phrase.
        text = text.replace(arr[index].str, "<span class='"+arr[index].color+"'>$&</span>");
    }
    // NOTE(review): the string is handed to `$()` before being appended, so
    // jQuery decides how to interpret it; verify the behavior when `text`
    // does not start with a tag.
    $('#results').append($(text))
}
.blue{
  background-color: blue;
}
.green{
  background-color: green;
}
.red{
  background-color: red;
}
.yellow{
  background-color: yellow;
}
.grey{
  background-color: grey;
}

.greyHighlight:hover{
  background-color: grey;
  color: white;
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
  <form id="myform">
      <fieldset>
        <textarea name='textEntered' id='textEntered'>This is a cool story about two best friends.</textarea>
        <button type='submit' class="bttn">Enter</button>
      </fieldset>
  </form>
   
    <div class="results-container"> <span class="results-title">Highlighted Text:</span> <div id="results"> </div> <br> </div>


<div class="wantedResults">
Results I wish to have <br>
<span class="blue">This is a</span><span class="green">cool story</span> 
</div>

0
Anony Mous On

I think this may accomplish what you are wanting. It isn't ideal though, as it won't work if there are multiple layers of <span> or </span> but it does produce the correct result in your example. Hopefully this will help in some way :)

// Phrase lists, one per highlight color.
let blueListArray = ["This is a cool"];
let greenListArray = ["cool story"];

// Submit handler: block the browser's default submit action, wipe the
// previous result, and run the highlighter on the textarea's contents.
$("#myform").submit((evt) => {
  evt.preventDefault();
  $("#results").html("");
  const enteredText = $("textarea#textEntered").val();
  highlightText(enteredText);
});

/**
 * Highlights the green and blue phrase lists inside `text` and renders the
 * result into `#results`.
 *
 * Every space inside a phrase is turned into a small alternation that also
 * accepts tag markup, so a phrase can still be found after an earlier pass
 * has inserted a <span> tag in the middle of it. Green is applied first,
 * then blue.
 *
 * The module-level phrase arrays are only read, never modified: rewriting
 * their elements in place would make a second call operate on patterns that
 * had already been transformed. Every phrase in each list is transformed,
 * not just the first one.
 *
 * @param {string} text - Raw text to highlight.
 */
function highlightText(text){
    // Stands in for each space in a phrase: a plain space, a tag, or a tag
    // with a space on either side.
    const tagTolerantGap = '( |<.*>| <.*>|<.*> )';
    const passes = [
        [greenListArray, 'green'],
        [blueListArray, 'blue'],
    ];

    let highlighted = text;
    for (const [phrases, className] of passes) {
        // An empty list would produce an empty pattern that matches
        // everywhere, so there is nothing meaningful to highlight.
        if (phrases.length === 0) continue;

        // Each phrase may be followed by one optional punctuation mark; the
        // alternatives are OR-ed together into one pattern.
        const pattern = phrases
            .map((phrase) => phrase.split(' ').join(tagTolerantGap))
            .join('[,!?.]?|');
        highlighted = replaceStuffs(highlighted, new RegExp(pattern, "ig"), className);
    }

    $('#results').html(highlighted);
}

/**
 * Wraps the first match of `regex` in `text` with
 * `<span class="classToAdd">…</span>`, adjusting for span markup that is
 * already present inside the matched region so the tags stay nested.
 *
 * - Match contains both `<span` and `</span>` (or neither): it is wrapped
 *   as is.
 * - Match contains only an opening `<span`: an extra `</span>` is inserted
 *   after that span's existing closing tag, and only the new opening tag is
 *   put in front of the match.
 * - Match contains only a closing `</span>`: that closing tag is moved to
 *   just after the new wrapper's own closing tag.
 *
 * Only one level of existing span markup is handled.
 *
 * @param {string} text - Text (possibly already containing span markup).
 * @param {RegExp} regex - Pattern locating the region to wrap. It is run
 *   with `exec`, so a global regex has its `lastIndex` advanced.
 * @param {string} classToAdd - CSS class for the new span.
 * @returns {string} The text with the first match wrapped, or the text
 *   unchanged when the pattern does not match.
 */
function replaceStuffs(text, regex, classToAdd) {
 const match = regex.exec(text);
 if (match === null) {
  // exec() returns null when nothing matches; indexing into it would throw.
  return text;
 }

 const matched = match[0]; // the full matched substring
 const hasOpeningSpan = matched.indexOf('<span') !== -1;
 const hasClosingSpan = matched.indexOf('</span>') !== -1;

 let result = text;
 let body = matched;
 let needToAppend = true;

 if (hasOpeningSpan && !hasClosingSpan) {
  // Grab everything from the opening tag to the end of the match so the
  // fragment is as specific as possible when searched for in the full text.
  const openingFragment = /<span.*/i.exec(matched)[0];
  // The fragment is literal text, so escape regex metacharacters before
  // embedding it in a dynamic pattern.
  const escapedFragment = openingFragment.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const throughClosingTag = new RegExp(escapedFragment + '.*</span>', "ig");
  // Put our closing </span> right after the existing span's closing tag.
  result = result.replace(throughClosingTag, '$&</span>');
  needToAppend = false; // the closing tag has already been placed
 } else if (!hasOpeningSpan && hasClosingSpan) {
  // Move the stray closing tag to the end of the matched region.
  body = matched.replace('</span>', '') + '</span>';
 }

 const replacement = '<span class="' + classToAdd + '">' + body + (needToAppend ? '</span>' : '');

 // A replacer function keeps any `$` sequences in the inserted text literal.
 return result.replace(matched, () => replacement);
}
/* Highlight style applied to spans carrying the "blue" class. */
.blue{
  background-color: red; /* NOTE(review): the "blue" class paints a red background; confirm this is intended */
  font: red; /* NOTE(review): `red` is not a valid value for the `font` shorthand, so this declaration is ignored; `color` was presumably meant, confirm */
}
.green{
  background-color: green;
}


.greyHighlight:hover{
  background-color: grey;
  color: white;
  
}
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
 <form id="myform">
      <fieldset>
        <textarea name='textEntered' id='textEntered' />This is a cool story.</textarea>
        <button type='submit' class="bttn">Enter</button>
      </fieldset>
    </form>
   
    <div class="results-container"> <span class="results-title">Highlighted Text:</span> <div id="results"> </div> <br> </div>


<div class="wantedResults">
Results I wish to have <br>
<span class="blue">This is a</span><span class="green">cool story</span> 
</div>

0
Robert Cotterman On

It could have something to do with "this is a cool" and "cool story" overlapping. You might be overwriting one search with another, especially since you run the second replacement on the output of the first. Maybe include </span> as part of your search, as in:

this is a cool(</span>)? or cool(</span>)? story