Having looked into XML::Twig, I'm not so sure it's the correct tool. It's surprising how awkward such a simple task can be.
This is a working program that uses HTML::TreeBuilder. Unfortunately it doesn't produce formatted output, so I've added some whitespace myself.
use strict;
use warnings;
use HTML::TreeBuilder;
# Parse the sample document. Note that the heredoc is an interpolating one,
# so the \n inside the <code> sample becomes a real newline in the parsed text.
my $html = HTML::TreeBuilder->new_from_content(<<"__HTML__");
<div>
<p>Boring Text:</p>
<p>
Highlight Cool whenever we see it.
but not <a href="/Cool.html">here</a>.
<code>
sub Cool {
print "Foo\n";
}
</code>
And here is more Cool.
</p>
</div>
__HTML__

# Represent text segments as '~text' elements so they can be found with
# look_down and swapped out like any other node.
$html->objectify_text;

# Collect the text elements up front; the tree is modified inside the loop.
my @text_elements = $html->look_down(_tag => '~text');

foreach my $element (@text_elements) {
    my $content     = $element->attr('text');
    my @replacement = process_text($content);

    # An empty list means there is nothing to highlight in this segment.
    next unless @replacement;

    # Swap the new nodes in, then dispose of the element that was replaced.
    my $replaced = $element->replace_with(@replacement);
    $replaced->delete;
}

# Turn the remaining '~text' elements back into plain text before output.
$html->deobjectify_text;

print $html->guts->as_XML;
# Split a text string around every whole-word occurrence of "Cool" and
# return the pieces with a <span class="fun">Cool</span> element standing in
# for each occurrence.
#
# Takes:   the text to process.
# Returns: a list mixing plain strings and HTML::Element spans, in document
#          order, or an empty list when the text contains no match (or is
#          undefined), meaning the caller should leave the text untouched.
sub process_text {
    my ($text) = @_;
    return unless defined $text;

    # A LIMIT of -1 keeps trailing empty fields. Without it, a match at the
    # very end of the text (e.g. "more Cool") is dropped by split and never
    # highlighted, and "Cool Cool" would lose its second occurrence.
    my @pieces = split /\bCool\b/, $text, -1;
    return unless @pieces > 1;

    my $span = HTML::Element->new('span', class => 'fun');
    $span->push_content('Cool');

    # Every boundary between two adjacent pieces is exactly one match, so a
    # fresh copy of the span goes in front of each piece but the first.
    my @nodes;
    for my $i (0 .. $#pieces) {
        push @nodes, $span->clone if $i > 0;

        # Skip the empty strings produced by a match at the start or end of
        # the text, or by two adjacent matches.
        push @nodes, $pieces[$i] if length $pieces[$i];
    }

    # Only the clones are returned; the template span is no longer needed.
    $span->delete;

    return @nodes;
}
Output:
<div>
<p>Boring Text:</p>
<p>
Highlight <span class="fun">Cool</span> whenever we see it.
but not <a href="/Cool.html">here</a>.
<code> sub <span class="fun">Cool</span> { print "Foo "; } </code>
And here is more <span class="fun">Cool</span>.
</p>
</div>