The code below wraps each `<h2>` and the `.episode` siblings that follow it in a `.season` container:
$page = '<div class="block">
<h2>Season 1</h2>
<div class="episode"><a href="s1ep1.com">Episode 1</a></div>
<div class="episode"><a href="s1ep2.com">Episode 2</a></div>
<h2>Season 2</h2>
<div class="episode"><a href="s2ep1.com">Episode 1</a></div>
<div class="episode"><a href="s2ep1.com">Episode 2</a></div>
</div>';

/**
 * Groups the direct children of every element whose class attribute is exactly
 * "block" into <div class="season"> wrappers.
 *
 * Each <h2> child starts a new season; the <h2> and every following child whose
 * class attribute is exactly "episode" are moved into that season, up to the
 * next <h2>. The block is then rebuilt so that it contains only the season
 * wrappers: whitespace, text, comments, other elements and any "episode" that
 * appears before the first <h2> are dropped from the block.
 *
 * Blocks that have no <h2> as a direct child are left untouched.
 *
 * @param DOMDocument $dom Document to modify in place.
 *
 * @return void
 */
function wrapSeasons(DOMDocument $dom)
{
    $xpath = new DOMXPath($dom);

    foreach ($xpath->query("//*[@class='block']") as $block) {
        // Season state is per block. Sharing it between blocks would re-append
        // the seasons of earlier blocks to later ones, moving them out of
        // the block they belong to.
        $seasons = array();
        $current = null;

        // childNodes is a live list: moving nodes out of the block while
        // iterating it directly would skip siblings, so iterate over a snapshot.
        foreach (iterator_to_array($block->childNodes) as $child) {
            if (!($child instanceof DOMElement)) {
                // Text, comments and other non-element nodes carry no class
                // attribute and are never part of a season.
                continue;
            }

            if ($child->nodeName === 'h2') {
                $current = $dom->createElement('div');
                $current->setAttribute('class', 'season');
                $seasons[] = $current;
                $current->appendChild($child);
            } elseif ($current !== null && $child->getAttribute('class') === 'episode') {
                // Episodes only belong to a season once a heading has opened one.
                $current->appendChild($child);
            }
        }

        if (!$seasons) {
            // Nothing was moved, so the block is still in its original state.
            continue;
        }

        // Drop whatever is left in the block, then insert the seasons in order.
        while ($block->firstChild !== null) {
            $block->removeChild($block->firstChild);
        }
        foreach ($seasons as $season) {
            $block->appendChild($season);
        }
    }
}

$dom = new DOMDocument();
// Collect libxml parse warnings internally instead of emitting PHP warnings,
// then discard them and restore the previous setting.
$origVal = libxml_use_internal_errors(true);
$dom->loadHTML($page);
libxml_clear_errors();
libxml_use_internal_errors($origVal);

wrapSeasons($dom);

echo $dom->saveHTML();