Question

Could someone help me out?

I'm trying to get info from a page whose HTML looks like this:

<div class="block">
  <h2>Season 1</h2>
  <div class="episode"><a href="somelink.com">Episode 1</a></div>
  <div class="episode"><a href="somelink.com">Episode 2</a></div>
  <h2>Season 2</h2>
  <div class="episode"><a href="somelink.com">Episode 1</a></div>
</div>

What I am stuck on is this: for every season, I want to wrap the season's heading and its episodes together in a div. For example:

<div class="block">
    <div class="season">
      <h2>Season 1</h2>
      <div class="episode"><a href="somelink.com">Episode 1</a></div>
      <div class="episode"><a href="somelink.com">Episode 2</a></div>
    </div>
    <div class="season">
      <h2>Season 2</h2>
      <div class="episode"><a href="somelink.com">Episode 1</a></div>
    </div>
</div>

This is the PHP code I'm working with:

// URL of the page to scrape (placeholder value).
$page = "someurl.com";

// $page is reused: after this call it holds whatever $this->curl->get() returned.
// NOTE(review): presumably that is the page's HTML body; the curl helper is not shown here.
$page = $this->curl->get($page);
$dom = new DOMDocument();
// "@" silences the warnings loadHTML() raises for malformed markup.
@$dom->loadHTML($page);

// Collect every <div> in the document, at any nesting depth.
$divs = $dom->getElementsByTagName('div');
for($i=0;$i<$divs->length;$i++){
  // Exact string comparison: class="block" matches, class="block foo" does not.
  if ($divs->item($i)->getAttribute("class")=="block") {
    // All <h2> descendants of this block.
    $h2s = $divs->item($i)->getElementsByTagName('h2');
    // NOTE(review): count() on a DOMNodeList requires PHP >= 7.2;
    // $h2s->length works on every version.
    if (count($h2s) > 0) {
      foreach ($h2s as $h2) {
      // Stuck at this point
      }
    }
  }
}

How can I do this with PHP DOM? Could someone please give me an example? Thanks.

Was it helpful?

Solution

The code below wraps each <h2> and its .episode siblings in a .season container.

   $page = '<div class="block">
      <h2>Season 1</h2>
      <div class="episode"><a href="s1ep1.com">Episode 1</a></div>
      <div class="episode"><a href="s1ep2.com">Episode 2</a></div>
      <h2>Season 2</h2>
      <div class="episode"><a href="s2ep1.com">Episode 1</a></div>
      <div class="episode"><a href="s2ep2.com">Episode 2</a></div>
    </div>';

   /**
    * Regroups the direct children of every class="block" element into
    * <div class="season"> wrappers: each <h2> opens a new season, and the
    * class="episode" elements that follow it are placed inside that season.
    *
    * Blocks that have no <h2> among their direct children are left untouched.
    * In a block that is regrouped, everything that is not an <h2> or an episode
    * (whitespace, text, comments, other elements) is dropped, as is any episode
    * that appears before the first <h2>.
    *
    * @param DOMDocument $dom Document to modify in place.
    * @return void
    */
   function wrapSeasonsInBlocks(DOMDocument $dom)
   {
      // Template 'season' div; one copy is made per <h2>.
      $seasonTemplate = $dom->createElement('div');
      $seasonTemplate->setAttribute('class', 'season');

      // Get all '.block' elements using XPath. The result is a static list,
      // so rewriting the blocks below does not disturb this loop.
      $xpath = new DOMXPath($dom);

      foreach ($xpath->query("//*[@class='block']") as $block) {
         // State is reset for every block so that seasons collected for one
         // block can never leak into the next one.
         $seasons = [];
         $current = null;

         foreach ($block->childNodes as $child) {
            // Only elements can be headings or episodes; this skips text,
            // comments, CDATA sections and processing instructions alike.
            if (!$child instanceof DOMElement) {
               continue;
            }

            if ($child->nodeName === 'h2') {
               // A heading starts a new season.
               $current = $seasonTemplate->cloneNode(true);
               $seasons[] = $current;
            } elseif ($current === null || $child->getAttribute('class') !== 'episode') {
               // Not an episode, or an episode with no season to belong to yet.
               continue;
            }

            // Append a copy rather than the node itself: appendChild() would
            // move the node out of $block and change the live childNodes list
            // that is being iterated.
            $current->appendChild($child->cloneNode(true));
         }

         // No heading among the direct children: leave this block as it is.
         if (!$seasons) {
            continue;
         }

         // Remove all children of the current block...
         while ($block->firstChild !== null) {
            $block->removeChild($block->firstChild);
         }

         // ...and insert the 'season' nodes in their place.
         foreach ($seasons as $seasonNode) {
            $block->appendChild($seasonNode);
         }
      }
   }

   $dom = new DOMDocument();

   // Collect libxml parse errors internally instead of emitting warnings,
   // then restore the previous setting.
   $origVal = libxml_use_internal_errors(true);
   $dom->loadHTML($page);
   libxml_clear_errors();
   libxml_use_internal_errors($origVal);

   wrapSeasonsInBlocks($dom);

   echo $dom->saveHTML();
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top