This is possible using DOMDocument. To cover every valid heading tag (h1, h2, h3, h4, h5, h6), the whole search can be done in a single loop over the six heading levels. Once a heading element is found, that node is used as the root from which to search for the other required tags (links and images).
// Collect every <a> and <img> that appears inside a heading element (h1..h6)
// of the HTML document located at $url.
//
// Results:
//   $links  - list of ["href" => ..., "innerHTML" => ...], one entry per <a>.
//             Despite the key name, "innerHTML" holds the node's text content
//             (nodeValue), not its markup.
//   $images - list of ["src" => ...], one entry per <img>.
// Entries are grouped by heading level: all h1 matches first, then h2, etc.
$links = [];
$images = [];

$dom = new DOMDocument();

// HTML5 elements such as <header> and <footer> make libxml report "invalid
// tag" parser warnings. Route libxml errors to its internal buffer for the
// duration of the load instead of using the blanket `@` operator, which would
// also hide unrelated diagnostics raised by this call.
$previousLibxmlSetting = libxml_use_internal_errors(true);
$loaded = $dom->loadHTMLFile($url);
// Discard the buffered parser errors and restore the caller's setting so the
// rest of the script is unaffected.
libxml_clear_errors();
libxml_use_internal_errors($previousLibxmlSetting);

// If the document could not be loaded there is nothing to scan; $links and
// $images simply stay empty.
if ($loaded !== false) {
    for ($level = 1; $level <= 6; $level++) {
        $tagName = 'h' . $level;

        foreach ($dom->getElementsByTagName($tagName) as $headingNode) {
            // Use the heading as the search root so that only descendants of
            // this heading are considered.
            foreach ($headingNode->getElementsByTagName('a') as $anchor) {
                $links[] = [
                    "href" => $anchor->getAttribute('href'),
                    "innerHTML" => $anchor->nodeValue,
                ];
            }

            foreach ($headingNode->getElementsByTagName('img') as $image) {
                $images[] = [
                    "src" => $image->getAttribute('src'),
                ];
            }
        }
    }
}