Here is a DOM-based approach:
// Collected output: one associative row is appended per parsed HTML fragment.
$results = [];

// Names of the fields to extract, in extraction order.
$fields = ['name', 'img', 'url', 'blurb'];

// XPath expression used to locate each field. The 'img' expression is a
// union of three alternatives (style attribute of an img.picture, src of any
// img, style attribute of a div.picture); the literal intentionally spans
// several lines, so its continuation lines must stay unindented.
$queries = [
    'name'  => '//img/@alt',
    'img'   => '//img[@class = "picture"]/@style |
//img/@src |
//div[@class = "picture"]/@style',
    'url'   => '//div[@class = "blurb"]//a/@href',
    'blurb' => '//div[@class = "blurb"]',
];

// Regex (extended + case-insensitive, branch-reset group) that reduces an
// image attribute value to a bare file name captured in group 1:
//   - first branch: the png/jpg/jpeg/gif file name inside a
//     "background-image:url(...)" declaration, without path or query string;
//   - second branch: the trailing run of characters that contains no
//     '=', ';' or '/' (i.e. the last path segment of a plain URL).
$imgPattern = <<<'EOD'
~
(?|
.*? background-image:url\( [^)]*? ([^?="\')/]+ \.(?:png|jpe?g|gif) ).*
|
.*? ([^=;/]+)$
)
~ix
EOD;
// Turn every HTML fragment in $data into one row of $results.
//
// For each fragment:
//   - the top-level child of <body> whose class is exactly "bottom-block"
//     is set aside (if several match, the last one wins);
//   - every other top-level child is serialised and concatenated into
//     $row['other'];
//   - when a bottom-block was found, each entry of $fields is extracted from
//     it with the XPath expression in $queries: 'blurb' becomes the inner
//     HTML of the first match, every other field becomes the node value of
//     the first match, and a field without a match becomes ''. The 'img'
//     value is then reduced with $imgPattern.
// A fragment without a bottom-block yields a row that contains only 'other'.
foreach ($data as $html) {
    // Start from a clean row and a clean node for every fragment so that
    // nothing from the previous fragment can leak into this one.
    $tmp = ['other' => ''];
    $bbnode = null;

    $srcDom = new DOMDocument();
    $body = null;
    if (is_string($html) && $html !== '') {
        // Real-world markup is rarely valid; collect libxml's parse warnings
        // internally instead of emitting them, then restore the caller's
        // error-handling mode.
        $previousErrorMode = libxml_use_internal_errors(true);
        $srcDom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);

        // item(0) is null when the parser produced no <body> element.
        $body = $srcDom->getElementsByTagName('body')->item(0);
    }

    if ($body !== null) {
        foreach ($body->childNodes as $elt) {
            // getAttribute() returns '' for a missing attribute, so no
            // separate hasAttribute() check is required.
            if ($elt instanceof DOMElement && $elt->getAttribute('class') === 'bottom-block') {
                $bbnode = $elt;
            } else {
                $tmp['other'] .= $srcDom->saveHTML($elt);
            }
        }
    }

    if ($bbnode !== null) {
        // Copy the bottom-block into its own document so the "//..." XPath
        // expressions only ever see that subtree.
        $bbDom = new DOMDocument();
        $bbDom->appendChild($bbDom->importNode($bbnode, true));
        $xpath = new DOMXPath($bbDom);

        foreach ($fields as $field) {
            $matches = $xpath->query($queries[$field]);
            // query() returns false for an invalid expression and an empty
            // list when nothing matches; both are treated as "no value".
            $first = ($matches !== false && $matches->length > 0) ? $matches->item(0) : null;

            if ($field === 'blurb') {
                // Inner HTML of the blurb container: serialise each child.
                $tmp[$field] = '';
                if ($first !== null) {
                    foreach ($first->childNodes as $child) {
                        $tmp[$field] .= $bbDom->saveHTML($child);
                    }
                }
            } else {
                $tmp[$field] = ($first !== null) ? $first->nodeValue : '';
            }
        }

        // Reduce the raw style/src attribute value to the image file name.
        $tmp['img'] = preg_replace($imgPattern, '$1', $tmp['img']);
    }

    $results[] = $tmp;
}

echo htmlspecialchars(print_r($results, true));