The following code does what I want, using XMLReader, XMLReader::read(), and XMLReader::nodeType:
<?php
// Walks a small XML document with XMLReader and prints one line per node:
// the name of the node-type constant, followed by the text value or the
// element tag for TEXT, ELEMENT and END_ELEMENT nodes.

// Build a value => name table from the XMLReader class constants once, up
// front, instead of scanning the constant list for every node. Only the first
// name seen for a value is kept, which is the same name array_search() would
// return for that value.
// NOTE(review): XMLReader's parser-property constants (e.g. VALIDATE) appear
// to share small integer values with the node-type constants; this relies on
// the node-type names coming first in getConstants() -- confirm.
$refl = new ReflectionClass('XMLReader');
$xml_consts = $refl->getConstants();
$node_type_names = [];
foreach ($xml_consts as $const_name => $const_value) {
    if (is_int($const_value) && !isset($node_type_names[$const_value])) {
        $node_type_names[$const_value] = $const_name;
    }
}

$xml = <<<XML
<root>
<element>
<text>A <x>b</x> c <y>d</y> e.</text>
</element>
</root>
XML;

$reader = new XMLReader();
// Stop early if the document cannot be loaded rather than reading from a
// reader that has no input.
if (!$reader->XML($xml)) {
    throw new RuntimeException('Unable to load the XML string into XMLReader.');
}

// For validation only
$reader->setParserProperty(XMLReader::VALIDATE, true);
if ($reader->isValid()) {
    print("No matter what people say, this XML is valid!\n\n");
}
// Prevent warnings about missing DTD
$reader->setParserProperty(XMLReader::VALIDATE, false);

while ($reader->read()) {
    // Suffix shown after the node-type name; stays empty for node types that
    // are not handled explicitly below.
    $info = '';
    switch ($reader->nodeType) {
        case XMLReader::TEXT:
            $info = ": '{$reader->value}'";
            break;
        case XMLReader::ELEMENT:
            $info = ": <{$reader->name}>";
            break;
        case XMLReader::END_ELEMENT:
            $info = ": </{$reader->name}>";
            break;
    }

    // An unknown node type prints an empty name, as a failed array_search()
    // (false) did when concatenated into the output string.
    $type_name = isset($node_type_names[$reader->nodeType])
        ? $node_type_names[$reader->nodeType]
        : '';
    print($type_name . $info . PHP_EOL);
}

// Release the parser input once the whole document has been traversed.
$reader->close();
?>
It outputs:
No matter what people say, this XML is valid!
ELEMENT: <root>
SIGNIFICANT_WHITESPACE
ELEMENT: <element>
SIGNIFICANT_WHITESPACE
ELEMENT: <text>
TEXT: 'A '
ELEMENT: <x>
TEXT: 'b'
END_ELEMENT: </x>
TEXT: ' c '
ELEMENT: <y>
TEXT: 'd'
END_ELEMENT: </y>
TEXT: ' e.'
END_ELEMENT: </text>
SIGNIFICANT_WHITESPACE
END_ELEMENT: </element>
SIGNIFICANT_WHITESPACE
END_ELEMENT: </root>