This will do what you need.
It starts by pulling all the <div class="customer">
elements into array @customers
and extracting the information from there.
I have taken your example of the address label, sorted by the customer number (by which I assume you mean the field with class="customer_id"
). All of the address values are pulled from the array into the hash %customers
, keyed by the customer ID and the name of the element class. The information is then printed in the order of the ID.
use strict;
use warnings;
use HTML::TreeBuilder::XPath;
my $tree = HTML::TreeBuilder::XPath->new_from_file('html.html');
my @customers = $tree->findnodes('/html/body/div[@class="customer"');
my %customers;
for my $cust (@customers) {
my $id = $cust->findvalue('div[@class="customer_id"]');
for my $field (qw/ customer_name customer_addr customer_city customer_state /) {
my $xpath = "div[\@class='$field']";
my $val = $cust->findvalue($xpath);
$customers{$id}{$field} = $val;
}
}
for my $id (sort keys %customers) {
my $info = $customers{$id};
print "Customer ID $id\n";
print $info->{customer_name}, "\n";
print $info->{customer_addr}, "\n";
print $info->{customer_city}, "\n";
print $info->{customer_state}, "\n";
print "\n";
}
output
Customer ID 001
Joe Blough
123 That Road
Smallville
Nebraska