and what do
<star/>
and <star index="2"/>
mean in XML?
According to section 3.1 of the XML spec, grammar rule [44] describes "Tags for Empty Elements",
which means the element may have attributes but has no content (in other words, no descendants and no text).
UPDATE
After reading more comments from OP and after some new updates on the question, here's one possible solution:
test.pl
#!/usr/bin/env perl
package Bot::Find::Answer;
use strict;
use warnings;
use XML::LibXML;
use Data::Dumper;
use List::Util qw/first/;
#### Constructor.
#### Takes the path of the XML file holding the question/answer data,
#### stores it on the object together with an empty knowledge base,
#### then calls init() to fill the knowledge base from that file.
#### Returns the new Bot::Find::Answer instance.
sub new {
    my $class    = shift;
    my $xml_path = shift;

    my %state = (
        #### Path on disk to the XML file
        xml_path => $xml_path,
        #### Knowledge base: filled by init()
        kb       => [],
    );

    my $self = bless \%state, $class;
    $self->init;

    return $self;
}
#### Parse the XML file at $self->{xml_path} and fill $self->{kb}.
####
#### For every <category> element:
####   * the text of its first <pattern> child becomes a regex source string
####     in which each '*' is a capture group '(.*)' and every other
####     character is matched literally;
####   * the text of every template/random/li element becomes one answer.
#### One hash { p => $regex_source, a => \@answers } is pushed per category.
#### Categories without a <pattern> child are skipped.
#### Dies if XML::LibXML cannot load the file.
sub init {
    my ($self) = @_;

    my $kb  = $self->{kb};
    my $xml = XML::LibXML->load_xml(
        location => $self->{xml_path}
    );

    for my $cat ($xml->findnodes('//category')) {
        # A category with no <pattern> can never be matched; skip it instead
        # of calling a method on undef.
        my ($pattern_node) = $cat->findnodes('pattern');
        next unless defined $pattern_node;

        # Escape everything between the stars so that characters such as
        # '+', '?' or '(' in the question text are matched literally.
        # The -1 limit keeps a trailing empty field, i.e. a trailing '*'.
        my $question_pattern = join '(.*)',
            map { quotemeta($_) }
            split /\*/, $pattern_node->textContent, -1;

        my @answers =
            map { $_->textContent }
            $cat->findnodes('template/random/li');

        push @$kb, {
            p => $question_pattern,
            a => \@answers,
        };
    }

    return;
}
#### Answer a question using the knowledge base in $self->{kb}.
####
#### Finds the first category whose pattern matches $q, picks one of its
#### answers at random and replaces each [captureN] placeholder with the
#### text of the N-th capture group of that match.
#### Placeholders with no corresponding capture group are left untouched.
####
#### Returns the filled-in answer, or nothing (undef in scalar context,
#### empty list in list context) when no category matches or the matching
#### category has no answers.
sub compute_answer {
    my ($self, $q) = @_;

    # Match each pattern once, keeping the captures of the first hit.
    my ($cat_found, @captures);
    for my $cat (@{ $self->{kb} }) {
        my @groups = $q =~ /$cat->{p}/ or next;

        # A successful list-context match on a pattern without capture
        # groups returns (1), which is not a capture; $#+ is the real
        # number of groups in the last successful match.
        @groups = () if $#+ == 0;

        ($cat_found, @captures) = ($cat, @groups);
        last;
    }
    return unless $cat_found;

    my $answers = $cat_found->{a};
    return unless @$answers;

    my $picked_answer = $answers->[ int(rand(scalar @$answers)) ];

    # Single pass over the template, so placeholder-looking text inside a
    # captured value is not expanded again.
    $picked_answer =~ s{\[capture([1-9][0-9]*)\]}{
          $1 > @captures              ? "[capture$1]"
        : defined $captures[ $1 - 1 ] ? $captures[ $1 - 1 ]
        :                               ''
    }ge;

    return $picked_answer;
}
package main;

#### Demo driver: load the knowledge base from sample.xml and print the
#### bot's reply to one sample question.
my $bot   = Bot::Find::Answer->new('sample.xml');
my @reply = $bot->compute_answer("you know michael jordan");
print @reply;
sample.xml:
<?xml version="1.0" encoding="iso-8859-1"?>
<data>
<category>
<pattern>you know *</pattern>
<template>
<random>
<li>No, who is [capture1]?</li>
<li>who is [capture1]?</li>
<li>i don't know.</li>
</random>
</template>
</category>
<category>
<pattern>name a country from south america</pattern>
<template>
<random>
<li>ecuador</li>
<li>uruguay</li>
<li>chile</li>
<li>panama</li>
<li>brazil</li>
</random>
</template>
</category>
</data>