the XPath
attribute of the HtmlAgilityPack.HtmlNode
shows the nodes on the path from root to the node. For example \div[1]\div[2]\table[0]
. You can traverse this path in the live document to find the corresponding live element. However this path may not be precise as HtmlAgilityPack removes some tags like <form>
then before using this solution add the omitted tags back using
HtmlNode.ElementsFlags.Remove("form");
struct DocNode
{
public string Name;
public int Pos;
}
///// structure to hold the name and position of each node in the path
The following method finds the live element according to the XPath
static public HtmlElement GetLiveElement(HtmlNode node, HtmlDocument doc)
{
var pattern = @"/(.*?)\[(.*?)\]"; // like div[1]
// Parse the XPath to extract the nodes on the path
var matches = Regex.Matches(node.XPath, pattern);
List<DocNode> PathToNode = new List<DocNode>();
foreach (Match m in matches) // Make a path of nodes
{
DocNode n = new DocNode();
n.Name = n.Name = m.Groups[1].Value;
n.Pos = Convert.ToInt32(m.Groups[2].Value)-1;
PathToNode.Add(n); // add the node to path
}
HtmlElement elem = null; //Traverse to the element using the path
if (PathToNode.Count > 0)
{
elem = doc.Body; //begin from the body
foreach (DocNode n in PathToNode)
{
//Find the corresponding child by its name and position
elem = GetChild(elem, n);
}
}
return elem;
}
the code for GetChild Method used above
public static HtmlElement GetChild(HtmlElement el, DocNode node)
{
// Find corresponding child of the elemnt
// based on the name and position of the node
int childPos = 0;
foreach (HtmlElement child in el.Children)
{
if (child.TagName.Equals(node.Name,
StringComparison.OrdinalIgnoreCase))
{
if (childPos == node.Pos)
{
return child;
}
childPos++;
}
}
return null;
}