Question

I traverse an HTML document with SGML and XmlDocument. When I find an XmlNode whose type is Text, I need to replace its value with content that contains an XML element. I can't change InnerXml because it's read-only. I tried changing InnerText instead, but then the tag delimiter characters < and > are encoded as &lt; and &gt;. For example:

<p>
    This is a text that will be highlighted.
    <anothertag />
    <......>
</p>

I'm trying to change it to:

<p>
    This is a text that will be <span class="highlighted">highlighted</span>.
    <anothertag />
    <......>
</p>

What is the easiest way to modify the value of a text XmlNode?

Was it helpful?

Solution

I have a workaround. I don't know whether it is a real solution, but it produces the result I want. Please comment on whether this code is a worthwhile solution or not.

    /// <summary>
    /// Walks the children of <paramref name="node"/> depth-first and wraps every
    /// occurrence of <c>a_search_term</c> found in a text node in a <c>&lt;b&gt;</c> element.
    /// </summary>
    /// <param name="node">
    /// Node whose descendants are searched. It is passed by <c>ref</c> only to keep the
    /// existing signature; the method never reassigns it.
    /// </param>
    private void traverse(ref XmlNode node)
    {
        // Nothing to do for a missing node; an empty search term can never be matched.
        if (node == null || string.IsNullOrEmpty(a_search_term))
        {
            return;
        }

        var element = node.FirstChild;
        while (element != null)
        {
            // Capture the successor first: once a text node is replaced it is detached
            // from the tree and its NextSibling is null. This also skips the nodes that
            // the replacement inserts, so a highlighted term is never wrapped twice.
            var next = element.NextSibling;

            if (element.NodeType == XmlNodeType.Text)
            {
                HighlightTextNode(element);
            }
            else if (element.HasChildNodes)
            {
                traverse(ref element);
            }

            element = next;
        }
    }

    /// <summary>
    /// Replaces <paramref name="textNode"/> with a sequence of text nodes and
    /// <c>&lt;b&gt;</c> elements, one <c>&lt;b&gt;</c> per occurrence of <c>a_search_term</c>.
    /// Text nodes that do not contain the term are left untouched.
    /// </summary>
    /// <param name="textNode">A text node that is attached to a parent.</param>
    private void HighlightTextNode(XmlNode textNode)
    {
        var text = textNode.Value;

        // Ordinal search, matching the semantics of string.Replace(string, string).
        var hit = text.IndexOf(a_search_term, System.StringComparison.Ordinal);
        if (hit < 0)
        {
            return;
        }

        // The pieces are built as DOM nodes rather than as an InnerXml string, so
        // characters such as '&' and '<' in the text are never parsed as markup and
        // no placeholder wrapper element is needed.
        var owner = textNode.OwnerDocument;
        var fragment = owner.CreateDocumentFragment();
        var start = 0;

        while (hit >= 0)
        {
            if (hit > start)
            {
                fragment.AppendChild(owner.CreateTextNode(text.Substring(start, hit - start)));
            }

            var bold = owner.CreateElement("b");
            bold.AppendChild(owner.CreateTextNode(a_search_term));
            fragment.AppendChild(bold);

            start = hit + a_search_term.Length;
            hit = text.IndexOf(a_search_term, start, System.StringComparison.Ordinal);
        }

        if (start < text.Length)
        {
            fragment.AppendChild(owner.CreateTextNode(text.Substring(start)));
        }

        // Inserting a document fragment moves its children into the parent, in order.
        var parent = textNode.ParentNode;
        parent.InsertBefore(fragment, textNode);
        parent.RemoveChild(textNode);
    }

Also, I remove <text> and </text> strings after the traverse function:

        // Load the SGML/HTML input into a DOM, highlight it, and show the resulting markup.
        doc = new XmlDocument();
        doc.PreserveWhitespace = true;
        // No resolver is supplied, so external resources referenced by the input are not fetched.
        doc.XmlResolver = null;
        doc.Load(sgmlReader);

        // DocumentElement is always the root element. FirstChild may instead be a
        // DOCTYPE, an XML declaration or a comment, none of which has child nodes.
        XmlNode html = doc.DocumentElement;
        if (html != null)
        {
            traverse(ref html);
        }

        // Strip any placeholder <text> wrappers from the serialized markup.
        textBox1.Text = doc.OuterXml.Replace("<text>", String.Empty).Replace("</text>", String.Empty);

OTHER TIPS

using System;
using System.Xml;

/// <summary>
/// Demonstrates highlighting a word inside a text node by splitting the node
/// and wrapping the matching part in a <c>&lt;span class="highlighted"&gt;</c> element.
/// </summary>
public class Sample {

  /// <summary>
  /// Loads a small sample paragraph, highlights the word in its leading text node,
  /// and writes the paragraph's inner XML to the console.
  /// </summary>
  public static void Main() {
    XmlDocument doc = new XmlDocument();
    doc.LoadXml(
    "<p>" +
    "This is a text that will be highlighted." +
    "<br />" +
    "<img />" +
    "</p>");
    const string word = "highlighted";

    // Only the leading child is processed, and only when it is a text node.
    // The cast yields null for a missing child or any other node type.
    XmlText text = doc.DocumentElement.FirstChild as XmlText;

    int at = text == null ? -1 : text.Value.IndexOf(word, StringComparison.Ordinal);
    while (at >= 0) {
      // Split "before|word|after" into three sibling text nodes. SplitText keeps
      // all of them attached to the parent, in document order.
      XmlText match = text.SplitText(at);
      XmlText rest = match.SplitText(word.Length);

      // Swap the middle node for the span, then move that node inside the span.
      // Working on nodes avoids re-parsing the parent's markup and any escaping issues.
      XmlElement span = doc.CreateElement("span");
      span.SetAttribute("class", "highlighted");
      match.ParentNode.ReplaceChild(span, match);
      span.AppendChild(match);

      // Continue with the remainder so every occurrence is highlighted.
      text = rest;
      at = text.Value.IndexOf(word, StringComparison.Ordinal);
    }

    Console.WriteLine(doc.DocumentElement.InnerXml);
  }
}

The InnerText property will give you the text content of all the child nodes of the XmlNode. What you really want to set is the InnerXml property, which will be construed as XML, not as text.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top