Domanda

Consider the following XML which I have to parse.

<root>
  <item>
    <itemId>001</itemId>
    <itemName>test 1</itemName>
    <description/>
  </item>
</root>

I have to parse each of its tag and store it into a table as follows:

TAG_NAME        TAG_VALUE         IsContainer
------------    --------------    -----------
root            null              true
item            null              true
itemId          001               false
itemName        test 1            false
description     null              false
/item           null              true
/root           null              true

Now to get this done, I am using XmlReader as this allows us to parse each & every node.

I am doing it as follows:

I created the following class to contain each tag's data

/// <summary>
/// One row of the flattened tag table: a single opening or closing XML tag.
/// </summary>
public class XmlTag
{
  /// <summary>Tag name; closing tags are stored with a leading "/" (e.g. "/item").</summary>
  public string XML_TAG { get; set; }      
  /// <summary>Text value of the tag, or null when the tag carries no value.</summary>
  public string XML_VALUE { get; set; }      
  /// <summary>True when the tag belongs to an element that contains child elements.</summary>
  public bool IsContainer { get; set; }
}

I am trying to get the list of tags(including closing ones) as follows:

    // First attempt (incomplete): walks the file at `path` with XmlReader and
    // records one XmlTag per opening and per closing element. XML_VALUE and
    // IsContainer are not filled in yet - see the open questions inline.
    private static List<XmlTag> ParseXml(string path)
    {
        var tags = new List<XmlTag>();

        using (var reader = XmlReader.Create(path))
        {
            while (reader.Read())
            {
                // A fresh tag is created for every node, but only kept for
                // Element / EndElement nodes (tracked through shouldAdd).
                var tag = new XmlTag();
                bool shouldAdd = false;
                switch (reader.NodeType)
                {
                    case XmlNodeType.Element:
                        shouldAdd = true;
                        tag.XML_TAG = reader.Name;

                        //How do I get the VALUE of current reader?
                        //How do I determine if the current node contains children nodes to set IsContainer property of XmlTag object?
                        break;
                    case XmlNodeType.EndElement:
                        shouldAdd = true;
                        // Closing tags are stored as "/name".
                        tag.XML_TAG = string.Format("/{0}", reader.Name);
                        tag.XML_VALUE = null;
                        //How do I determine if the current closing node belongs to a node which had children.. like ROOT or ITEM in above example?
                        break;
                }

                if(shouldAdd)
                    tags.Add(tag);
            }
        }

        return tags;
    }

but I am having difficulty determining the following:

  1. How to determine if current ELEMENT contains children XML nodes? To set IsContainer property.
  2. How to get the value of current node value if it is of type XmlNodeType.Element

Edit:

I have tried to use LINQ to XML as follows:

// Load the whole document into memory with LINQ to XML.
var xdoc = XDocument.Load(@"SampleItem.xml");

// Project every element (in document order) to an XmlTag. Only opening tags
// are produced here; Descendants() has no notion of closing tags.
var tags = (from t in xdoc.Descendants()
            select new XmlTag
            {
                XML_TAG = t.Name.ToString(),
                // Container elements get no value; leaf elements get their text.
                XML_VALUE = t.HasElements ? null : t.Value,
                IsContainer = t.HasElements
            }).ToList();

This gives me the XML tags and their values, but it does not give me ALL the tags, including the closing ones. That's why I decided to try XmlReader. But if I have missed anything in the LINQ to XML example, please correct me.

È stato utile?

Soluzione

First of all, as noted by Jon Skeet in the comments, you should probably consider using other tools, like XmlDocument or LINQ to XML (EDIT: an example using LINQ to XML's XDocument follows below).

Having said that, here is the simplest solution for what you have currently (note that it's not the cleanest possible code, and it doesn't have much validation):

/// <summary>
/// Consumes the content of an already-opened element from <paramref name="reader"/>
/// and returns the flat tag list for it, starting with <paramref name="element"/> itself.
/// </summary>
/// <param name="reader">Reader positioned on the opening tag described by <paramref name="element"/>.</param>
/// <param name="element">Tag for the element being parsed; its XML_VALUE and IsContainer are updated in place.</param>
/// <returns>The element's tag, the tags of all nested elements, and the closing tag when one is read.</returns>
private static List<XmlTag> ParseElement(XmlReader reader, XmlTag element)
{
    var tags = new List<XmlTag>();
    tags.Add(element);

    while (reader.Read())
    {
        var nodeType = reader.NodeType;

        if (nodeType == XmlNodeType.EndElement)
        {
            // The end tag closes this element: emit "/name" and hand control back.
            if (reader.Name == element.XML_TAG)
            {
                var closing = new XmlTag();
                closing.XML_TAG = string.Format("/{0}", reader.Name);
                closing.IsContainer = element.IsContainer;
                tags.Add(closing);
            }

            return tags;
        }

        if (nodeType == XmlNodeType.Text)
        {
            element.XML_VALUE = reader.Value;
            continue;
        }

        if (nodeType != XmlNodeType.Element)
        {
            // Any other node type is ignored.
            continue;
        }

        // Seeing a child element marks the current element as a container.
        element.IsContainer = true;
        var child = new XmlTag { XML_TAG = reader.Name };

        if (reader.IsEmptyElement)
        {
            // Self-closing child: no content and no end tag to read.
            tags.Add(child);
        }
        else
        {
            tags.AddRange(ParseElement(reader, child));
        }
    }

    return tags;
}

/// <summary>
/// Reads the XML file at <paramref name="path"/> and flattens it into a list of
/// opening and closing tags.
/// </summary>
/// <param name="path">Path (or URI) of the XML file, passed to XmlReader.Create.</param>
/// <returns>The tags in the order they are encountered while reading.</returns>
private static List<XmlTag> ParseXml(string path)
{
    var result = new List<XmlTag>();

    using (var reader = XmlReader.Create(path))
    {
        while (reader.Read())
        {
            if (reader.NodeType == XmlNodeType.Element)
            {
                // ParseElement reads the element's content, including nested elements.
                result.AddRange(ParseElement(
                    reader,
                    new XmlTag() { XML_TAG = reader.Name }));
            }
            else if (reader.NodeType == XmlNodeType.EndElement)
            {
                // Closing tag seen at this level: record it as "/name".
                // The name comes from the reader; no other source of the
                // current node exists in this method.
                result.Add(new XmlTag() 
                    { 
                        XML_TAG = string.Format("/{0}", reader.Name)
                    });
            }
        }
    }

    return result;
}

An example using XDocument (LINQ to XML). This will give a slightly different result for self-closing tags (<description/> in your case). You can change this behaviour easily, depending on what you want.

/// <summary>
/// Lazily flattens <paramref name="current"/> and its descendants into opening and closing tags.
/// </summary>
/// <param name="current">Element to flatten.</param>
/// <returns>
/// The opening tag, then the tags of every child element in order, then the closing tag.
/// Elements without child elements carry their Value on the opening tag.
/// </returns>
private static IEnumerable<XmlTag> ProcessElement(XElement current)
{
    if (!current.HasElements)
    {
        // Leaf element: opening tag with its value, followed by the closing tag.
        var leafOpen = new XmlTag();
        leafOpen.XML_TAG = current.Name.ToString();
        leafOpen.XML_VALUE = current.Value;
        yield return leafOpen;

        var leafClose = new XmlTag();
        leafClose.XML_TAG = string.Format("/{0}", current.Name.ToString());
        yield return leafClose;

        yield break;
    }

    // Container element: opening tag, all children recursively, closing tag.
    var containerOpen = new XmlTag();
    containerOpen.XML_TAG = current.Name.ToString();
    containerOpen.IsContainer = true;
    yield return containerOpen;

    foreach (var child in current.Elements())
    {
        foreach (var childTag in ProcessElement(child))
        {
            yield return childTag;
        }
    }

    var containerClose = new XmlTag();
    containerClose.XML_TAG = string.Format("/{0}", current.Name.ToString());
    containerClose.IsContainer = true;
    yield return containerClose;
}

And using it:

// Load the document, then flatten it starting from the root element.
var xdoc = XDocument.Load(@"test.xml");
// ToList() forces the lazy iterator to run and materializes the tags.
var tags = ProcessElement(xdoc.Root).ToList();    
Autorizzato sotto: CC-BY-SA insieme a attribuzione
Non affiliato a StackOverflow
scroll top