Question

I have a legacy application that I inherited that passes a lot of XML around as Strings.

I often need the ability to check if a String will be valid XML. What is the fastest and least expensive way to check if a string is valid XML in .NET?

I'm working in .NET 3.5 and would most likely use this as an extension method (off of string) in this one project within the solution.

UPDATE:
What I mean by "valid" in my case is properly formed XML. I don't need to validate resources or schema.

Was it helpful?

Solution

It's not possible to validate the well-formedness of an XML string without parsing it. And a quick benchmark shows that the fastest way to parse a string to see if it's valid (actually the fastest way to parse the particular string I using as a test case)is with an XmlReader:

    static void Main(string[] args)
    {
        const int iterations = 20000;
        const string xml = @"<foo><bar><baz a='b' c='d'/><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo><foo><bar><baz a='b' c='d'/></bar><bar/></foo></bar><bar/></foo>";

        Stopwatch st = new Stopwatch();

        st.Start();
        for (int i=0; i<iterations; i++)
        {
            using (StringReader sr = new StringReader(xml))
            using (XmlReader xr = XmlReader.Create(sr))
            {
                while (xr.Read())
                {
                }
            }
        }
        st.Stop();
        Console.WriteLine(String.Format("XmlReader: {0} ms.", st.ElapsedMilliseconds));

        st.Reset();
        st.Start();
        for (int i=0; i<iterations; i++)
        {
            XElement.Parse(xml);
        }
        st.Stop();
        Console.WriteLine(String.Format("XElement: {0} ms.", st.ElapsedMilliseconds));

        st.Reset();
        st.Start();
        for (int i = 0; i < iterations; i++)
        {
            XmlDocument d= new XmlDocument();
            d.LoadXml(xml);
        }
        st.Stop();
        Console.WriteLine(String.Format("XmlDocument: {0} ms.", st.ElapsedMilliseconds));

        st.Reset();
        st.Start();
        for (int i = 0; i < iterations; i++)
        {
            using (StringReader sr = new StringReader(xml))
            {
                XPathDocument d = new XPathDocument(new StringReader(xml));                    
            }
        }
        st.Stop();
        Console.WriteLine(String.Format("XPathDocument: {0} ms.", st.ElapsedMilliseconds));

        Console.ReadKey();
    }

On my machine XmlReader is nearly twice as fast as any of the alternatives. This makes sense. Though I haven't used Reflector to check, I'd be very surprised if XmlDocument, XDocument, and XPathDocument weren't all using XmlReader under the hood.

OTHER TIPS

I'm not aware of a built-in facility in .NET to validate the formed-ness (?) of XML without parsing it. Given that, something like this should work:

public static class XmlUtilities
{
    public static bool IsXml(this string data)
    {
        if (string.IsNullOrEmpty(data)) return false;

        try
        {
            System.Xml.XmlDocument doc = new System.Xml.XmlDocument();

            doc.LoadXml(data);

            return true;            
        }
        catch
        {
            return false;
        }
    }
}

Agree with Adam, and the XElement version :

public static class XmlUtilities
{

    public static bool IsXml(this string data)
    {
        if (string.IsNullOrEmpty(data)) return false;

        try
        {
            var doc = XElement.Parse(data)

            return true;            
        }
        catch (XmlException)
        {
            return false;
        }
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top