Question

I'm attempting to validate XML files in a multi-threaded application and I'm running into validation errors (Validation error: The '' element is not declared.). If I run the code with a single thread, or with just one XML file at a time, everything works as expected.

I believe it to be the same issue as in this Schema validation error / Thread safety of XmlSchemaSet? question, however I cannot work out how to get this working.

I was initially using an inline schema which was fine but slowed down too much resolving the external file. I decided to cache the schema within a ConcurrentDictionary to speed things up.

Here's the code:

This method belongs to an object local to each thread.

// Validates xmlFile against the schema named by its root element's
// xsi:noNamespaceSchemaLocation attribute, reporting problems through
// ValidationCallBack.
// NOTE(review): xmlFile and ValidationCallBack are declared outside this
// snippet, and the method's closing brace is missing from the snippet.
public void validate() 
{
    // Schema validation with warnings reported in addition to errors.
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.ValidationType = ValidationType.Schema;
    //settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessInlineSchema;
    //settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessSchemaLocation;
    settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
    settings.ValidationEventHandler += 
        new ValidationEventHandler(ValidationCallBack);

    // First pass: peek at the root element only to learn which schema to use.
    using (var tr = new XmlTextReader(xmlFile))
    {
        tr.MoveToContent();
        var url = tr.GetAttribute("xsi:noNamespaceSchemaLocation");
        // The schema object comes from the static SchemaFactory cache, so the
        // instance added here may also be in use by other threads.
        var schema = SchemaFactory.GetSchema(url);
        settings.Schemas.Add(schema);
    }

    // Second pass: read the whole document; validation events fire as a
    // side effect of Read(), so the loop body is intentionally empty.
    using (XmlReader reader = XmlReader.Create(xmlFile, settings))
    {
        while (reader.Read());
    }

SchemaFactory definition:

// Process-wide cache of parsed XmlSchema objects, keyed by the URL they were
// downloaded from.
// NOTE(review): every caller asking for the same URL receives the same
// XmlSchema instance. XmlSchema is reportedly not safe for concurrent use, so
// this sharing is the likely cause of the multi-threaded failures - confirm.
public static class SchemaFactory
{
    // URL -> parsed schema. The dictionary itself is concurrent; the cached
    // XmlSchema values are plain shared objects.
    static ConcurrentDictionary<string, XmlSchema> schemaStore = 
        new ConcurrentDictionary<string, XmlSchema>();

    // Returns the schema for the given URL, downloading and parsing it on a
    // cache miss.
    public static XmlSchema GetSchema(string url)
    {
        XmlSchema schema = null;
        if (!schemaStore.TryGetValue(url, out schema))
        {
            // Cache miss: fetch the schema text and parse it. The empty lambda
            // discards any events raised while reading the schema.
            var schemadata = new System.Net.WebClient().DownloadString(url);
            schema = XmlSchema.Read(new StringReader(schemadata), (sender, args) => {  });
            // The TryAdd result is ignored: if another thread stored a schema
            // for this URL first, this call still returns its own fresh copy.
            schemaStore.TryAdd(url, schema);
        }

        return schema;
    }
}

How can I add schema to the cache in real time upon first encounter within an xml file when processing in multiple threads?

Was it helpful?

Solution

XmlSchema is not thread-safe. Cache the schema as a string instead and parse a fresh XmlSchema from it on each thread; then it works:

/// <summary>
/// Validates one XML file from many threads at once. Only the schema text is
/// cached and shared between threads; every validation run parses its own
/// XmlSchema object from that text, so no XmlSchema instance is shared.
/// </summary>
class Program
{
    // Schema URL -> raw schema text. Strings are immutable, so the cached
    // values can be handed to any number of threads.
    private readonly ConcurrentDictionary<string, string> schemaStore =
            new ConcurrentDictionary<string, string>();

    /// <summary>
    /// Starts 40 threads that all run <see cref="validate"/> on the same
    /// Program instance, then waits for a key press.
    /// </summary>
    static void Main(string[] args)
    {
        Program p = new Program();

        for (int i = 0; i < 40; i++)
        {
            new Thread(new ThreadStart(p.validate)).Start();
        }

        Console.ReadKey();
    }

    /// <summary>
    /// Validates "XMLFile1.xml" against the schema referenced by the root
    /// element's xsi:noNamespaceSchemaLocation attribute. Validation errors
    /// and warnings are written to the console by ValidationCallBack.
    /// </summary>
    public void validate()
    {
        string xmlFile = "XMLFile1.xml";

        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
        settings.ValidationEventHandler +=
            new ValidationEventHandler(ValidationCallBack);

        // First pass: read just far enough to learn the schema location.
        string url;
        using (var tr = new XmlTextReader(xmlFile))
        {
            tr.MoveToContent();
            url = tr.GetAttribute("xsi:noNamespaceSchemaLocation");
        }

        // GetAttribute returns null when the attribute is absent; a null key
        // would make the dictionary lookup throw, so report it and stop.
        if (string.IsNullOrEmpty(url))
        {
            Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId
                + " -> Error: no xsi:noNamespaceSchemaLocation attribute found in " + xmlFile);
            return;
        }

        // Several threads may miss the cache at the same moment and each
        // download the schema; only one copy of the text is kept.
        string schemaXml = schemaStore.GetOrAdd(url, DownloadSchema);

        // Parse a private XmlSchema for this run. Problems found while reading
        // the schema go to the same callback as document validation problems
        // instead of being silently dropped.
        using (var schemaText = new StringReader(schemaXml))
        {
            XmlSchema schema = XmlSchema.Read(schemaText, ValidationCallBack);
            if (schema == null)
            {
                // Read can come back empty-handed when the schema text is
                // unusable; the callback has already reported the reason.
                return;
            }

            settings.Schemas.Add(schema);
        }

        // Second pass: reading the document drives validation, so the loop
        // body is intentionally empty.
        using (XmlReader reader = XmlReader.Create(xmlFile, settings))
        {
            while (reader.Read())
            {
            }
        }

        Console.WriteLine("Thread "+Thread.CurrentThread.ManagedThreadId+" completes");
    }

    // Fetches the schema text from the given URL.
    private static string DownloadSchema(string url)
    {
        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            return wc.DownloadString(url);
        }
    }

    // Writes each validation event to the console, tagged with the reporting
    // thread's id and whether it is an error or a warning.
    private void ValidationCallBack(object sender, ValidationEventArgs args)
    {
        if (args.Severity == XmlSeverityType.Error)
        {
            Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId + " -> Error: " + args.Message);
        }
        else
        {
            Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId + " -> Warning: " + args.Message);
        }
    }
}

OTHER TIPS

If it's still true as stated in Schema validation error / Thread safety of XmlSchemaSet? that you can't run multiple parallel schema validations against the same compiled schema, then a workaround might be to use the Saxon schema validator [shameless plug] which does not have this restriction.

http://www.saxonica.com/

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top