Question

I'm attempting to validate XML files in a multi-threaded application and I'm running into errors ("Validation error: The '' element is not declared."). If I run the code on a single thread, or with just one XML file at a time, everything works as expected.

I believe it is the same issue as the one described in the question "Schema validation error / Thread safety of XmlSchemaSet?"; however, I cannot work out how to get this working.

I was initially relying on the schema location given inline in each document, which worked but was too slow because the external schema file had to be resolved for every document. I decided to cache the schema in a ConcurrentDictionary to speed things up.

Here's the code:

This method belongs to an object local to each thread.

/// <summary>
/// Validates <c>xmlFile</c> against the schema named by the
/// <c>xsi:noNamespaceSchemaLocation</c> attribute of the node reached by
/// <c>MoveToContent()</c>, reporting problems through <c>ValidationCallBack</c>
/// (both <c>xmlFile</c> and <c>ValidationCallBack</c> are declared outside this snippet).
/// </summary>
public void validate() 
{
    XmlReaderSettings settings = new XmlReaderSettings();
    settings.ValidationType = ValidationType.Schema;
    // Inline-schema and schema-location processing are left switched off;
    // the schema is looked up and added explicitly further down instead.
    //settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessInlineSchema;
    //settings.ValidationFlags |= XmlSchemaValidationFlags.ProcessSchemaLocation;
    settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
    settings.ValidationEventHandler += 
        new ValidationEventHandler(ValidationCallBack);

    // First pass: open the document only far enough to read the schema URL.
    using (var tr = new XmlTextReader(xmlFile))
    {
        tr.MoveToContent();
        // Looked up by its literal qualified name, so the document must use the "xsi" prefix.
        // GetAttribute returns null when the attribute is absent; that case is not handled here.
        var url = tr.GetAttribute("xsi:noNamespaceSchemaLocation");
        // SchemaFactory.GetSchema hands back the XmlSchema object stored in its static cache,
        // so every thread validating against the same URL adds the same instance to its own
        // settings.Schemas set.
        var schema = SchemaFactory.GetSchema(url);
        settings.Schemas.Add(schema);
    }

    // Second pass: re-open the document with the validating settings and read it to the end;
    // validation events are raised through the handler registered above.
    using (XmlReader reader = XmlReader.Create(xmlFile, settings))
    {
        while (reader.Read());
    }
    // NOTE(review): the snippet stops here; the method's closing brace is not shown.

SchemaFactory definition:

/// <summary>
/// Process-wide cache of parsed schemas, keyed by the URL they were downloaded from.
/// </summary>
/// <remarks>
/// A cache hit returns the very same <see cref="XmlSchema"/> object to every caller,
/// so callers on different threads end up sharing one instance.
/// </remarks>
public static class SchemaFactory
{
    // URL -> schema parsed from the text downloaded from that URL.
    private static readonly ConcurrentDictionary<string, XmlSchema> schemaStore =
        new ConcurrentDictionary<string, XmlSchema>();

    /// <summary>
    /// Returns the schema for <paramref name="url"/>, downloading and parsing it
    /// the first time the URL is seen.
    /// </summary>
    /// <param name="url">Location of the XSD document; also used as the cache key.</param>
    /// <returns>The cached schema on a hit, otherwise the schema parsed by this call.</returns>
    public static XmlSchema GetSchema(string url)
    {
        XmlSchema cached;
        if (schemaStore.TryGetValue(url, out cached))
        {
            return cached;
        }

        // Cache miss: fetch the XSD text and parse it. Parse-time validation
        // events are ignored by the empty handler.
        string xsdText = new System.Net.WebClient().DownloadString(url);
        StringReader xsdReader = new StringReader(xsdText);
        XmlSchema parsed = XmlSchema.Read(xsdReader, (sender, args) => {  });

        // If another thread stored an entry first, TryAdd is a no-op and this
        // call still returns the schema it parsed itself.
        schemaStore.TryAdd(url, parsed);
        return parsed;
    }
}

How can I add schema to the cache in real time upon first encounter within an xml file when processing in multiple threads?

Était-ce utile?

La solution

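XmlSchema is not thread-safe, so a single parsed instance must not be shared between threads. Cache the schema text as a string instead and parse a fresh XmlSchema from it for each validation; then it works: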

/// <summary>
/// Demo program: validates the same XML file on 40 threads at once, looking up the
/// schema named by the document's <c>xsi:noNamespaceSchemaLocation</c> attribute.
/// </summary>
/// <remarks>
/// Only the schema <em>text</em> is cached and shared between threads (strings are
/// immutable). Every validation parses its own <see cref="XmlSchema"/> from that text,
/// so no <see cref="XmlSchema"/> instance is ever shared across threads.
/// </remarks>
class Program
{
    // Schema URL -> raw XSD text downloaded from that URL.
    private readonly ConcurrentDictionary<string, string> schemaStore =
            new ConcurrentDictionary<string, string>();

    static void Main(string[] args)
    {
        Program p = new Program();

        // All threads share one Program instance and therefore one schema-text cache.
        for (int i = 0; i < 40; i++)
        {
            new Thread(new ThreadStart(p.validate)).Start();
        }

        // Keep the console window open while the worker threads report.
        Console.ReadKey();
    }

    /// <summary>
    /// Validates <c>XMLFile1.xml</c> against the schema its root element points to and
    /// writes errors, warnings and a completion line to the console.
    /// </summary>
    /// <remarks>
    /// If the document carries no <c>xsi:noNamespaceSchemaLocation</c> attribute an error
    /// line is written and the method returns without validating.
    /// </remarks>
    public void validate()
    {
        string xmlFile = "XMLFile1.xml";

        // First pass: read just far enough to find the schema location. The attribute is
        // looked up by namespace, so it is found whatever prefix the document uses.
        string url;
        using (XmlReader probe = XmlReader.Create(xmlFile))
        {
            probe.MoveToContent();
            url = probe.GetAttribute("noNamespaceSchemaLocation", XmlSchema.InstanceNamespace);
        }

        if (string.IsNullOrEmpty(url))
        {
            // A null key would make the dictionary lookup throw ArgumentNullException
            // on the worker thread; report the problem in the usual format instead.
            Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId + " -> Error: "
                + xmlFile + " has no xsi:noNamespaceSchemaLocation attribute; nothing to validate against");
            return;
        }

        // The factory may run on several threads at once for a URL seen for the first time;
        // every run yields the same text, and exactly one result is kept in the cache.
        string schemaXml = schemaStore.GetOrAdd(url, DownloadSchemaText);

        // Parse a private XmlSchema for this validation. Problems found while reading the
        // schema are reported through the same callback rather than being dropped.
        XmlSchema schema;
        using (StringReader schemaText = new StringReader(schemaXml))
        {
            schema = XmlSchema.Read(schemaText, ValidationCallBack);
        }

        // Inline-schema and schema-location processing stay off on purpose: the schema
        // comes from the cache above, not from the reader resolving it again.
        XmlReaderSettings settings = new XmlReaderSettings();
        settings.ValidationType = ValidationType.Schema;
        settings.ValidationFlags |= XmlSchemaValidationFlags.ReportValidationWarnings;
        settings.ValidationEventHandler += ValidationCallBack;
        settings.Schemas.Add(schema);

        // Second pass: reading the document to the end drives validation.
        using (XmlReader reader = XmlReader.Create(xmlFile, settings))
        {
            while (reader.Read())
            {
            }
        }

        Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId + " completes");
    }

    /// <summary>Downloads the XSD document at <paramref name="url"/> as text.</summary>
    private static string DownloadSchemaText(string url)
    {
        using (System.Net.WebClient wc = new System.Net.WebClient())
        {
            return wc.DownloadString(url);
        }
    }

    /// <summary>
    /// Writes a validation event to the console, tagged with the reporting thread's id
    /// and labelled "Error" or "Warning" according to its severity.
    /// </summary>
    private void ValidationCallBack(object sender, ValidationEventArgs args)
    {
        string label = args.Severity == XmlSeverityType.Error ? "Error" : "Warning";
        Console.WriteLine("Thread " + Thread.CurrentThread.ManagedThreadId + " -> " + label + ": " + args.Message);
    }
}

Autres conseils

If it's still true as stated in Schema validation error / Thread safety of XmlSchemaSet? that you can't run multiple parallel schema validations against the same compiled schema, then a workaround might be to use the Saxon schema validator [shameless plug] which does not have this restriction.

http://www.saxonica.com/

Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top