Question

I have an XML file from a 3rd party. Because the XML file is big (40 MB), I am parsing it step by step using XmlTextReader, like this:

// Streams through an XML file with XmlTextReader, turns selected elements
// into items via ParseRegion/ParseCity and saves them through `repository`.
// Members elided with "// ..." (IsCountryStateFile, nodesToSkipInCountryStateFile,
// repository, namespaceManager, ParseCity) are not shown in this excerpt.
class Parser
{
    // ...

    /// <summary>
    /// Reads the XML file node by node and saves every "vf:Vusc" and "vf:Obec"
    /// element it encounters to the repository.
    /// </summary>
    /// <param name="fileName">Path or URL of the XML file passed to <c>XmlTextReader</c>.</param>
    public void ImportFile(string fileName)
    {
        bool isCountryStateFile = IsCountryStateFile(fileName);

        // NOTE(review): the reader is not wrapped in `using`/try-finally, so
        // Close() below is not reached if parsing or saving throws.
        XmlTextReader xmlReader = new XmlTextReader(fileName);
        namespaceManager = new XmlNamespaceManager(xmlReader.NameTable);
        // Presumably registers the prefixes (vf, vci, gml, ...) used by the file;
        // FillRuianNamespaces is defined elsewhere - verify.
        namespaceManager.FillRuianNamespaces();

        // Do not report whitespace-only nodes.
        xmlReader.WhitespaceHandling = WhitespaceHandling.None;

        while (xmlReader.Read())
        {
            if (xmlReader.NodeType == XmlNodeType.Element &&
                xmlReader.IsStartElement() == true)
            {
                // Skip items in the country state file (they are included in the city state files).
                if (isCountryStateFile && nodesToSkipInCountryStateFile.Contains(xmlReader.Name))
                {
                    Console.WriteLine("Skiping {0}", xmlReader.Name);
                    xmlReader.Skip();
                    // NOTE(review): there is no `continue` here, so the name checks
                    // below run against whatever node the reader is on after Skip().
                }

                // The name is compared as a literal "prefix:local" string, so this
                // only matches when the document uses the "vf" prefix.
                if (xmlReader.Name == "vf:Vusc")
                {
                    var item = ParseRegion(xmlReader);
                    repository.Save(item);
                    // NOTE(review): ParseRegion calls ReadOuterXml(), which per the
                    // XmlReader docs already moves the reader past the element;
                    // this Skip() (and the Read() of the loop) then act on the
                    // nodes that follow - confirm this is intended.
                    xmlReader.Skip();
                }
                if (xmlReader.Name == "vf:Obec")
                {
                    var item = ParseCity(xmlReader);
                    repository.Save(item);
                    // NOTE(review): same positioning concern as for "vf:Vusc" if
                    // ParseCity also consumes the element - ParseCity is not shown.
                    xmlReader.Skip();
                }
            }
        }

        xmlReader.Close();
    }

    /// <summary>
    /// Loads the outer XML of the reader's current node into an in-memory
    /// <c>XmlDocument</c> and builds a <c>Region</c> from it (composition code elided).
    /// </summary>
    /// <param name="xmlReader">Reader positioned on the node to parse; it is advanced by <c>ReadOuterXml()</c>.</param>
    /// <returns>The composed <c>Region</c> (the <c>result</c> variable comes from the elided code).</returns>
    private Region ParseRegion(XmlTextReader xmlReader)
    {
        XmlDocument node = new XmlDocument();
        // When the outer and inner elements share a name (e.g. nested "vf:Vusc"),
        // this string contains the outer element including the inner one.
        node.LoadXml(xmlReader.ReadOuterXml());

        // parse and compose Region
        // ...

        return result;
    }

    // ...
}

Normally nodes are structured as <Cities><City/></Cities> and everything works well, but some nodes contain child nodes with the same name as their parent:

<?xml version="1.0" encoding="UTF-8"?>
<vf:VymennyFormat xsi:schemaLocation="urn:cz:isvs:ruian:schemas:VymennyFormatTypy:v1 ../ruian/xsd/vymenny_format/VymennyFormatTypy.xsd" xmlns:gml="http://www.opengis.net/gml/3.2" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:ami="urn:cz:isvs:ruian:schemas:AdrMistoIntTypy:v1" xmlns:base="urn:cz:isvs:ruian:schemas:BaseTypy:v1" xmlns:coi="urn:cz:isvs:ruian:schemas:CastObceIntTypy:v1" xmlns:com="urn:cz:isvs:ruian:schemas:CommonTypy:v1" xmlns:kui="urn:cz:isvs:ruian:schemas:KatUzIntTypy:v1" xmlns:kri="urn:cz:isvs:ruian:schemas:KrajIntTypy:v1" xmlns:mci="urn:cz:isvs:ruian:schemas:MomcIntTypy:v1" xmlns:mpi="urn:cz:isvs:ruian:schemas:MopIntTypy:v1" xmlns:obi="urn:cz:isvs:ruian:schemas:ObecIntTypy:v1" xmlns:oki="urn:cz:isvs:ruian:schemas:OkresIntTypy:v1" xmlns:opi="urn:cz:isvs:ruian:schemas:OrpIntTypy:v1" xmlns:pai="urn:cz:isvs:ruian:schemas:ParcelaIntTypy:v1" xmlns:pui="urn:cz:isvs:ruian:schemas:PouIntTypy:v1" xmlns:rsi="urn:cz:isvs:ruian:schemas:RegSouIntiTypy:v1" xmlns:spi="urn:cz:isvs:ruian:schemas:SpravObvIntTypy:v1" xmlns:sti="urn:cz:isvs:ruian:schemas:StatIntTypy:v1" xmlns:soi="urn:cz:isvs:ruian:schemas:StavObjIntTypy:v1" xmlns:uli="urn:cz:isvs:ruian:schemas:UliceIntTypy:v1" xmlns:vci="urn:cz:isvs:ruian:schemas:VuscIntTypy:v1" xmlns:vf="urn:cz:isvs:ruian:schemas:VymennyFormatTypy:v1" xmlns:zji="urn:cz:isvs:ruian:schemas:ZsjIntTypy:v1">
    <vf:Data>
        <vf:Vusc> <!-- HERE -->
            <vf:Vusc gml:id="VC.19">
                <vci:Kod>19</vci:Kod>
                <vci:Nazev>Hlavní město Praha</vci:Nazev>
                <vci:RegionSoudrznosti>
                    <rsi:Kod>19</rsi:Kod>
                </vci:RegionSoudrznosti>
                <vci:PlatiOd>2013-06-11T00:00:00</vci:PlatiOd>
                <vci:IdTransakce>241736</vci:IdTransakce>
                <vci:GlobalniIdNavrhuZmeny>413702</vci:GlobalniIdNavrhuZmeny>
                <vci:NutsLau>CZ010</vci:NutsLau>
                <vci:Geometrie>
                    <vci:DefinicniBod>
                        <gml:Point gml:id="DVC.19" srsName="urn:ogc:def:crs:EPSG::5514" srsDimension="2">
                            <gml:pos>-743100.00 -1043300.00</gml:pos>
                        </gml:Point>
                    </vci:DefinicniBod>
                </vci:Geometrie>
            </vf:Vusc>
        </vf:Vusc>
        ...
    </vf:Data>
</vf:VymennyFormat>

In such a situation, how can I select the proper node so that only the child is parsed, and how can I use XmlTextReader.Skip() to skip the top-level parent node together with all its children? If I call Skip() on it, the reader visits the children anyway.

Was it helpful?

Solution

You can solve this by using XmlReader.ReadSubtree. For example:

if (xmlReader.Name == "vf:Vusc")
{
    // ReadSubtree() (note the lower-case 't') returns a new reader that can
    // only see the current element and its descendants.
    using (var subtree = xmlReader.ReadSubtree())
    {
        // The subtree reader starts in the Initial state; Read() positions it
        // on the element so that ReadOuterXml() inside ParseRegion has a node
        // to work with.
        subtree.Read();

        // The subtree reader is a plain XmlReader, so ParseRegion has to
        // accept XmlReader rather than XmlTextReader.
        var item = ParseRegion(subtree);
        repository.Save(item);
    }
    // Disposing the subtree reader leaves xmlReader on the end element of the
    // outer "vf:Vusc", so the main loop never visits the nested one.
}

ReadSubtree limits how much of the XML your ParseRegion can see. When the subtree reader is closed (at the end of the using block), the original reader is positioned on the end element of the tag it was on. So your main reader loop won't ever see that inner "vf:Vusc" tag.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top