Question

I am trying to find the best solution to prevent us to allocate too much memory when building an Xml document. I have to construct a fairly large Xml with the less resource possible (the web service must be able to handle hundreds of calls per seconds). The structure of the Xml itself doesn’t change much, however the data consistently change. My current solution is XDocument and XElement (LINQ). Below is a quick sample of what I am doing today:

static Stream GetXml(string d1, string d2, string d3)
{
    XElement x = 
        new XElement("myElement",
            new XElement("myOtherElement1", d1),
            new XElement("myOtherElement2", d2),
            new XElement("myOtherElement3", d3));
    // ... more XElement
    // ... return Stream
}

When the Xml document becomes too large, instantiating an XDocument and many hundreds of XElement becomes very expensive and the number of calls per second drops. I am currently thinking of creating some sort of template engine that would simply stream the strings (XElement) without instantiating any objects. How would you do that? Is that the right thing to do?

static Stream GetXml(string d1, string d2, string d3)
{
    const string xml = @"
<myElement>
  <myOtherElement1>{0}</myOtherElement1>
  <myOtherElement2>{1}</myOtherElement2>
  <myOtherElement3>{2}</myOtherElement3>
</myElement>";

    // What's the best way to {0}, {1}, {2} without allocating 
    // objects and using as little RAM as possible. I cannot 
    // use string.Format since it allocates strings.

    StreamWriter sw = new StreamWriter(stream);
    sw.Write(xml);
}
Was it helpful?

Solution

The only reason I can think of for dismissing string.Format is that you don't want to keep the whole XML-document in memory at once. I wrote this Stream class that should only keep a small portion of the document in memory at the time.

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace StreamTest
{
    public class EnumeratorStream : Stream
    {
        private readonly IEnumerator<string> source;
        private readonly Encoding encoding;

        public Encoding Encoding { get { return encoding; } }

        private byte[] current = new byte[0];
        private int currentPos = 0;

        public EnumeratorStream(IEnumerable<string> source, Encoding encoding)
        {
            if (source == null) throw new ArgumentNullException("source");
            if (encoding == null) encoding = Encoding.Default;

            this.source = source.GetEnumerator();
            this.encoding = encoding;
        }

        private bool MoveNext()
        {
            while (source.MoveNext())
            {
                if (source.Current.Length > 0)
                {
                    current = encoding.GetBytes(source.Current);
                    currentPos = 0;
                    return true;
                }
            }
            current = new byte[0];
            currentPos = 0;
            return false;
        }

        #region Overrides of Stream

        public override bool CanRead { get { return true; } }
        public override bool CanSeek { get { return false; } }
        public override bool CanWrite { get { return false; } }

        public override int Read(byte[] buffer, int offset, int count)
        {
            if (buffer == null) throw new ArgumentNullException("buffer");
            if (offset < 0) throw new ArgumentOutOfRangeException("offset");
            if (offset + count > buffer.Length) throw new ArgumentException("Not enough buffer space");

            int totalWritten = 0;
            while (count > 0)
            {
                int remaining = current.Length - currentPos;
                if (remaining == 0 && !MoveNext()) break;
                remaining = current.Length - currentPos;
                if (remaining <= 0) break;
                if (remaining > count)
                {
                    remaining = count;
                }
                Array.Copy(current, currentPos, buffer, offset, remaining);
                offset += remaining;
                count -= remaining;
                totalWritten += remaining;
                currentPos += remaining;
            }
            return totalWritten;
        }

        public override void Flush() { }
        public override long Seek(long offset, SeekOrigin origin) { throw new NotSupportedException(); }
        public override void SetLength(long value) { throw new NotSupportedException(); }
        public override void Write(byte[] buffer, int offset, int count) { throw new NotSupportedException(); }
        public override long Length { get { throw new NotSupportedException(); } }
        public override long Position
        {
            get { throw new NotSupportedException(); }
            set { throw new NotSupportedException(); }
        }

        #endregion
    }
}

Example:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;

namespace StreamTest
{
    class Program
    {
        static void Main(string[] args)
        {
            var stream = new EnumeratorStream(Generate("x","y","z"), null);
            var buffer = new byte[256];
            int read;
            while ((read = stream.Read(buffer,0,256)) > 0)
            {
                string s = stream.Encoding.GetString(buffer, 0, read);
                Console.Write(s);
            }
            Console.ReadLine();
        }

        public static IEnumerable<string> Generate(string d1, string d2, string d3)
        {
            yield return "<myElement>";
            yield return "<myOtherElement1>";
            yield return d1;
            yield return "</myOtherElement1>";
            yield return "<myOtherElement2>";
            yield return d2;
            yield return "</myOtherElement2>";
            yield return "<myOtherElement3>";
            yield return d3;
            yield return "</myOtherElement3>";
            yield return "</myElement>";
        }
    }
}

OTHER TIPS

Consider using the XStreamingElement implementation discussed at http://msdn.microsoft.com/en-us/library/system.xml.linq.xstreamingelement.aspx if you want to avoid loading your memory with the XML that you are parsing/generating.

You can pass a StringBuilder around. Any duplicate strings (like opening and closing tags) would reference the same data in memory, so you'd get some savings there.

static Stream GetXml(string d1, string d2, string d3)
{
    StringBuilder xml = new StringBuilder();
    xml.Append("<myElement>");
    AppendElement(xml, d1);
    AppendElement(xml, d2);
    AppendElement(xml, d3);
    xml.Append("</myElement>");

    // Create/return stream
}
static void AppendElement(StringBuilder xml, string value)
{
    xml.Append("<myOtherElement>");
    xml.Append(value);
    xml.Append("</myOtherElement>");
}

To save even more, you can piece together the opening and closing elements like this:

static Stream GetXml(string d1, string d2, string d3)
{
    StringBuilder xml = new StringBuilder();
    OpenElement(xml, "myElement");
    AppendElement(xml, d1);
    AppendElement(xml, d2);
    AppendElement(xml, d3);
    CloseElement(xml, "myElement");

    // Create/return stream
}
static void AppendElement(StringBuilder xml, string value)
{
    OpenElement(xml, "myOtherElement");
    xml.Append(value);
    CloseElement(xml, "myOtherElement");
}    

static void OpenElement(StringBuilder xml, string elementName)
{
    xml.Append("<");
    xml.Append(elementName);
    xml.Append(">");
}
static void CloseElement(StringBuilder xml, string elementName)
{
    xml.Append("</");
    xml.Append(elementName);
    xml.Append(">");
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top