Question

I've been dealing with this for a few hours. I'm saving a string containing Polish diacritics (ą, ś, ó, ł, ń, ź, ć, etc.) to a file, but the software I must use to read that file only understands the Mazovia encoding, a pretty old encoding that is not supported by .NET's built-in Encoding classes.

A .NET string consists of UTF-16 characters, so I've been using this code to convert from Unicode to Mazovia.

// Drop the last character of the input line and terminate the record with a newline.
string rekord = (linia.Substring(0, linia.Length - 1)) + Environment.NewLine;
// Swap every Polish diacritic for the char whose code equals its Mazovia byte value.
string rekordMazovia = Kodowanie.UnicodeNaMazovia(rekord);
// NOTE(review): no Encoding argument is passed here, so the text is written with the
// default encoding - presumably UTF-8, which would store every char >= 0x80 as two
// bytes and would explain the stray character described below. Confirm against the
// File.AppendAllText documentation.
File.AppendAllText(sciezka, rekordMazovia);
/// <summary>
/// Character-level conversion helpers ("kodowanie" = encoding).
/// </summary>
public static class Kodowanie {
  /// <summary>
  /// Returns a copy of <paramref name="tekst"/> in which each of the 18 Polish
  /// diacritic letters is replaced by the char whose numeric code equals the
  /// letter's Mazovia byte value. All other characters are copied unchanged.
  /// The result is still a string of chars, not a byte sequence.
  /// </summary>
  /// <param name="tekst">Text to convert; must not be null.</param>
  /// <returns>The converted text, with the same length as the input.</returns>
  public static string UnicodeNaMazovia(string tekst) {
    char[] znaki = tekst.ToCharArray();

    // Single pass: none of the replacement codes is itself one of the letters
    // being replaced, so translating each position once is sufficient.
    for (int pozycja = 0; pozycja < znaki.Length; pozycja++) {
      switch (znaki[pozycja]) {
        case '\u0104': znaki[pozycja] = '\u008F'; break; // Ą
        case '\u0106': znaki[pozycja] = '\u0095'; break; // Ć
        case '\u0118': znaki[pozycja] = '\u0090'; break; // Ę
        case '\u0141': znaki[pozycja] = '\u009C'; break; // Ł
        case '\u0143': znaki[pozycja] = '\u00A5'; break; // Ń
        case '\u00D3': znaki[pozycja] = '\u00A3'; break; // Ó
        case '\u015A': znaki[pozycja] = '\u0098'; break; // Ś
        case '\u0179': znaki[pozycja] = '\u00A0'; break; // Ź
        case '\u017B': znaki[pozycja] = '\u00A1'; break; // Ż
        case '\u0105': znaki[pozycja] = '\u0086'; break; // ą
        case '\u0107': znaki[pozycja] = '\u008D'; break; // ć
        case '\u0119': znaki[pozycja] = '\u0091'; break; // ę
        case '\u0142': znaki[pozycja] = '\u0092'; break; // ł
        case '\u0144': znaki[pozycja] = '\u00A4'; break; // ń
        case '\u00F3': znaki[pozycja] = '\u00A2'; break; // ó
        case '\u015B': znaki[pozycja] = '\u009E'; break; // ś
        case '\u017A': znaki[pozycja] = '\u00A6'; break; // ź
        case '\u017C': znaki[pozycja] = '\u00A7'; break; // ż
        default: break; // not a Polish diacritic: keep as is
      }
    }

    return new string(znaki);
  }
}

Everything would be fine except after reading the generated file in the application I get one redundant char > before every diacritic. It looks like this http://imgur.com/q7DZo

How can I get rid of it? Is there a better way to do this?

Was it helpful?

Solution

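Mazovia encoding is like code page 437, but it has different letters at some positions, so you can't simply use 437. The extra character you see comes from File.AppendAllText: when no encoding is passed, it writes the string as UTF-8, and in UTF-8 every character from U+0080 upwards (which includes all of your replacement characters) is stored as two bytes instead of one.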

If you implement a MazoviaEncoding class, you can simply pass it to the file API:

// Write the sample text through the custom encoding.
File.WriteAllText(@"test.txt", "ąśółńźć", new MazoviaEncoding());
// File.AppendAllText(path, text, encoding) works just as well:
// pass the encoding as the third argument there too.

The file will contain:

0x86 0x9E 0xA2 0x92 0xA4 0xA6 0x8D

Which is correct according to http://en.wikipedia.org/wiki/Mazovia_encoding

The implementation can then be used like any other Encoding in C#. For instance, reading the file back works as well:

// Decode the file through the same custom encoding to get the text back.
string result = File.ReadAllText(@"test.txt", new MazoviaEncoding());

Here's my implementation:

using System.Collections.Generic;
using System.Text;

namespace System.Text {
    /// <summary>
    /// Single-byte <see cref="Encoding"/> for the Mazovia character set (laid out like
    /// code page 437, with Polish letters at some positions). Every byte value maps to
    /// exactly one UTF-16 char through a fixed 256-entry table; a char that has no
    /// entry in the table is encoded as '?' (0x3F).
    /// </summary>
    class MazoviaEncoding : Encoding
    {
        // Byte written for a char that is not present in the table: ASCII '?'.
        private const byte ReplacementByte = 0x3F;

        // Unicode code point for every byte value; the array index is the Mazovia byte.
        private static readonly int[] codePoints =  {
            0x0000,0x0001,0x0002,0x0003,0x0004,0x0005,0x0006,0x0007,0x0008,0x0009,0x000A,0x000B,0x000C,0x000D,0x000E,0x000F
            ,0x0010,0x0011,0x0012,0x0013,0x0014,0x0015,0x0016,0x0017,0x0018,0x0019,0x001A,0x001B,0x001C,0x001D,0x001E,0x001F
            ,0x0020,0x0021,0x0022,0x0023,0x0024,0x0025,0x0026,0x0027,0x0028,0x0029,0x002A,0x002B,0x002C,0x002D,0x002E,0x002F
            ,0x0030,0x0031,0x0032,0x0033,0x0034,0x0035,0x0036,0x0037,0x0038,0x0039,0x003A,0x003B,0x003C,0x003D,0x003E,0x003F
            ,0x0040,0x0041,0x0042,0x0043,0x0044,0x0045,0x0046,0x0047,0x0048,0x0049,0x004A,0x004B,0x004C,0x004D,0x004E,0x004F
            ,0x0050,0x0051,0x0052,0x0053,0x0054,0x0055,0x0056,0x0057,0x0058,0x0059,0x005A,0x005B,0x005C,0x005D,0x005E,0x005F
            ,0x0060,0x0061,0x0062,0x0063,0x0064,0x0065,0x0066,0x0067,0x0068,0x0069,0x006A,0x006B,0x006C,0x006D,0x006E,0x006F
            ,0x0070,0x0071,0x0072,0x0073,0x0074,0x0075,0x0076,0x0077,0x0078,0x0079,0x007A,0x007B,0x007C,0x007D,0x007E,0x007F
            ,0x00C7,0x00FC,0x00E9,0x00E2,0x00E4,0x00E0,0x0105,0x00E7,0x00EA,0x00EB,0x00E8,0x00EF,0x00EE,0x0107,0x00C4,0x0104
            ,0x0118,0x0119,0x0142,0x00F4,0x00F6,0x0106,0x00FB,0x00F9,0x015A,0x00D6,0x00DC,0x00A2,0x0141,0x00A5,0x015B,0x0192
            ,0x0179,0x017B,0x00F3,0x00D3,0x0144,0x0143,0x017A,0x017C,0x00BF,0x2310,0x00AC,0x00BD,0x00BC,0x00A1,0x00AB,0x00BB
            ,0x2591,0x2592,0x2593,0x2502,0x2524,0x2561,0x2562,0x2556,0x2555,0x2563,0x2551,0x2557,0x255D,0x255C,0x255B,0x2510
            ,0x2514,0x2534,0x252C,0x251C,0x2500,0x253C,0x255E,0x255F,0x255A,0x2554,0x2569,0x2566,0x2560,0x2550,0x256C,0x2567
            ,0x2568,0x2564,0x2565,0x2559,0x2558,0x2552,0x2553,0x256B,0x256A,0x2518,0x250C,0x2588,0x2584,0x258C,0x2590,0x2580
            ,0x03B1,0x00DF,0x0393,0x03C0,0x03A3,0x03C3,0x00B5,0x03C4,0x03A6,0x0398,0x03A9,0x03B4,0x221E,0x03C6,0x03B5,0x2229
            ,0x2261,0x00B1,0x2265,0x2264,0x2320,0x2321,0x00F7,0x2248,0x00B0,0x2219,0x00B7,0x221A,0x207F,0x00B2,0x25A0,0x00A0
        };

        // Reverse lookup (char -> byte), derived once from codePoints.
        private static readonly Dictionary<char, byte> unicodeToByte;


        static MazoviaEncoding()
        {
            unicodeToByte = new Dictionary<char, byte>(codePoints.Length);

            for (int i = 0; i < codePoints.Length; ++i)
            {
                unicodeToByte.Add((char)codePoints[i], (byte)i);
            }
        }

        /// <summary>
        /// Always true: each char is encoded as exactly one byte.
        /// </summary>
        public override bool IsSingleByte
        {
            get { return true; }
        }

        /// <summary>
        /// Returns the largest number of bytes that encoding <paramref name="charCount"/>
        /// chars can produce, which is <paramref name="charCount"/> itself.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="charCount"/> is negative.</exception>
        public override int GetMaxByteCount(int charCount)
        {
            if (charCount < 0)
            {
                throw new ArgumentOutOfRangeException("charCount");
            }
            return charCount;
        }

        /// <summary>
        /// Returns the largest number of chars that decoding <paramref name="byteCount"/>
        /// bytes can produce, which is <paramref name="byteCount"/> itself.
        /// </summary>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="byteCount"/> is negative.</exception>
        public override int GetMaxCharCount(int byteCount)
        {
            if (byteCount < 0)
            {
                throw new ArgumentOutOfRangeException("byteCount");
            }
            return byteCount;
        }

        /// <summary>
        /// Encodes <paramref name="charCount"/> chars starting at <paramref name="charIndex"/>
        /// into <paramref name="bytes"/> starting at <paramref name="byteIndex"/>, one byte per
        /// char. Chars without a table entry are written as '?'.
        /// </summary>
        /// <returns>The number of bytes written, equal to <paramref name="charCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="chars"/> or <paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, or the source/destination range does not fit
        /// inside its array.
        /// </exception>
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(chars.Length, charIndex, charCount, "charIndex", "charCount");
            CheckRange(bytes.Length, byteIndex, charCount, "byteIndex", "bytes");

            for (int i = 0; i < charCount; ++i)
            {
                byte asMazovia;
                if (!unicodeToByte.TryGetValue(chars[charIndex + i], out asMazovia))
                {
                    asMazovia = ReplacementByte;
                }
                bytes[byteIndex + i] = asMazovia;
            }
            return charCount;
        }

        /// <summary>
        /// Decodes <paramref name="byteCount"/> bytes starting at <paramref name="byteIndex"/>
        /// into <paramref name="chars"/> starting at <paramref name="charIndex"/>, one char per
        /// byte.
        /// </summary>
        /// <returns>The number of chars written, equal to <paramref name="byteCount"/>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> or <paramref name="chars"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// An index or the count is negative, or the source/destination range does not fit
        /// inside its array.
        /// </exception>
        public override int GetChars( byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            if (chars == null)
            {
                throw new ArgumentNullException("chars");
            }
            CheckRange(bytes.Length, byteIndex, byteCount, "byteIndex", "byteCount");
            CheckRange(chars.Length, charIndex, byteCount, "charIndex", "chars");

            for (int i = 0; i < byteCount; ++i)
            {
                // Every byte value 0..255 has an entry, so the lookup cannot go out of range.
                chars[charIndex + i] = (char)codePoints[bytes[byteIndex + i]];
            }
            return byteCount;
        }

        /// <summary>
        /// Returns the number of bytes needed to encode <paramref name="count"/> chars,
        /// which is <paramref name="count"/> itself.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="charArray"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range does
        /// not fit inside <paramref name="charArray"/>.
        /// </exception>
        public override int GetByteCount(char[] charArray, int index, int count)
        {
            if (charArray == null)
            {
                throw new ArgumentNullException("charArray");
            }
            CheckRange(charArray.Length, index, count, "index", "count");

            return count;
        }

        /// <summary>
        /// Returns the number of chars produced by decoding <paramref name="count"/> bytes,
        /// which is <paramref name="count"/> itself.
        /// </summary>
        /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> or <paramref name="count"/> is negative, or the range does
        /// not fit inside <paramref name="bytes"/>.
        /// </exception>
        public override int GetCharCount( byte[] bytes, int index, int count )
        {
            if (bytes == null)
            {
                throw new ArgumentNullException("bytes");
            }
            CheckRange(bytes.Length, index, count, "index", "count");

            return count;
        }

        // Throws ArgumentOutOfRangeException unless [index, index + count) lies inside an
        // array of the given length. The comparison is written as a subtraction so that
        // index + count can never overflow int and slip past the check.
        private static void CheckRange(int length, int index, int count, string indexName, string countName)
        {
            if (index < 0)
            {
                throw new ArgumentOutOfRangeException(indexName);
            }
            if (count < 0 || count > length - index)
            {
                throw new ArgumentOutOfRangeException(countName);
            }
        }
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top