C#: Quoted-Printable 인코딩을 디코딩하는 클래스가 있습니까?

https://stackoverflow.com/questions/2226554

19-09-2019
|

문제

C#에 Quoted-Printable로 인코딩된 문자열(String)을 디코딩하는 기존 클래스가 있습니까? 인코딩에 대한 자세한 정보는 위의 링크를 참조하십시오.

다음은 편의를 위해 위의 링크에서 인용됩니다.

8비트 바이트 값은 3개의 문자, 즉 "="와 그 뒤에 오는 바이트의 숫자 값을 나타내는 2개의 16진수 숫자(0–9 또는 A–F)로 인코딩될 수 있습니다. 예를 들어, US-ASCII 폼 피드(form feed) 문자(10진수 값 12)는 "=0C"로 표시될 수 있고, US-ASCII 등호(10진수 값 61)는 "=3D"로 표시됩니다. 인쇄 가능한 ASCII 문자 또는 줄 끝 문자를 제외한 모든 문자는 이 방식으로 인코딩해야 합니다.

인쇄 가능한 모든 ASCII 문자(10진수 값 33에서 126 사이)는 "="(10진수 61)를 제외하고 그대로 표시될 수 있습니다.

ASCII 탭 및 공백 문자(10진수 값 9 및 32)는 줄 끝에 나타나는 경우를 제외하고 그대로 표시될 수 있습니다. 이 문자 중 하나가 줄 끝에 나타나면 "=09"(탭) 또는 "=20"(공백)으로 인코딩해야 합니다.

인코딩되는 데이터에 의미 있는 줄 바꿈이 포함된 경우, 원래 바이트 값이 아닌 ASCII CR LF 시퀀스로 인코딩되어야 합니다. 반대로 바이트 값 13과 10이 줄 끝 이외의 의미를 갖는 경우에는 =0D 및 =0A로 인코딩되어야 합니다.

인용 된 인쇄 가능한 인코딩 된 데이터 라인은 76 자를 초과하지 않아야합니다. 인코딩 된 텍스트를 변경하지 않고이 요구 사항을 충족시키기 위해 소프트 라인 브레이크가 원하는대로 추가 될 수 있습니다. 소프트 라인 브레이크는 인코딩 된 라인의 끝에서 "="로 구성되며 디코딩 된 텍스트에서 라인 브레이크를 유발하지 않습니다.

해결책

Framework 라이브러리에는 이를 수행하는 기능이 있지만 깔끔하게 노출되어 있지는 않습니다. 구현은 내부(internal) 클래스 System.Net.Mime.QuotedPrintableStream에 있습니다. 이 클래스에는 원하는 작업을 수행하는 DecodeBytes라는 메서드가 정의되어 있습니다. 이 메서드는 MIME 헤더를 디코딩하는 단 하나의 메서드에서만 사용되는 것으로 보입니다. 그 메서드 역시 내부용이지만, Attachment.Name 세터와 같은 몇몇 곳에서 상당히 직접적으로 호출됩니다. 데모:

using System;
using System.Net.Mail;

namespace ConsoleApplication1
{
    /// <summary>
    /// Demo: prints the <c>Name</c> of an attachment that was created with an
    /// RFC 2047 encoded-word as its name.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Creates an attachment with empty content and an encoded name, then writes
        /// the attachment's <c>Name</c> property to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            const string encodedName = "=?iso-8859-1?Q?=A1Hola,_se=F1or!?=";

            // Attachment is IDisposable; dispose it once the name has been read.
            using (Attachment attachment = Attachment.CreateAttachmentFromString("", encodedName))
            {
                Console.WriteLine(attachment.Name);
            }
        }
    }
}

출력을 생성합니다.

¡Hola,_señor!

캐리지 리턴 등을 올바르게 처리하기 위해 약간의 테스트를 수행해야 할 수도 있습니다. 그러나 사용 사례가 라이브러리의 변경으로 인해 깨지지 않을 것이라고 생각하지 않는 MIME 헤더 문자열의 디코딩에 충분히 가까워지지 않는 한이 기능에 의존하는 것이 현명하지 않을 수 있습니다. 자신의 인용문 인쇄 가능한 디코더를 작성하는 것이 좋습니다.

다른 팁

나는 Martin Murphy의 해결책을 확장했고 모든 경우에 그것이 효과가 있기를 바랍니다.

/// <summary>
/// Decodes quoted-printable text, including RFC 2047 encoded-words
/// (<c>=?charset?Q?text?=</c> and <c>=?charset?B?text?=</c>) embedded in it.
/// </summary>
/// <param name="input">Text to decode. Null or empty input is returned unchanged.</param>
/// <param name="charSet">
/// Character set used for quoted-printable bytes found outside encoded-words.
/// When null, empty or unknown, ASCII is used. Encoded-words are decoded with the
/// charset they declare, falling back to this one when that name is unknown.
/// </param>
/// <returns>The decoded text.</returns>
private static string DecodeQuotedPrintables(string input, string charSet)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    Encoding bodyEncoding = FindEncodingOrDefault(charSet, Encoding.ASCII);

    // Neither the charset nor the payload of an encoded-word may contain '?' or
    // whitespace, so the pattern cannot run across two words or stop at a "?="
    // that merely starts an "=XX" escape.
    var encodedWord = new Regex(@"=\?([^?\s]+)\?([QB])\?([^?\s]*)\?=", RegexOptions.IgnoreCase);

    var result = new StringBuilder(input.Length);
    int cursor = 0;
    foreach (Match word in encodedWord.Matches(input))
    {
        // Every segment is decoded exactly once, so text produced by decoding
        // (for example "=41" coming from "=3D41") is never decoded a second time.
        string gap = input.Substring(cursor, word.Index - cursor);
        bool separatesTwoWords = cursor > 0 && gap.Trim().Length == 0;
        if (!separatesTwoWords)
        {
            // RFC 2047: whitespace between two adjacent encoded-words is dropped;
            // anything else is ordinary text.
            result.Append(DecodeQuotedPrintableText(gap, bodyEncoding));
        }

        Encoding wordEncoding = FindEncodingOrDefault(word.Groups[1].Value, bodyEncoding);
        string payload = word.Groups[3].Value;
        if (string.Equals(word.Groups[2].Value, "B", StringComparison.OrdinalIgnoreCase))
        {
            try
            {
                result.Append(wordEncoding.GetString(Convert.FromBase64String(payload)));
            }
            catch (FormatException)
            {
                // Not valid base64: keep the word as it was written.
                result.Append(word.Value);
            }
        }
        else
        {
            // In the "Q" encoding an underscore stands for a space (RFC 2047, 4.2).
            result.Append(DecodeQuotedPrintableText(payload.Replace('_', ' '), wordEncoding));
        }

        cursor = word.Index + word.Length;
    }

    result.Append(DecodeQuotedPrintableText(input.Substring(cursor), bodyEncoding));
    return result.ToString();
}

/// <summary>Looks up an encoding by name, returning <paramref name="fallback"/> when the name is empty or unknown.</summary>
private static Encoding FindEncodingOrDefault(string name, Encoding fallback)
{
    if (string.IsNullOrEmpty(name))
    {
        return fallback;
    }

    try
    {
        return Encoding.GetEncoding(name);
    }
    catch (ArgumentException)
    {
        return fallback;
    }
    catch (NotSupportedException)
    {
        return fallback;
    }
}

/// <summary>
/// Decodes one run of quoted-printable text: "=XX" becomes a byte, soft line breaks
/// ("=" followed by CRLF or LF) are removed, and the resulting bytes are converted
/// with <paramref name="encoding"/>.
/// </summary>
private static string DecodeQuotedPrintableText(string text, Encoding encoding)
{
    var decoded = new StringBuilder(text.Length);

    // Each output byte consumes at least one input character, so this is large enough.
    byte[] pending = new byte[text.Length];
    int pendingCount = 0;

    int i = 0;
    while (i < text.Length)
    {
        char c = text[i];
        if (c == '=')
        {
            if (i + 2 < text.Length && text[i + 1] == '\r' && text[i + 2] == '\n')
            {
                i += 3;
                continue;
            }

            if (i + 1 < text.Length && text[i + 1] == '\n')
            {
                i += 2;
                continue;
            }

            if (i + 2 < text.Length && Uri.IsHexDigit(text[i + 1]) && Uri.IsHexDigit(text[i + 2]))
            {
                pending[pendingCount++] = (byte)((Uri.FromHex(text[i + 1]) << 4) | Uri.FromHex(text[i + 2]));
                i += 3;
                continue;
            }
            // Any other '=' is not an escape; fall through and keep it literally.
        }

        if (c <= 0x7F)
        {
            // ASCII literals join the byte stream so that multi-byte and stateful
            // encodings see one contiguous sequence.
            pending[pendingCount++] = (byte)c;
        }
        else
        {
            // A non-ASCII literal is not valid quoted-printable; keep it untouched.
            decoded.Append(encoding.GetString(pending, 0, pendingCount));
            pendingCount = 0;
            decoded.Append(c);
        }

        i++;
    }

    decoded.Append(encoding.GetString(pending, 0, pendingCount));
    return decoded.ToString();
}

나는 이것을 빨리 썼다.

    /// <summary>
    /// Decodes quoted-printable text in a single left-to-right pass. Every "=XX"
    /// escape becomes the character whose code is the hexadecimal value XX, and soft
    /// line breaks ("=" followed by CRLF or LF) are removed.
    /// </summary>
    /// <param name="input">Text to decode. Null or empty input is returned unchanged.</param>
    /// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
    public static string DecodeQuotedPrintables(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        var decoded = new System.Text.StringBuilder(input.Length);
        int i = 0;
        while (i < input.Length)
        {
            if (input[i] == '=')
            {
                if (i + 2 < input.Length && input[i + 1] == '\r' && input[i + 2] == '\n')
                {
                    i += 3;
                    continue;
                }

                if (i + 1 < input.Length && input[i + 1] == '\n')
                {
                    i += 2;
                    continue;
                }

                if (i + 2 < input.Length && Uri.IsHexDigit(input[i + 1]) && Uri.IsHexDigit(input[i + 2]))
                {
                    // Appending straight to the output (instead of string.Replace on the
                    // input) means a decoded '=' can never start a new escape.
                    decoded.Append((char)((Uri.FromHex(input[i + 1]) << 4) | Uri.FromHex(input[i + 2])));
                    i += 3;
                    continue;
                }
            }

            decoded.Append(input[i]);
            i++;
        }

        return decoded.ToString();
    }

UTF-8로 인코딩된 Quoted-Printable을 디코딩하는 경우, Quoted-Printable 시퀀스가 연속해서 나타나면 다른 답변들처럼 각 시퀀스를 하나씩 따로 디코딩할 수 없다는 점을 알고 있어야 합니다.

예를 들어 =E2=80=99라는 시퀀스가 있을 때 UTF-8로 한 번에 하나씩 디코딩하면 세 개의 "이상한" 문자가 나옵니다. 대신 3바이트 배열을 만들어 그 3바이트를 UTF-8로 한꺼번에 변환하면 하나의 아포스트로피를 얻습니다.

분명히 ASCII 인코딩을 사용하는 경우 한 번에 한 번에 한 번은 문제가되지 않지만 실행은 사용 된 텍스트 인코더에 관계없이 코드가 작동한다는 것을 의미합니다.

아, 그리고 =3D는 한 번 더 디코딩해야 한다는 것을 의미하는 특별한 경우라는 점을 잊지 마십시오. 정말 황당한 함정입니다!

도움이되기를 바랍니다

인용 된 인용 인쇄용 디코더는 훌륭하게 작동합니다!

/// <summary>
/// Converts ASCII hexadecimal digit pairs into the bytes they represent,
/// for example the two bytes '0','F' become the single byte 0x0F.
/// </summary>
/// <param name="hexData">ASCII hex digits (0-9, A-F, a-f); the length must be even and at least 2.</param>
/// <returns>One byte per input pair.</returns>
/// <exception cref="ArgumentNullException"><paramref name="hexData"/> is null.</exception>
/// <exception cref="ArgumentException">The length is odd or smaller than 2.</exception>
/// <exception cref="FormatException">A byte is not an ASCII hexadecimal digit.</exception>
public static byte[] FromHex(byte[] hexData)
{
    if (hexData == null)
    {
        throw new ArgumentNullException("hexData");
    }

    if (hexData.Length < 2 || hexData.Length % 2 != 0)
    {
        throw new ArgumentException("Illegal hex data, hex data must be in two bytes pairs, for example: 0F,FF,A3,... .");
    }

    byte[] decoded = new byte[hexData.Length / 2];
    for (int i = 0; i < decoded.Length; i++)
    {
        char high = (char)hexData[2 * i];
        char low = (char)hexData[2 * i + 1];

        // Reject anything that is not a hex digit instead of silently treating it
        // as 0, so callers can tell "=ZZ" apart from "=00".
        if (!Uri.IsHexDigit(high) || !Uri.IsHexDigit(low))
        {
            throw new FormatException("Illegal hex data, '" + high + low + "' is not a hexadecimal pair.");
        }

        // High nibble in the upper 4 bits, low nibble in the lower 4 bits.
        decoded[i] = (byte)((Uri.FromHex(high) << 4) | Uri.FromHex(low));
    }

    return decoded;
}
    /// <summary>
    /// Decodes quoted-printable data at byte level: "=XX" becomes the byte with
    /// hexadecimal value XX and soft line breaks ("=" followed by CRLF or LF) are removed.
    /// </summary>
    /// <param name="data">Quoted-printable encoded bytes.</param>
    /// <returns>
    /// The decoded bytes. A '=' that is followed by neither a line break nor two
    /// hexadecimal digits (including a '=' at the very end) is copied through unchanged.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public static byte[] QuotedPrintableDecode(byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        using (MemoryStream decoded = new MemoryStream(data.Length))
        {
            int i = 0;
            while (i < data.Length)
            {
                byte current = data[i];
                if (current == (byte)'=')
                {
                    // Soft line break: the line was split, skip the marker and the break.
                    if (i + 2 < data.Length && data[i + 1] == (byte)'\r' && data[i + 2] == (byte)'\n')
                    {
                        i += 3;
                        continue;
                    }

                    if (i + 1 < data.Length && data[i + 1] == (byte)'\n')
                    {
                        i += 2;
                        continue;
                    }

                    // Encoded 8-bit byte.
                    if (i + 2 < data.Length && Uri.IsHexDigit((char)data[i + 1]) && Uri.IsHexDigit((char)data[i + 2]))
                    {
                        decoded.WriteByte((byte)((Uri.FromHex((char)data[i + 1]) << 4) | Uri.FromHex((char)data[i + 2])));
                        i += 3;
                        continue;
                    }

                    // Illegal '=': fall through so that it is left as it is.
                }

                decoded.WriteByte(current);
                i++;
            }

            return decoded.ToArray();
        }
    }

    /// <summary>
    /// Decodes quoted-printable text. Runs of consecutive "=XX" escapes are collected
    /// into one byte array and converted together, so multi-byte characters survive.
    /// </summary>
    /// <param name="data">Quoted-printable text. Null or empty input is returned unchanged.</param>
    /// <param name="encoding">
    /// Name of the character set of the encoded bytes. It is only looked up once the
    /// first escape has been found.
    /// </param>
    /// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
    private string quotedprintable(string data, string encoding)
    {
        if (string.IsNullOrEmpty(data))
        {
            return data;
        }

        System.Text.Encoding charset = null;
        System.Text.StringBuilder decoded = new System.Text.StringBuilder(data.Length);

        // Every escape takes three characters, so this bounds the longest possible run.
        byte[] run = new byte[data.Length / 3 + 1];
        int runLength = 0;

        int i = 0;
        while (i < data.Length)
        {
            if (data[i] == '=')
            {
                // Soft line breaks do not end a run of escapes.
                if (i + 2 < data.Length && data[i + 1] == '\r' && data[i + 2] == '\n')
                {
                    i += 3;
                    continue;
                }

                if (i + 1 < data.Length && data[i + 1] == '\n')
                {
                    i += 2;
                    continue;
                }

                if (i + 2 < data.Length && System.Uri.IsHexDigit(data[i + 1]) && System.Uri.IsHexDigit(data[i + 2]))
                {
                    run[runLength++] = (byte)((System.Uri.FromHex(data[i + 1]) << 4) | System.Uri.FromHex(data[i + 2]));
                    i += 3;
                    continue;
                }
            }

            if (runLength > 0)
            {
                if (charset == null)
                {
                    charset = System.Text.Encoding.GetEncoding(encoding);
                }

                decoded.Append(charset.GetString(run, 0, runLength));
                runLength = 0;
            }

            // Writing to a separate buffer keeps the scan position valid; nothing
            // after a decoded run can be skipped.
            decoded.Append(data[i]);
            i++;
        }

        if (runLength > 0)
        {
            if (charset == null)
            {
                charset = System.Text.Encoding.GetEncoding(encoding);
            }

            decoded.Append(charset.GetString(run, 0, runLength));
        }

        return decoded.ToString();
    }

나는 동적 솔루션을 찾고 있었고 2 일 동안 다른 솔루션을 시도했습니다. 이 솔루션은 일본어 및 기타 표준 문자 세트를 지원합니다.

/// <summary>
/// Decodes quoted-printable text into bytes and converts those bytes to a string
/// using the given character set.
/// </summary>
/// <param name="input">Quoted-printable text. Null or empty input is returned unchanged.</param>
/// <param name="bodycharset">
/// Character set name. Null or empty selects UTF-8; "ISO-2022-JP" (any casing) is
/// decoded with the "Shift_JIS" encoding; any other name is passed to
/// <c>Encoding.GetEncoding</c>.
/// </param>
/// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
private static string Decode(string input, string bodycharset) {
    if (String.IsNullOrEmpty(input)) {
        return input;
    }

    Encoding encoding;
    if (String.IsNullOrEmpty(bodycharset)) {
        encoding = Encoding.UTF8;
    } else if (String.Compare(bodycharset, "ISO-2022-JP", true) == 0) {
        // NOTE(review): ISO-2022-JP is decoded as Shift_JIS, as in the original code;
        // confirm that this mapping is intended.
        encoding = Encoding.GetEncoding("Shift_JIS");
    } else {
        encoding = Encoding.GetEncoding(bodycharset);
    }

    var text = new StringBuilder(input.Length);

    // Each output byte consumes at least one input character, so this is large enough.
    var output = new byte[input.Length];
    var count = 0;

    var i = 0;
    while (i < input.Length) {
        char c = input[i];
        if (c == '=') {
            // All look-aheads are bounds-checked so a '=' near the end cannot throw.
            if (i + 2 < input.Length && input[i + 1] == '\r' && input[i + 2] == '\n') {
                //Skip
                i += 3;
                continue;
            }
            if (i + 1 < input.Length && input[i + 1] == '\n') {
                i += 2;
                continue;
            }
            if (i + 2 < input.Length && Uri.IsHexDigit(input[i + 1]) && Uri.IsHexDigit(input[i + 2])) {
                output[count++] = (byte)((Uri.FromHex(input[i + 1]) << 4) | Uri.FromHex(input[i + 2]));
                i += 3;
                continue;
            }
        }

        if (c <= 0x7F) {
            output[count++] = (byte)c;
        } else {
            // A non-ASCII literal cannot be stored in one byte; emit what has been
            // collected so far and keep the character itself.
            text.Append(encoding.GetString(output, 0, count));
            count = 0;
            text.Append(c);
        }
        i++;
    }

    text.Append(encoding.GetString(output, 0, count));
    return text.ToString();
}

그런 다음 기능을 호출 할 수 있습니다

// Example call: decodes the escapes as UTF-8. The final "=E3" is a lone UTF-8 lead byte (incomplete sequence).
Decode("=E3=82=AB=E3=82=B9=E3", "utf-8")

이것은 원래 발견되었습니다 여기

나를 위해 일한 유일한 사람.

http://sourceforge.net/apps/trac/syncmldotnet/wiki/quoted%20printable

QPS를 해독하기 만하면 위의 링크 에서이 세 가지 기능을 코드 내부로 가져 가십시오.

    HexDecoderEvaluator(Match m)
    HexDecoder(string line)
    Decode(string encodedText)

그리고 그냥 :

// Decode is the single-argument function taken from the linked page; myQPString holds the quoted-printable text.
var humanReadable = Decode(myQPString);

즐기다

더 나은 솔루션

    /// <summary>
    /// Decodes quoted-printable text: "=XX" escapes become bytes that are converted
    /// with the requested character set, soft line breaks ("=" followed by CRLF or LF)
    /// are removed, and finally every "?=" is stripped from the result.
    /// </summary>
    /// <param name="input">Text to decode. Null or empty input is returned unchanged.</param>
    /// <param name="charSet">Character set name; UTF-8 is used when it is null or unknown.</param>
    /// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
    private static string DecodeQuotedPrintables(string input, string charSet)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        Encoding enc;
        try
        {
            enc = Encoding.GetEncoding(charSet);
        }
        catch (System.ArgumentException)
        {
            // Also covers a null charSet (ArgumentNullException).
            enc = new UTF8Encoding();
        }
        catch (System.NotSupportedException)
        {
            enc = new UTF8Encoding();
        }

        var decoded = new StringBuilder(input.Length);

        // Each output byte consumes at least one input character, so this is large enough.
        byte[] pending = new byte[input.Length];
        int pendingCount = 0;

        int i = 0;
        while (i < input.Length)
        {
            char c = input[i];
            if (c == '=')
            {
                if (i + 2 < input.Length && input[i + 1] == '\r' && input[i + 2] == '\n')
                {
                    i += 3;
                    continue;
                }

                if (i + 1 < input.Length && input[i + 1] == '\n')
                {
                    i += 2;
                    continue;
                }

                if (i + 2 < input.Length && System.Uri.IsHexDigit(input[i + 1]) && System.Uri.IsHexDigit(input[i + 2]))
                {
                    pending[pendingCount++] = (byte)((System.Uri.FromHex(input[i + 1]) << 4) | System.Uri.FromHex(input[i + 2]));
                    i += 3;
                    continue;
                }
            }

            if (c <= 0x7F)
            {
                // Bytes are converted as one contiguous sequence, so a multi-byte
                // character yields all of its characters, not just the first.
                pending[pendingCount++] = (byte)c;
            }
            else
            {
                decoded.Append(enc.GetString(pending, 0, pendingCount));
                pendingCount = 0;
                decoded.Append(c);
            }

            i++;
        }

        decoded.Append(enc.GetString(pending, 0, pendingCount));

        // Kept from the original: drops encoded-word terminators left in the text.
        return decoded.ToString().Replace("?=", "");
    }

/// <summary>
/// Decodes quoted-printable text with the given encoding. Literal text is kept,
/// "=XX" escapes become bytes, and soft line breaks ("=" followed by CRLF or LF)
/// are removed.
/// </summary>
/// <param name="input">Text to decode. Null or empty input is returned unchanged.</param>
/// <param name="encoding">Encoding used to convert the decoded bytes to text.</param>
/// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
/// <exception cref="ArgumentNullException"><paramref name="encoding"/> is null.</exception>
public static string DecodeQuotedPrintables(string input, Encoding encoding)
{
    if (encoding == null)
    {
        throw new ArgumentNullException("encoding");
    }

    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    var decoded = new StringBuilder(input.Length);

    // Each output byte consumes at least one input character, so this is large enough.
    var bytes = new byte[input.Length];
    var count = 0;

    var i = 0;
    while (i < input.Length)
    {
        char c = input[i];
        if (c == '=')
        {
            if (i + 2 < input.Length && input[i + 1] == '\r' && input[i + 2] == '\n')
            {
                i += 3;
                continue;
            }

            if (i + 1 < input.Length && input[i + 1] == '\n')
            {
                i += 2;
                continue;
            }

            if (i + 2 < input.Length && Uri.IsHexDigit(input[i + 1]) && Uri.IsHexDigit(input[i + 2]))
            {
                bytes[count++] = (byte)((Uri.FromHex(input[i + 1]) << 4) | Uri.FromHex(input[i + 2]));
                i += 3;
                continue;
            }
        }

        if (c <= 0x7F)
        {
            // Unescaped text is part of the result too, not only the escapes.
            bytes[count++] = (byte)c;
        }
        else
        {
            decoded.Append(encoding.GetString(bytes, 0, count));
            count = 0;
            decoded.Append(c);
        }

        i++;
    }

    decoded.Append(encoding.GetString(bytes, 0, count));
    return decoded.ToString();
}

때로는 EML 파일로의 문자열이 여러 인코딩 된 부분으로 구성됩니다. 이것은 이러한 경우에 Dave의 방법을 사용하는 기능입니다.

/// <summary>
/// Decodes every RFC 2047 encoded-word ("=?charset?Q?text?=" or "=?charset?B?text?=")
/// found in a string by assigning it as the name of a temporary
/// <c>Attachment</c> and reading the <c>Name</c> property back.
/// </summary>
/// <param name="codedstring">Text that may contain encoded-words, for example a header value from an EML file.</param>
/// <returns>
/// The input with each encoded-word replaced by its decoded name. Text outside
/// encoded-words is kept, except whitespace that only separates two adjacent
/// encoded-words. Input without encoded-words (or null/empty) is returned unchanged.
/// </returns>
public string DecodeQP(string codedstring)
{
    if (string.IsNullOrEmpty(codedstring))
    {
        return codedstring;
    }

    // Neither charset nor payload may contain '?' or whitespace. This keeps the match
    // from ending at the "?=" that begins an escape such as "?Q?=A1".
    Regex codified = new Regex(@"=\?[^?\s]+\?[BQ]\?[^?\s]*\?=", RegexOptions.IgnoreCase);
    MatchCollection setMatches = codified.Matches(codedstring);
    if (setMatches.Count == 0)
    {
        return codedstring;
    }

    System.Text.StringBuilder decoded = new System.Text.StringBuilder(codedstring.Length);
    int cursor = 0;
    foreach (Match match in setMatches)
    {
        string gap = codedstring.Substring(cursor, match.Index - cursor);
        bool separatesTwoWords = cursor > 0 && gap.Trim().Length == 0;
        if (!separatesTwoWords)
        {
            decoded.Append(gap);
        }

        // Attachment is IDisposable; release it as soon as the name has been read.
        using (Attachment attdecode = Attachment.CreateAttachmentFromString("", match.Value))
        {
            decoded.Append(attdecode.Name);
        }

        cursor = match.Index + match.Length;
    }

    decoded.Append(codedstring, cursor, codedstring.Length - cursor);
    return decoded.ToString();
}

참고 : "input.replace"가있는 솔루션은 인터넷 전체에 있으며 여전히 정확하지 않습니다.

보세요, 디코딩된 기호 하나에 대해 "Replace"를 사용하면 "input"에 있는 해당 기호가 모두 교체되고, 그 이후의 디코딩은 전부 깨집니다.

더 정확한 솔루션 :

/// <summary>
/// Decodes quoted-printable text: "=XX" escapes become bytes that are converted
/// with the requested character set, soft line breaks are removed, and finally
/// every "?=" is stripped from the result.
/// </summary>
/// <param name="input">Text to decode. Null or empty input is returned unchanged.</param>
/// <param name="charSet">Character set name; UTF-8 is used when it is null or unknown.</param>
/// <returns>The decoded text; a '=' that does not start a valid escape is kept as is.</returns>
public static string DecodeQuotedPrintable(string input, string charSet)
{
    if (String.IsNullOrEmpty(input))
    {
        return input;
    }

    Encoding enc;
    try
    {
        enc = Encoding.GetEncoding(charSet);
    }
    catch (ArgumentException)
    {
        // Also covers a null charSet (ArgumentNullException).
        enc = new UTF8Encoding();
    }
    catch (NotSupportedException)
    {
        enc = new UTF8Encoding();
    }

    // NOTE(review): kept from the original - when the soft break touches a space the
    // '=' is removed but the line break itself is preserved. Confirm this is intended.
    input = input.Replace("=\r\n ", "\r\n ");
    input = input.Replace("= \r\n", " \r\n");

    var decoded = new StringBuilder(input.Length);

    // Each output byte consumes at least one input character, so this is large enough.
    byte[] pending = new byte[input.Length];
    int pendingCount = 0;

    int i = 0;
    while (i < input.Length)
    {
        char c = input[i];
        if (c == '=')
        {
            // Every remaining soft line break is dropped, whatever follows it.
            if (i + 2 < input.Length && input[i + 1] == '\r' && input[i + 2] == '\n')
            {
                i += 3;
                continue;
            }

            if (i + 1 < input.Length && input[i + 1] == '\n')
            {
                i += 2;
                continue;
            }

            if (i + 2 < input.Length && Uri.IsHexDigit(input[i + 1]) && Uri.IsHexDigit(input[i + 2]))
            {
                pending[pendingCount++] = (byte)((Uri.FromHex(input[i + 1]) << 4) | Uri.FromHex(input[i + 2]));
                i += 3;
                continue;
            }
        }

        if (c <= 0x7F)
        {
            // Bytes are converted as one contiguous sequence so multi-byte characters
            // decode correctly, and output is never scanned for escapes again.
            pending[pendingCount++] = (byte)c;
        }
        else
        {
            decoded.Append(enc.GetString(pending, 0, pendingCount));
            pendingCount = 0;
            decoded.Append(c);
        }

        i++;
    }

    decoded.Append(enc.GetString(pending, 0, pendingCount));

    // Kept from the original: drops encoded-word terminators left in the text.
    return decoded.ToString().Replace("?=", "");
}

라이센스 : CC-BY-SA ~와 함께 속성

제휴하지 않습니다 StackOverflow