C#: هل توجد فئة لفك ترميز Quoted-Printable؟

https://stackoverflow.com/questions/2226554

19-09-2019
|

سؤال

هل توجد فئة جاهزة في C# يمكنها تحويل ترميز Quoted-Printable إلى String؟ انقر على الرابط أعلاه للحصول على مزيد من المعلومات حول الترميز.

يتم نقل ما يلي من الرابط أعلاه لراحتك.

يمكن ترميز أي قيمة بايت من 8 بت بثلاثة أحرف: علامة "=" متبوعة برقمين ست عشريين (0-9 أو A-F) يمثلان القيمة الرقمية للبايت. على سبيل المثال، يمكن تمثيل حرف تغذية الصفحة (form feed) في US-ASCII (القيمة العشرية 12) بواسطة "=0C"، ويتم تمثيل علامة التساوي في US-ASCII (القيمة العشرية 61) بواسطة "=3D". يجب ترميز جميع الأحرف بهذه الطريقة باستثناء أحرف ASCII القابلة للطباعة وأحرف نهاية السطر.

قد يتم تمثيل جميع أحرف ASCII القابلة للطباعة (القيم العشرية بين 33 و 126) بنفسها، باستثناء "=" (عشري 61).

يمكن تمثيل حرفي الجدولة (tab) والمسافة في ASCII، أي القيمتين العشريتين 9 و 32، بنفسيهما، إلا إذا ظهرا في نهاية السطر. إذا ظهر أحد هذين الحرفين في نهاية السطر، فيجب ترميزه على شكل "=09" (جدولة) أو "=20" (مسافة).

إذا كانت البيانات المراد ترميزها تحتوي على فواصل أسطر ذات معنى، فيجب ترميزها كتسلسل ASCII CR LF، وليس بقيم البايت الأصلية. وعلى العكس، إذا كان للبايتين 13 و 10 معنى آخر غير نهاية السطر، فيجب ترميزهما على شكل =0D و =0A.

يجب ألا يزيد طول أسطر البيانات المرمّزة بـ Quoted-Printable عن 76 حرفا. ولتحقيق هذا الشرط دون تغيير النص المرمّز، يمكن إضافة فواصل أسطر ناعمة (soft line breaks) حسب الحاجة. يتكون فاصل السطر الناعم من علامة "=" في نهاية السطر المرمّز، ولا ينتج عنه فاصل سطر في النص بعد فك الترميز.

الحل

توجد في مكتبات إطار العمل وظيفة تقوم بذلك، لكنها لا تبدو مكشوفة بشكل نظيف. التنفيذ موجود في الفئة الداخلية System.Net.Mime.QuotedPrintableStream. تعرّف هذه الفئة طريقة تسمى DecodeBytes تقوم بما تريده. يبدو أن هذه الطريقة تُستدعى من طريقة واحدة فقط تُستخدم لفك ترميز رؤوس MIME. وهذه الطريقة داخلية أيضا، لكنها تُستدعى بشكل شبه مباشر في بضعة أماكن، مثل الـ setter الخاص بـ Attachment.Name. مثال توضيحي:

using System;
using System.Net.Mail;

namespace ConsoleApplication1
{
    /// <summary>
    /// Demo program: passes an RFC 2047 encoded-word to
    /// <see cref="Attachment.CreateAttachmentFromString(string, string)"/> as the
    /// attachment name and prints the resulting <see cref="Attachment.Name"/>.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Entry point. Creates an attachment whose name is a Q-encoded ISO-8859-1
        /// header value and writes the attachment's name to the console.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            // Attachment is IDisposable; dispose it deterministically instead of
            // leaving its underlying content stream to the finalizer.
            using (Attachment attachment = Attachment.CreateAttachmentFromString("", "=?iso-8859-1?Q?=A1Hola,_se=F1or!?="))
            {
                Console.WriteLine(attachment.Name);
            }
        }
    }
}

تنتج الناتج:

¡Hola, señor!

قد تحتاج إلى إجراء بعض الاختبارات للتأكد من أن محارف الرجوع إلى أول السطر (carriage return) وما شابهها تُعالج بشكل صحيح، مع أنها بدت كذلك في اختبار سريع أجريته. ومع ذلك، قد لا يكون من الحكمة الاعتماد على هذه الوظيفة ما لم تكن حالة استخدامك قريبة بما يكفي من فك ترميز سلسلة رأس MIME بحيث لا تتوقع أن تتعطل بسبب أي تغييرات تُجرى على المكتبة. قد يكون من الأفضل أن تكتب وحدة فك ترميز Quoted-Printable خاصة بك.

نصائح أخرى

وسّعت حل Martin Murphy وآمل أن يعمل في كل الحالات.

/// <summary>
/// Decodes quoted-printable text, optionally wrapped in RFC 2047 "Q" encoded-words,
/// and also decodes UTF-8 "B" (base64) encoded-words found in the input.
/// </summary>
/// <param name="input">The encoded text. Null or empty input is returned unchanged.</param>
/// <param name="charSet">
/// Name of the charset used to turn decoded bytes into characters. When null or empty,
/// the charset is taken from the last <c>=?charset?Q?</c> header found in
/// <paramref name="input"/> and those Q encoded-word wrappers are removed.
/// Falls back to ASCII when no charset is known or the name is not recognised.
/// </param>
/// <returns>The decoded text.</returns>
private static string DecodeQuotedPrintables(string input, string charSet)
{
    if (string.IsNullOrEmpty(input))
    {
        return input;
    }

    if (string.IsNullOrEmpty(charSet))
    {
        // "[^?]+" keeps the charset token from running across several encoded-words
        // (the previous greedy ".*" did). The terminator is optional so a header
        // without a closing "?=" is still unwrapped.
        var qWord = new Regex(@"=\?([^?]+)\?Q\?([^?]*)(?:\?=)?", RegexOptions.IgnoreCase);
        var qWords = qWord.Matches(input);
        if (qWords.Count > 0)
        {
            // The last header wins, as before.
            charSet = qWords[qWords.Count - 1].Groups[1].Value;
            // Strip only the wrapper of Q words so that "?=" belonging to B words survives.
            input = qWord.Replace(input, "${2}");
        }
    }

    Encoding enc = Encoding.ASCII;
    if (!string.IsNullOrEmpty(charSet))
    {
        try
        {
            enc = Encoding.GetEncoding(charSet);
        }
        catch (ArgumentException)
        {
            // Unknown or unsupported charset name: keep the ASCII fallback.
            enc = Encoding.ASCII;
        }
    }

    // Single left-to-right pass, so decoded output is never scanned again:
    //  - a whole UTF-8 base64 encoded-word, including its "=?" and "?=" delimiters, or
    //  - a run of "=XX" escapes, possibly interleaved with soft line breaks, which is
    //    decoded as ONE byte sequence so multi-byte characters survive.
    var token = new Regex(@"=\?utf-8\?B\?(?<b64>[^?]*)\?=|(?:=[0-9A-Fa-f]{2}|=\r?\n)+", RegexOptions.IgnoreCase);
    return token.Replace(input, m =>
    {
        Group b64 = m.Groups["b64"];
        if (b64.Success)
        {
            try
            {
                return Encoding.UTF8.GetString(Convert.FromBase64String(b64.Value));
            }
            catch (FormatException)
            {
                // Not valid base64: leave the encoded-word untouched.
                return m.Value;
            }
        }

        string run = m.Value;
        byte[] bytes = new byte[run.Length / 3];
        int count = 0;
        int i = 0;
        while (i < run.Length)
        {
            char next = run[i + 1];
            if (next == '\r')
            {
                i += 3; // soft line break "=\r\n"
            }
            else if (next == '\n')
            {
                i += 2; // soft line break "=\n"
            }
            else
            {
                bytes[count++] = Convert.ToByte(run.Substring(i + 1, 2), 16);
                i += 3;
            }
        }

        return enc.GetString(bytes, 0, count);
    });
}

كتبت هذا بسرعة حقيقية.

    /// <summary>
    /// Decodes quoted-printable text, mapping every <c>=XX</c> escape to the character
    /// whose code point equals the byte value (i.e. Latin-1 semantics) and removing
    /// soft line breaks.
    /// </summary>
    /// <param name="input">The encoded text. Null or empty input is returned unchanged.</param>
    /// <returns>The decoded text.</returns>
    public static string DecodeQuotedPrintables(string input)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        // One left-to-right pass: output produced by an escape (for example the "=" that
        // comes out of "=3D") is never scanned again, and a soft break is recognised
        // only where a real "=" precedes the line ending.
        return Regex.Replace(
            input,
            @"=(?:\r?\n|(?<hex>[0-9A-Fa-f]{2}))",
            m => m.Groups["hex"].Success
                ? ((char)Convert.ToInt32(m.Groups["hex"].Value, 16)).ToString()
                : string.Empty);
    }

إذا كنت تفك ترميز Quoted-Printable لنص بترميز UTF-8، فعليك أن تنتبه إلى أنه لا يمكنك فك ترميز كل تسلسل Quoted-Printable على حدة واحدا تلو الآخر كما فعلت الإجابات الأخرى، وذلك عندما تتوالى عدة تسلسلات مرمّزة متجاورة.

على سبيل المثال: إذا كان لديك التسلسل =E2=80=99 وفككت ترميزه باستخدام UTF-8 بايتا تلو الآخر فستحصل على ثلاثة أحرف "غريبة"، أما إذا أنشأت مصفوفة من ثلاثة بايتات وحوّلتها معا باستخدام ترميز UTF-8 فستحصل على فاصلة عليا واحدة (’).

من الواضح أنه إذا كنت تستخدم ترميز ASCII فلا مشكلة في فك الترميز بايتا تلو الآخر، ومع ذلك فإن فك ترميز التسلسلات المتتالية دفعة واحدة يعني أن الكود الخاص بك سيعمل بغض النظر عن ترميز النص المستخدم.

ولا تنسَ أن =3D حالة خاصة تعني أنك تحتاج إلى فك ترميز ما لديك مرة أخرى... إنها مفاجأة مزعجة حقا!

امل ان يساعد

هذا هو وحدة فك الترميز القابل للطباعة يعمل بشكل رائع!

/// <summary>
/// Converts ASCII hexadecimal digit pairs into the bytes they represent,
/// e.g. the four bytes of "0Fa3" become { 0x0F, 0xA3 }. Upper- and lower-case
/// digits are accepted.
/// </summary>
/// <param name="hexData">ASCII hex digits; the length must be even and at least 2.</param>
/// <returns>One byte per input pair.</returns>
/// <exception cref="ArgumentNullException"><paramref name="hexData"/> is null.</exception>
/// <exception cref="ArgumentException">The length is odd or less than 2.</exception>
/// <exception cref="FormatException">A byte is not a hexadecimal digit.</exception>
public static byte[] FromHex(byte[] hexData)
    {
        if (hexData == null)
        {
            throw new ArgumentNullException("hexData");
        }

        if (hexData.Length < 2 || hexData.Length % 2 != 0)
        {
            throw new ArgumentException("Illegal hex data, hex data must be in two bytes pairs, for example: 0F,FF,A3,... .", "hexData");
        }

        byte[] retVal = new byte[hexData.Length / 2];
        for (int i = 0; i < retVal.Length; i++)
        {
            char high = (char)hexData[2 * i];
            char low = (char)hexData[2 * i + 1];

            // Previously a non-hex character silently counted as 0, so garbage such as
            // "ZZ" decoded to 0x00. Reject it so callers can detect illegal input.
            if (!Uri.IsHexDigit(high) || !Uri.IsHexDigit(low))
            {
                throw new FormatException("Illegal hex data, '" + high + low + "' is not a hexadecimal byte value.");
            }

            // Join the high nibble (left digit) and low nibble (right digit) into one byte.
            retVal[i] = (byte)((Uri.FromHex(high) << 4) | Uri.FromHex(low));
        }

        return retVal;
    }
    /// <summary>
    /// Decodes quoted-printable bytes: <c>=XX</c> becomes the byte 0xXX, a soft line
    /// break (<c>=</c> followed by CR LF) is removed, and everything else is copied
    /// through unchanged.
    /// </summary>
    /// <param name="data">The quoted-printable encoded bytes.</param>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
    public static byte[] QuotedPrintableDecode(byte[] data)
    {
        if (data == null)
        {
            throw new ArgumentNullException("data");
        }

        using (MemoryStream msRetVal = new MemoryStream(data.Length))
        {
            int i = 0;
            while (i < data.Length)
            {
                byte b = data[i];
                if (b != (byte)'=')
                {
                    // Just write back all other bytes
                    msRetVal.WriteByte(b);
                    i++;
                    continue;
                }

                bool hasTwoMore = i + 2 < data.Length;

                // Soft line break, line splitted, just skip "=CRLF"
                if (hasTwoMore && data[i + 1] == (byte)'\r' && data[i + 2] == (byte)'\n')
                {
                    i += 3;
                    continue;
                }

                // Encoded 8-bit byte (=XX)
                if (hasTwoMore && Uri.IsHexDigit((char)data[i + 1]) && Uri.IsHexDigit((char)data[i + 2]))
                {
                    msRetVal.WriteByte((byte)((Uri.FromHex((char)data[i + 1]) << 4) | Uri.FromHex((char)data[i + 2])));
                    i += 3;
                    continue;
                }

                // Illegal "=" (truncated or followed by non-hex): leave it as it is.
                // Only the "=" is emitted here; the following bytes are processed
                // normally so neither the "=" nor a later valid escape is lost.
                msRetVal.WriteByte(b);
                i++;
            }

            return msRetVal.ToArray();
        }
    }

    /// <summary>
    /// Decodes a quoted-printable string. Consecutive <c>=XX</c> escapes are collected
    /// and converted together so multi-byte characters of the given encoding decode
    /// correctly.
    /// </summary>
    /// <param name="data">The quoted-printable text.</param>
    /// <param name="encoding">Name of the encoding used to interpret the decoded bytes.</param>
    /// <returns>The decoded text; malformed escapes are copied through literally.</returns>
    private string quotedprintable(string data, string encoding)
    {
        // Remove soft line breaks first so a byte run split across lines is rejoined.
        data = data.Replace("=\r\n", "");

        System.Text.StringBuilder decoded = new System.Text.StringBuilder(data.Length);
        System.Text.Encoding charset = null;      // resolved lazily, only if an escape is present
        byte[] run = new byte[data.Length / 3];   // an escape takes 3 chars, so this is the maximum
        int position = 0;

        // Build the result in a separate buffer instead of rewriting `data` in place:
        // the in-place version kept searching from an index in the old, longer string
        // and therefore skipped escapes that followed a decoded run.
        while (position < data.Length)
        {
            int count = 0;
            while (position + 2 < data.Length
                   && data[position] == '='
                   && System.Uri.IsHexDigit(data[position + 1])
                   && System.Uri.IsHexDigit(data[position + 2]))
            {
                run[count++] = (byte)((System.Uri.FromHex(data[position + 1]) << 4) | System.Uri.FromHex(data[position + 2]));
                position += 3;
            }

            if (count > 0)
            {
                if (charset == null)
                {
                    charset = System.Text.Encoding.GetEncoding(encoding);
                }
                decoded.Append(charset.GetString(run, 0, count));
            }
            else
            {
                // Ordinary character, or an "=" that does not start a valid escape.
                decoded.Append(data[position]);
                position++;
            }
        }

        return decoded.ToString();
    }

كنت أبحث عن حل ديناميكي وقضى يومين في محاولة حلول مختلفة. سيدعم هذا الحل الأحرف اليابانية وغيرها من مجموعات الأحرف القياسية

/// <summary>
/// Decodes quoted-printable text into bytes and converts them to a string using
/// the given charset.
/// </summary>
/// <param name="input">The quoted-printable text.</param>
/// <param name="bodycharset">
/// Charset name for the decoded bytes; UTF-8 is used when null or empty.
/// "ISO-2022-JP" is decoded with the "Shift_JIS" encoding.
/// </param>
/// <returns>The decoded text; malformed or truncated escapes are kept literally.</returns>
/// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
private static string Decode(string input, string bodycharset) {
        if (input == null) {
            throw new ArgumentNullException("input");
        }

        var output = new List<byte>(input.Length);
        var i = 0;
        while (i < input.Length) {
            // An escape or soft break needs two more characters after '='; checking this
            // first avoids reading past the end when '=' is one of the last two characters.
            bool hasTwoMore = input[i] == '=' && i + 2 < input.Length;

            if (hasTwoMore && input[i + 1] == '\r' && input[i + 2] == '\n') {
                // Soft line break: skip
                i += 3;
            } else if (hasTwoMore && Uri.IsHexDigit(input[i + 1]) && Uri.IsHexDigit(input[i + 2])) {
                output.Add((byte)((Uri.FromHex(input[i + 1]) << 4) | Uri.FromHex(input[i + 2])));
                i += 3;
            } else {
                // Literal character (including a stray '='). The cast keeps only the low
                // 8 bits. NOTE(review): assumes the input is 7-bit text — confirm.
                output.Add((byte)input[i]);
                i++;
            }
        }

        byte[] bytes = output.ToArray();

        if (String.IsNullOrEmpty(bodycharset)) {
            return Encoding.UTF8.GetString(bytes);
        }

        // NOTE(review): ISO-2022-JP is deliberately mapped to Shift_JIS here, as before;
        // the two are different encodings — confirm this is intended for the data at hand.
        if (String.Compare(bodycharset, "ISO-2022-JP", true) == 0) {
            return Encoding.GetEncoding("Shift_JIS").GetString(bytes);
        }

        return Encoding.GetEncoding(bodycharset).GetString(bytes);
    }

ثم يمكنك استدعاء الوظيفة مع

Decode("=E3=82=AB=E3=82=B9=E3", "utf-8")

تم العثور عليه في الأصل هنا

الوحيد الذي عمل بالنسبة لي.

http://sourceforge.net/apps/trac/syncmldott/wiki/quoted%20Printable.

إذا كنت بحاجة فقط إلى فك تشفير QPS، اسحبها داخل التعليمات البرمجية هذه الوظائف الثلاث من الرابط أعلاه:

    HexDecoderEvaluator(Match m)
    HexDecoder(string line)
    Decode(string encodedText)

ثم فقط:

var humanReadable = Decode(myQPString);

استمتع

حل أفضل

    /// <summary>
    /// Decodes quoted-printable text using the named charset. Runs of adjacent
    /// <c>=XX</c> escapes are decoded as one byte sequence so multi-byte characters
    /// are reconstructed correctly; soft line breaks and any remaining <c>?=</c>
    /// encoded-word terminators are removed.
    /// </summary>
    /// <param name="input">The encoded text. Null or empty input is returned unchanged.</param>
    /// <param name="charSet">Charset name; UTF-8 is used when it is null or not recognised.</param>
    /// <returns>The decoded text.</returns>
    private static string DecodeQuotedPrintables(string input, string charSet)
    {
        if (string.IsNullOrEmpty(input))
        {
            return input;
        }

        Encoding enc;
        try
        {
            enc = Encoding.GetEncoding(charSet);
        }
        catch (ArgumentException)
        {
            // Null, unknown or unsupported charset name.
            enc = new UTF8Encoding();
        }

        // A run is any mix of "=XX" escapes and soft line breaks. Handling it in one
        // Regex.Replace pass means (a) the whole run is decoded, not just its first
        // character, and (b) decoded output is never matched again.
        var occurences = new Regex(@"(?:=[0-9A-Fa-f]{2}|=\r?\n)+");
        input = occurences.Replace(input, match =>
        {
            string run = match.Value;
            byte[] b = new byte[run.Length / 3];
            int count = 0;
            int i = 0;
            while (i < run.Length)
            {
                char next = run[i + 1];
                if (next == '\r')
                {
                    i += 3; // soft line break "=\r\n"
                }
                else if (next == '\n')
                {
                    i += 2; // soft line break "=\n"
                }
                else
                {
                    b[count++] = byte.Parse(run.Substring(i + 1, 2), System.Globalization.NumberStyles.AllowHexSpecifier);
                    i += 3;
                }
            }

            return enc.GetString(b, 0, count);
        });

        // Drop encoded-word terminators, as before.
        input = input.Replace("?=", "");

        return input;
    }

/// <summary>
/// Decodes quoted-printable text with the supplied encoding. Literal characters are
/// preserved; runs of adjacent <c>=XX</c> escapes are decoded together so multi-byte
/// characters are reconstructed correctly; soft line breaks are removed.
/// </summary>
/// <param name="input">The quoted-printable text.</param>
/// <param name="encoding">Encoding used to turn the decoded bytes into characters.</param>
/// <returns>The decoded text.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="input"/> or <paramref name="encoding"/> is null.
/// </exception>
public static string DecodeQuotedPrintables(string input, Encoding encoding)
    {
        if (input == null)
        {
            throw new ArgumentNullException("input");
        }

        if (encoding == null)
        {
            throw new ArgumentNullException("encoding");
        }

        // Replace each run in place. The previous version returned only the decoded
        // escape bytes and dropped every unescaped character of the input.
        var regex = new Regex(@"(?:=[0-9A-Fa-f]{2}|=\r?\n)+");
        return regex.Replace(input, match =>
        {
            string run = match.Value;
            var bytes = new byte[run.Length / 3];
            var count = 0;
            var i = 0;
            while (i < run.Length)
            {
                char next = run[i + 1];
                if (next == '\r')
                {
                    i += 3; // soft line break "=\r\n"
                }
                else if (next == '\n')
                {
                    i += 2; // soft line break "=\n"
                }
                else
                {
                    bytes[count++] = Convert.ToByte(run.Substring(i + 1, 2), 16);
                    i += 3;
                }
            }

            return encoding.GetString(bytes, 0, count);
        });
    }

في بعض الأحيان تتألف السلسلة في ملف EML من عدة أجزاء مرمّزة. هذه دالة تستخدم طريقة Dave لهذه الحالات:

/// <summary>
/// Decodes a string that contains one or more RFC 2047 encoded-words
/// (<c>=?charset?Q?...?=</c> or <c>=?charset?B?...?=</c>) by passing each word through
/// <see cref="Attachment.CreateAttachmentFromString(string, string)"/> and reading back
/// the attachment's <see cref="Attachment.Name"/>.
/// </summary>
/// <param name="codedstring">The header value to decode.</param>
/// <returns>
/// The concatenation of the decoded encoded-words, or <paramref name="codedstring"/>
/// unchanged when it contains no encoded-word. Text between encoded-words is not kept.
/// </returns>
public string DecodeQP(string codedstring)
{
    // Match charset, encoding letter and payload explicitly. The earlier tempered
    // pattern stopped at the first "?=", which also occurs where the "?Q?" delimiter is
    // immediately followed by an escape ("?Q?=A1..."), truncating such words.
    Regex codified = new Regex(@"=\?[^?\s]+\?[BQ]\?[^?\s]*\?=", RegexOptions.IgnoreCase);
    MatchCollection setMatches = codified.Matches(codedstring);
    if (setMatches.Count == 0)
    {
        return codedstring;
    }

    System.Text.StringBuilder decoded = new System.Text.StringBuilder();
    foreach (Match match in setMatches)
    {
        // Attachment is IDisposable; release it as soon as the name has been read.
        using (Attachment attdecode = Attachment.CreateAttachmentFromString("", match.Value))
        {
            decoded.Append(attdecode.Name);
        }
    }

    return decoded.ToString();
}

يرجى ملاحظة: الحلول التي تستخدم "input.Replace" منتشرة في كل مكان على الإنترنت ولا تزال غير صحيحة.

لاحظ أنه إذا فككت ترميز رمز واحد ثم استخدمت "Replace"، فستُستبدل جميع الرموز المطابقة في "input"، وعندها ستفشل كل عمليات فك الترميز التالية.

الحل الصحيح:

/// <summary>
/// Decodes quoted-printable text using the named charset. Runs of adjacent
/// <c>=XX</c> escapes are decoded as one byte sequence, in a single pass, so
/// multi-byte characters are reconstructed and decoded output is never re-decoded.
/// </summary>
/// <param name="input">The quoted-printable text.</param>
/// <param name="charSet">Charset name; UTF-8 is used when it is null or not recognised.</param>
/// <returns>
/// The decoded text with soft line breaks and <c>?=</c> encoded-word terminators removed.
/// </returns>
public static string DecodeQuotedPrintable(string input, string charSet)
    {
        Encoding enc;

        try
        {
            enc = Encoding.GetEncoding(charSet);
        }
        catch (ArgumentException)
        {
            // Null, unknown or unsupported charset name.
            enc = new UTF8Encoding();
        }

        // Line-break special cases kept exactly as before.
        // NOTE(review): the second and third turn a soft break next to a space into a
        // hard line break rather than removing it — confirm this is intended.
        input = input.Replace("=\r\n=", "=");
        input = input.Replace("=\r\n ", "\r\n ");
        input = input.Replace("= \r\n", " \r\n");

        // One Regex.Replace pass over runs of escapes and remaining soft breaks. The
        // earlier per-match input.Replace rewrote every equal substring in the text,
        // including ones produced by previous decodes, and decoded each byte on its own,
        // which breaks multi-byte encodings such as UTF-8.
        var occurences = new Regex(@"(?:=[0-9A-Fa-f]{2}|=\r?\n)+");
        input = occurences.Replace(input, match =>
        {
            string run = match.Value;
            byte[] b = new byte[run.Length / 3];
            int count = 0;
            int i = 0;
            while (i < run.Length)
            {
                char next = run[i + 1];
                if (next == '\r')
                {
                    i += 3; // soft line break "=\r\n"
                }
                else if (next == '\n')
                {
                    i += 2; // soft line break "=\n"
                }
                else
                {
                    b[count++] = byte.Parse(run.Substring(i + 1, 2), System.Globalization.NumberStyles.AllowHexSpecifier);
                    i += 3;
                }
            }

            return enc.GetString(b, 0, count);
        });

        input = input.Replace("?=", "");

        return input;
    }

مرخصة بموجب: CC-BY-SA مع الإسناد

غير تابع لـ StackOverflow