سؤال

I'm having problems preserving newlines from a RichEdit control inside strings. What I'm doing is:

  1. Get text from RichEdit control
  2. Split everything delimited by a space
  3. Add some RTF formatting
  4. "Fuse" words back together
  5. Send text to control

I'm not sure what part causes this so here's the most relevant bits:

// Sends EM_GETTEXTLENGTHEX to the control and returns the reported length
// plus one.
// NOTE(review): the flags request GTL_NUMBYTES only (no CRLF flag), while
// GetText() uses the result as a TCHAR element count and reads the text with
// GT_USECRLF; the +1 is presumably room for a null terminator.
int RichEdit::GetTextLength() const
{
    GETTEXTLENGTHEX len;
    len.codepage = 1200;
    len.flags = GTL_NUMBYTES;
    return (int)SendMessage(this->handle, EM_GETTEXTLENGTHEX, (WPARAM)&len, 0) + 1;
}

// Reads the control's text with EM_GETTEXTEX into a temporary heap buffer
// and returns it as a tstring.
tstring RichEdit::GetText() const
{
    // Buffer size comes from GetTextLength(), which already adds one.
    auto len = this->GetTextLength();
    GETTEXTEX str;

    TCHAR* tmp = new TCHAR[len];
    // NOTE(review): cb is set to the same number used as the element count
    // above, without scaling by sizeof(TCHAR) — confirm the intended unit.
    str.cb = len;
    str.flags = GT_USECRLF;
    str.codepage = 1200;


    str.lpDefaultChar = NULL;
        str.lpUsedDefChar = NULL;

        // The message's return value is deliberately discarded.
        (void)SendMessage(this->handle, EM_GETTEXTEX, (WPARAM)&str, (LPARAM)tmp);

        // Copies up to the first null character found in tmp.
        tstring ret(tmp);

        delete[] tmp;
        return ret;
    }

// Converts `text` to UTF-8 in a malloc'ed buffer and sends it to the control
// with EM_SETTEXTEX, using `flags` as the SETTEXTEX flags and CP_UTF8 as the
// code page.
void RichEdit::SetRtfText(const tstring& text, int flags)
    {
        DWORD WideLength = text.length();
        // Output buffer is sized at four bytes per input character.
        DWORD Length     = WideLength * 4;
        PSTR Utf8        = (PSTR)malloc(Length);

        // NOTE(review): both the input length and the output size are passed
        // reduced by one; text.length() does not count a terminator, so the
        // last character is left out of the conversion.
        int ReturnedLength = WideCharToMultiByte(CP_UTF8,
            0,
            text.c_str(),
            WideLength-1,
            Utf8,
            Length-1,
            NULL,
            NULL);

        // The buffer is only null-terminated when the conversion reported a
        // non-zero length.
        if (ReturnedLength)
            Utf8[ReturnedLength] = 0;

        SETTEXTEX st = {0};
        st.flags = flags;
        st.codepage = CP_UTF8;
        (void)SendMessage(this->handle, EM_SETTEXTEX, (WPARAM)&st, (LPARAM)Utf8 );

        free(Utf8);
    }

// Splits `input` on the delimiter `split_id` and assigns the pieces to `res`
// (replacing its previous contents). Each piece is stored as a pair whose
// bool member is false.
//
// The input is consumed one character at a time: characters accumulate in a
// stream, and whenever the accumulated text contains `split_id`, the text in
// front of the delimiter is emitted as a token and the accumulator is reset.
void split ( tstring input , tstring split_id, std::vector<std::pair<tstring,bool>>& res ) {
    std::vector<std::pair<tstring,bool>> result;
    int i = 0;
    bool add;
    tstring temp;
    std::wstringstream ss;
    size_t found;
    tstring real;
    // Iterations since the last emitted token; it is 0 only on the very first
    // iteration because it is incremented at the end of every pass.
    int r = 0;
    while ( i != input.length() ) 
    {
        add = false;
        ss << input.at(i);
        temp = ss.str();

        found = temp.find(split_id);
        if ( found != tstring::npos ) 
        {
            // Delimiter seen: keep only the text that precedes it.
            add = true;
            real.append ( temp , 0 , found );
        } else if ( r > 0 &&  ( i+1 ) == input.length() ) 
        {
            // Last character reached without a delimiter: found is npos here,
            // so this append copies all of temp as the final token. Because
            // of the r > 0 test, a one-character input that does not contain
            // split_id produces no token.
            add = true;
            real.append ( temp , 0 , found );
        }
        if ( add ) 
        {
            result.emplace_back(std::make_pair(real,false));
            // Reset the accumulator state for the next token.
            ss.str(tstring());
            ss.clear();
            temp.clear();
            real.clear();
            r = 0;
        }
        i++;
        r++;
    }
    res = result;
}

ps: tstring is just a typedef for std::wstring/std::string

How can I preserve the newlines?

هل كانت مفيدة؟

المحلول

There are quite a few problems with your code.

Your code is TCHAR based, but you are not actually retrieving/setting the RTF data using TCHAR correctly.

When retrieving the text, you are normalizing line breaks to CRLF, but you are not doing that same normalizing when retrieving the text length, so they are going to be out of sync with each other.

You are writing data to the RichEdit using UTF-8, but RTF is an ASCII-based format that uses escape sequences for Unicode data. If you are going to retrieve data as Unicode, you may as well write it using Unicode as well, and make sure you are doing all of that correctly to begin with. Let the RichEdit control handle the Unicode for you.

Your use of WideCharToMultiByte() is wrong. You should not be subtracting 1 from the string lengths at all. You are likely trying to account for null terminators, but the length values do not include null terminators to begin with. If you are going to stick with UTF-8 then you should be using WideCharToMultiByte() to calculate the correct UTF-8 length instead of hard-coding it.

// First call passes no output buffer, so it only reports how many UTF-8
// bytes the conversion needs.
int Length = WideCharToMultiByte(CP_UTF8, 0, text.c_str(), text.length(), NULL, 0, NULL, NULL);
// One extra byte for the null terminator, which Length does not include.
// The buffer must be a pointer (char*), not a single char.
char* Utf8 = new char[Length+1];
WideCharToMultiByte(CP_UTF8, 0, text.c_str(), text.length(), Utf8, Length, NULL, NULL);
Utf8[Length] = 0;
...
delete[] Utf8;

With that said, if you are going to stick with TCHAR then try this:

// Code page handed to the RichEdit text messages: 1200 in UNICODE builds,
// CP_ACP otherwise, so it follows the character type selected for TCHAR.
#ifdef UNICODE
#define RTFCodePage 1200
#else
#define RTFCodePage CP_ACP
#endif

int RichEdit::GetTextLength() const
{
    GETTEXTLENGTHEX len = {0};
    len.codepage = RTFCodePage;
    len.flags = GTL_NUMCHARS | GTL_USECRLF;
    return SendMessage(this->handle, EM_GETTEXTLENGTHEX, (WPARAM)&len, 0);
}

// Retrieves the control's text (line breaks as CRLF) as a tstring.
// Returns an empty string when the control reports no text or an error.
tstring RichEdit::GetText() const
{
    // Reported character count plus one slot for the null terminator.
    const int capacity = this->GetTextLength() + 1;
    // A negative length (error result) must not be used as a buffer size.
    if (capacity < 1)
        return tstring();

    GETTEXTEX str = {0};
    str.cb = capacity * sizeof(TCHAR);  // cb is expressed in bytes
    str.flags = GT_USECRLF;
    str.codepage = RTFCodePage;

    vector<TCHAR> tmp(capacity);
    const int copied = (int)SendMessage(this->handle, EM_GETTEXTEX, (WPARAM)&str, (LPARAM)&tmp[0]);
    if (copied <= 0)
        return tstring();

    // EM_GETTEXTEX returns the number of TCHARs copied NOT counting the null
    // terminator, so the whole count is text; subtracting one would drop the
    // final character.
    return tstring(&tmp[0], copied);
}

void RichEdit::SetRtfText(const tstring& text, int flags)
{
    SETTEXTEX st = {0};
    st.flags = flags;
    st.codepage = RTFCodePage;

    #ifdef UNICODE
    st.flags |= ST_UNICODE;
    #endif

    SendMessage(this->handle, EM_SETTEXTEX, (WPARAM)&st, (LPARAM)text.c_str());
}

It would be better to drop TCHAR and just use Unicode for everything:

int RichEdit::GetTextLength() const
{
    GETTEXTLENGTHEX len = {0};
    len.codepage = 1200;
    len.flags = GTL_NUMCHARS | GTL_USECRLF;
    return SendMessage(this->handle, EM_GETTEXTLENGTHEX, (WPARAM)&len, 0);
}

// Retrieves the control's text (line breaks as CRLF) as a wide string.
// Returns an empty string when the control reports no text or an error.
wstring RichEdit::GetText() const
{
    // Reported character count plus one slot for the null terminator.
    const int capacity = this->GetTextLength() + 1;
    // A negative length (error result) must not be used as a buffer size.
    if (capacity < 1)
        return wstring();

    GETTEXTEX str = {0};
    str.cb = capacity * sizeof(WCHAR);  // cb is expressed in bytes
    str.flags = GT_USECRLF;
    str.codepage = 1200;

    vector<WCHAR> tmp(capacity);
    const int copied = (int)SendMessage(this->handle, EM_GETTEXTEX, (WPARAM)&str, (LPARAM)&tmp[0]);
    if (copied <= 0)
        return wstring();

    // Build the string from the buffer's data pointer (a vector cannot be
    // passed to the wstring constructor directly). The returned count already
    // excludes the null terminator, so it is used as-is.
    return wstring(&tmp[0], copied);
}

void RichEdit::SetRtfText(const wstring& text, int flags)
{
    SETTEXTEX st = {0};
    st.flags = flags | ST_UNICODE;
    st.codepage = 1200;

    SendMessage(this->handle, EM_SETTEXTEX, (WPARAM)&st, (LPARAM)text.c_str());
}

Update: if you have to go back to UTF-8 for the EM_SETTEXTEX message then try this:

// Converts `text` to UTF-8 and sends it to the control with EM_SETTEXTEX
// using code page CP_UTF8. ST_UNICODE is stripped from `flags` because the
// payload is a byte string. If a conversion step fails or `text` is empty,
// an empty string is sent.
void RichEdit::SetRtfText(const tstring& text, int flags)
{
    string Utf8;

    #ifdef UNICODE

    // UTF-16 -> UTF-8: size query first, then the real conversion.
    const int Length = WideCharToMultiByte(CP_UTF8, 0, text.c_str(), (int)text.length(), NULL, 0, NULL, NULL);
    if (Length > 0)
    {
        Utf8.resize(Length);
        WideCharToMultiByte(CP_UTF8, 0, text.c_str(), (int)text.length(), &Utf8[0], Length, NULL, NULL);
    }

    #else

    // ANSI -> UTF-16 -> UTF-8: there is no direct ANSI-to-UTF-8 conversion.
    const int WideLength = MultiByteToWideChar(CP_ACP, 0, text.c_str(), (int)text.length(), NULL, 0);
    if (WideLength > 0)
    {
        vector<WCHAR> tmp(WideLength);
        MultiByteToWideChar(CP_ACP, 0, text.c_str(), (int)text.length(), &tmp[0], WideLength);

        // tmp is a vector, so use its data pointer and element count
        // (vector has no c_str()/length()).
        const int Length = WideCharToMultiByte(CP_UTF8, 0, &tmp[0], WideLength, NULL, 0, NULL, NULL);
        if (Length > 0)
        {
            Utf8.resize(Length);
            WideCharToMultiByte(CP_UTF8, 0, &tmp[0], WideLength, &Utf8[0], Length, NULL, NULL);
        }
    }

    #endif

    SETTEXTEX st = {0};
    st.flags = flags & ~ST_UNICODE;
    st.codepage = CP_UTF8;
    SendMessage(this->handle, EM_SETTEXTEX, (WPARAM)&st, (LPARAM)Utf8.c_str());
}
مرخصة بموجب: CC-BY-SA مع الإسناد
لا تنتمي إلى StackOverflow
scroll top