I have a problem converting a UTF-8 encoded char array to a CP1250 encoded char array.

I tried building a static iconv library, but compiling my UTF-8 -> CP1250 code always fails with the following linker errors:

1>foo.obj : error LNK2001: unresolved external symbol libiconv_close
1>foo.obj : error LNK2001: unresolved external symbol libiconv_open
1>foo.obj : error LNK2001: unresolved external symbol libiconv

I also tried a lot of precompiled static libraries, but always got the same errors.

It doesn't matter whether /MD or /MT is set.

I do not insist on using iconv. Is there any other simple solution to this problem? Thanks.

有帮助吗?

解决方案

Since you're coding on Windows, why not use the Windows API? Use MultiByteToWideChar to convert losslessly up to UTF-16, then use WideCharToMultiByte to convert down to e.g. CP 1250.


Addendum: the code below seems to work OK.

#include <assert.h>             // assert
#include <iostream>             // std::wcout, std::wcerr
#include <iterator>             // std::begin, std::end
#include <string>               // std::string, std::wstring
#include <stddef.h>             // ptrdiff_t
#include <stdexcept>            // std::length_error, std::runtime_error etc.
#include <stdlib.h>             // EXIT_SUCCESS
#include <string.h>             // memcmp
#include <system_error>         // std::system_error etc.

// Example of how to include Microsoft's <windows.h>.
// More support stuff is generally needed for more serious code.
#undef UNICODE
#define UNICODE
#undef NOMINMAX
#define NOMINMAX
#undef STRICT
#define STRICT
#include <windows.h>            // E_FAIL, etc.

// Small general-purpose helpers: basic type aliases, a signed element count
// for ranges, and the `hopefully( ok ) || fail( ... )` error-reporting idiom.
namespace cppx {
    using std::begin;
    using std::end;
    using std::error_code;
    using std::string;
    using std::system_category;
    using std::system_error;

    using Byte = unsigned char;     // One raw byte of data.
    using Size = ptrdiff_t;         // Signed count of elements.

    // Number of elements in `c`, computed as end( c ) - begin( c ).
    template< class Container >
    Size size( Container const& c )
    {
        return end( c ) - begin( c );
    }

    // Returns `condition` unchanged; exists so that a check reads as
    // `hopefully( ok ) || fail( "what went wrong" )`.
    bool hopefully( bool const condition )
    {
        return condition;
    }

    // Never returns normally: always throws std::system_error built from
    // `code` in the system category, with `message` as the explanation.
    // The `bool` return type only makes the call usable on the right-hand
    // side of `||`.
    bool fail( string const& message, int const code = 0 )
    {
        error_code const error( code, system_category() );
        throw system_error( error, message );
    }
}  // namespace cppx

// Text-encoding conversions built on the Windows API:
// UTF-8 -> UTF-16 through MultiByteToWideChar, and UTF-16 -> a single-byte
// codepage through WideCharToMultiByte. A failing API call is reported via
// cppx::fail, which throws std::system_error carrying the GetLastError() code.
namespace data {
    using cppx::Byte;
    using cppx::hopefully;
    using cppx::fail;
    using std::string;
    using std::wstring;

    // The three bytes of the UTF-8 encoded byte order mark (plus terminator).
    char const utf8_bom[] = "\xEF\xBB\xBF";

    // Returns a reference to a function-local static `Type` object; every
    // distinct <Type, n> pair gets its own object. It serves as the default
    // target of optional out-parameters. All calls that rely on the default
    // write to the same object, so its value must not be relied upon.
    template< class Type, int n >
    auto dummy()
        -> Type&
    { static Type the_dummy; return the_dummy; }

    // Converts `length` bytes of UTF-8 text starting at `bytes` to UTF-16.
    //
    // A leading UTF-8 byte order mark is skipped. Empty input (after BOM
    // removal) yields an empty string. `length` must be non-negative; this
    // is checked by assert only.
    //
    // Throws std::system_error if MultiByteToWideChar reports failure.
    auto utf16_from_utf8( char const* bytes, int length )
        -> wstring
    {
        if( length >= 3 && ::memcmp( bytes, utf8_bom, 3 ) == 0 )
        {
            bytes += 3;  length -= 3;
        }

        assert( length >= 0 );
        if( length == 0 ) { return L""; }

        // First call: ask only for the required number of UTF-16 code units.
        int const buffer_size = ::MultiByteToWideChar(
            CP_UTF8,
            0,                  // flags; CP_UTF8 accepts only 0 or MB_ERR_INVALID_CHARS
            bytes,
            length,
            nullptr,            // output buffer
            0                   // buffer size
            );
        hopefully( buffer_size > 0 )
            || fail( "MultiByteToWideChar (1st call)", ::GetLastError() );

        // Second call: do the actual conversion into a buffer of that size.
        wstring result( buffer_size, L'#' );
        int const n_encoding_values = ::MultiByteToWideChar(
            CP_UTF8,
            0,                  // flags; CP_UTF8 accepts only 0 or MB_ERR_INVALID_CHARS
            bytes,
            length,
            &result[0],         // output buffer
            buffer_size
            );
        hopefully( n_encoding_values > 0 )
            || fail( "MultiByteToWideChar (2nd call)", ::GetLastError() );
        assert( n_encoding_values <= buffer_size );

        result.resize( n_encoding_values );     // Possible down-sizing.
        return result;
    }

    // Convenience overload for data held as unsigned bytes; forwards to the
    // `char const*` overload.
    auto utf16_from_utf8( Byte const* const bytes, int const length )
        -> wstring
    {
        return utf16_from_utf8( reinterpret_cast<char const*>( bytes ), length );
    }

    // Converts `length` UTF-16 code units starting at `s` to text in the
    // given single-byte `codepage` (default: the system ANSI codepage).
    //
    // Characters that have no representation in the codepage are replaced
    // by ASCII DEL (0x7F); best-fit substitution is disabled. On return
    // `used_default_char` tells whether any such replacement happened; it
    // is set to false for empty input.
    //
    // `codepage` must not be CP_UTF8 (checked by assert only).
    // Throws std::system_error if WideCharToMultiByte reports failure.
    auto sbcs_from(
        wchar_t const* const    s,
        int const               length,
        unsigned const          codepage            = ::GetACP(),
        bool&                   used_default_char   = dummy<bool, 1>()
        )
        -> string
    {
        assert( codepage != CP_UTF8 );
        if( length == 0 )
        {
            // Nothing converted, hence nothing replaced: do not leave the
            // caller's flag holding a stale value.
            used_default_char = false;
            return "";
        }

        DWORD const         flags           = WC_NO_BEST_FIT_CHARS;     // Must be 0 for UTF-8.
        char const* const   default_char    = "\x7F";                   // ASCII DEL

        // First call: ask only for the required number of bytes.
        int const buffer_size = WideCharToMultiByte(
            codepage,
            flags,
            s,
            length,
            nullptr,            // buffer
            0,                  // buffer size
            default_char,
            nullptr
            );
        hopefully( buffer_size > 0 )
            || fail( "WideCharToMultiByte (1st call)", ::GetLastError() );

        // Second call: do the actual conversion into a buffer of that size.
        string  result              = string( buffer_size, '#' );
        BOOL    defaulted           = false;
        int const n_bytes = WideCharToMultiByte(
            codepage,
            flags,
            s,
            length,
            &result[0],         // buffer
            buffer_size,
            default_char,
            &defaulted
            );
        hopefully( n_bytes > 0 )
            || fail( "WideCharToMultiByte (2nd call)", ::GetLastError() );
        assert( n_bytes <= buffer_size );

        result.resize( n_bytes );
        used_default_char = !!defaulted;
        return result;
    }

    // Same conversion for a whole std::wstring.
    //
    // Throws std::length_error if the string has more code units than an
    // `int` can count, since WideCharToMultiByte takes an `int` length.
    auto sbcs_from(
        wstring const&          s,
        unsigned const          codepage            = ::GetACP(),
        bool&                   used_default_char   = dummy<bool, 1>()
        )
        -> string
    {
        if( s.empty() )
        {
            used_default_char = false;
            return "";
        }

        // Refuse lengths that do not survive the conversion to `int`
        // instead of silently converting only part of the string.
        int const length = static_cast<int>( s.length() );
        if( length < 0 || static_cast<wstring::size_type>( length ) != s.length() )
        {
            throw std::length_error( "sbcs_from: string too long for WideCharToMultiByte" );
        }
        return sbcs_from( s.data(), length, codepage, used_default_char );
    }

}  // namespace data

void cpp_main()
{
    using cppx::Byte;
    using cppx::fail;
    using cppx::size;
    using std::string;
    using std::wstring;

    auto const infobox      = MB_ICONINFORMATION | MB_SETFOREGROUND;
    Byte const utf8_bytes[] = // UTF-8 with BOM, as if from a file.
    {
        0xEF, 0xBB, 0xBF, 0x42, 0x6C, 0xC3, 0xA5, 0x62, 0xC3, 0xA6,
        0x72, 0x73, 0x79, 0x6C, 0x74, 0x65, 0x74, 0xC3, 0xB8, 0x79,
        0x21, 0x20, 0xE6, 0x97, 0xA5, 0xE6, 0x9C, 0xAC, 0xE5, 0x9B,
        0xBD, 0x20, 0xD0, 0xBA, 0xD0, 0xBE, 0xD1, 0x88, 0xD0, 0xBA,
        0xD0, 0xB0, 0x21, 0x0D, 0x0A, 0x0D, 0x0A, 0x48, 0x75, 0x6E,
        0x67, 0x61, 0x72, 0x69, 0x61, 0x6E, 0x20, 0x61, 0x6C, 0x70,
        0x68, 0x61, 0x62, 0x65, 0x74, 0x3A, 0x0D, 0x0A, 0x41, 0x20,
        0xC3, 0x81, 0x20, 0x42, 0x20, 0x43, 0x20, 0x43, 0x73, 0x20,
        0x44, 0x20, 0x44, 0x7A, 0x20, 0x44, 0x7A, 0x73, 0x20, 0x45,
        0x20, 0xC3, 0x89, 0x20, 0x46, 0x20, 0x47, 0x20, 0x47, 0x79,
        0x20, 0x48, 0x20, 0x49, 0x20, 0xC3, 0x8D, 0x20, 0x4A, 0x20,
        0x4B, 0x20, 0x4C, 0x20, 0x4C, 0x79, 0x20, 0x4D, 0x20, 0x4E,
        0x0D, 0x0A, 0x4E, 0x79, 0x20, 0x4F, 0x20, 0xC3, 0x93, 0x20,
        0xC3, 0x96, 0x20, 0xC5, 0x90, 0x20, 0x50, 0x20, 0x28, 0x51,
        0x29, 0x20, 0x52, 0x20, 0x53, 0x20, 0x53, 0x7A, 0x20, 0x54,
        0x20, 0x54, 0x79, 0x20, 0x55, 0x20, 0xC3, 0x9A, 0x20, 0xC3,
        0x9C, 0x20, 0xC5, 0xB0, 0x20, 0x56, 0x20, 0x28, 0x57, 0x29,
        0x20, 0x28, 0x58, 0x29, 0x20, 0x28, 0x59, 0x29, 0x20, 0x5A,
        0x20, 0x5A, 0x73, 0x0D, 0x0A
    };

    wstring const text = data::utf16_from_utf8( utf8_bytes, size( utf8_bytes ) );
    ::MessageBox( 0, text.c_str(), L"Original text:", infobox );

    string const    sbcs_text           = data::sbcs_from( text, 1250 );

    WORD const      hungarian           = MAKELANGID(
        LANG_HUNGARIAN, SUBLANG_HUNGARIAN_HUNGARY
        );
    DWORD const     hungarian_locale_id = MAKELCID( hungarian, SORT_DEFAULT );

    SetThreadLocale( hungarian_locale_id )
        || fail( "SetThreadLocale", ::GetLastError() );
    DWORD thread_cp = 0;
    ::GetLocaleInfo(
        ::GetThreadLocale(),    // Not LOCALE_USER_DEFAULT,
        LOCALE_IDEFAULTANSICODEPAGE | LOCALE_RETURN_NUMBER,
        reinterpret_cast<wchar_t*>( &thread_cp ),
        sizeof( thread_cp )/sizeof( wchar_t )
        )
        || fail( "GetLocaleInfo", ::GetLastError() );
    //::MessageBox( 0, std::to_wstring( thread_cp ).c_str(), L"Codepage:", MB_SETFOREGROUND );
    assert( thread_cp == 1250 );
    ::MessageBoxA( 0, sbcs_text.c_str(), "SBCS codepage 1250 text:", infobox );
}

// Program entry point: runs cpp_main and turns exceptions into an error
// message on the standard error stream plus a non-zero process exit code.
//
// Returns EXIT_SUCCESS when cpp_main completes. For a std::system_error the
// exit code is the error's code value, or EXIT_FAILURE if that value is 0;
// for any other std::exception it is E_FAIL.
auto main()
    -> int
{
    using std::cerr;
    using std::endl;
    using std::exception;
    using std::system_error;

    try
    {
        cpp_main();
        return EXIT_SUCCESS;
    }
    catch( system_error const& x )
    {
        auto const code = x.code().value();
        cerr << "!" << x.what() << " (code: " << code << ")" << endl;
        // A system_error can carry code 0 (the default `code` of cppx::fail,
        // or GetLastError() returning 0). Exiting with 0 would report
        // success for a run that actually failed.
        return code != 0 ? code : EXIT_FAILURE;
    }
    catch( exception const& x )
    {
        cerr << "!" << x.what() << endl;
        return E_FAIL;
    }
}

Example output (Windows messageboxes):

enter image description here enter image description here

And yes, the dummy function is an abomination, unsound and ungood, but hey. :)

许可以下: CC-BY-SA归因
不隶属于 StackOverflow
scroll top