kernel32: Use the Rtl UTF8 conversion functions.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2019-12-03 12:34:00 +01:00
parent f46fa9c92d
commit d33504b9bb
3 changed files with 141 additions and 37 deletions

View file

@ -1030,6 +1030,32 @@ static int utf7_mbstowcs(const char *src, int srclen, WCHAR *dst, int dstlen)
return dest_index;
}
/* Convert a UTF-8 byte string to WCHARs through RtlUTF8ToUnicodeN.
 *
 * flags   MB_* conversion flags; any bit outside MB_FLAGSMASK is rejected.
 * src     input bytes, srclen of them.
 * dst     output buffer; dstlen is counted in WCHARs and is scaled to a
 *         byte count before being handed to the Rtl routine.
 *
 * Returns the byte count reported by the Rtl routine divided by
 * sizeof(WCHAR), or 0 on failure. Failures raised here set
 * ERROR_INVALID_FLAGS or ERROR_NO_UNICODE_TRANSLATION; other statuses go
 * through set_ntstatus() (presumably mapping them to a last-error code --
 * confirm against its definition).
 */
static int mbstowcs_utf8( DWORD flags, LPCSTR src, INT srclen, LPWSTR dst, INT dstlen )
{
    NTSTATUS status;
    DWORD bytes;

    if (flags & ~MB_FLAGSMASK)
    {
        SetLastError( ERROR_INVALID_FLAGS );
        return 0;
    }

    /* a zero-length destination is passed on as a NULL buffer */
    status = RtlUTF8ToUnicodeN( dstlen ? dst : NULL, dstlen * sizeof(WCHAR), &bytes, src, srclen );

    /* every status except "some not mapped" is judged by set_ntstatus() */
    if (status != STATUS_SOME_NOT_MAPPED)
        return set_ntstatus( status ) ? bytes / sizeof(WCHAR) : 0;

    /* unmappable input only counts as an error when the caller asked for it */
    if (!(flags & MB_ERR_INVALID_CHARS))
        return bytes / sizeof(WCHAR);

    SetLastError( ERROR_NO_UNICODE_TRANSLATION );
    return 0;
}
/***********************************************************************
* MultiByteToWideChar (KERNEL32.@)
*
@ -1085,24 +1111,19 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
}
ret = utf7_mbstowcs( src, srclen, dst, dstlen );
break;
case CP_UTF8:
return mbstowcs_utf8( flags, src, srclen, dst, dstlen );
case CP_UNIXCP:
if (unix_cptable)
{
ret = wine_cp_mbstowcs( unix_cptable, flags, src, srclen, dst, dstlen );
break;
}
#ifdef __APPLE__
flags |= MB_COMPOSITE; /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
ret = mbstowcs_utf8( flags, src, srclen, dst, dstlen );
#ifdef __APPLE__ /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
if (ret && dstlen) ret = wine_compose_string( dst, ret );
#endif
/* fall through */
case CP_UTF8:
if (flags & ~MB_FLAGSMASK)
{
SetLastError( ERROR_INVALID_FLAGS );
return 0;
}
ret = wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
break;
return ret;
default:
if (!(table = get_codepage_table( page )))
{
@ -1254,6 +1275,30 @@ static int utf7_wcstombs(const WCHAR *src, int srclen, char *dst, int dstlen)
return dest_index;
}
/* Convert a WCHAR string to UTF-8 bytes through RtlUnicodeToUTF8N.
 *
 * flags   WC_* conversion flags; any bit outside WC_FLAGSMASK is rejected.
 * src     input characters; srclen is counted in WCHARs and is scaled to a
 *         byte count before being handed to the Rtl routine.
 * dst     output buffer of dstlen bytes.
 *
 * Returns the byte count reported by the Rtl routine, or 0 on failure.
 * Failures raised here set ERROR_INVALID_FLAGS or
 * ERROR_NO_UNICODE_TRANSLATION; other statuses go through set_ntstatus()
 * (presumably mapping them to a last-error code -- confirm against its
 * definition).
 */
static int wcstombs_utf8( DWORD flags, LPCWSTR src, INT srclen, LPSTR dst, INT dstlen )
{
    NTSTATUS status;
    DWORD bytes;

    if (flags & ~WC_FLAGSMASK)
    {
        SetLastError( ERROR_INVALID_FLAGS );
        return 0;
    }

    /* a zero-length destination is passed on as a NULL buffer */
    status = RtlUnicodeToUTF8N( dstlen ? dst : NULL, dstlen, &bytes, src, srclen * sizeof(WCHAR) );

    /* every status except "some not mapped" is judged by set_ntstatus() */
    if (status != STATUS_SOME_NOT_MAPPED)
        return set_ntstatus( status ) ? bytes : 0;

    /* unmappable input only counts as an error when the caller asked for it */
    if (!(flags & WC_ERR_INVALID_CHARS))
        return bytes;

    SetLastError( ERROR_NO_UNICODE_TRANSLATION );
    return 0;
}
/***********************************************************************
* WideCharToMultiByte (KERNEL32.@)
*
@ -1329,26 +1374,17 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
ret = wine_cp_wcstombs( unix_cptable, flags, src, srclen, dst, dstlen,
defchar, used ? &used_tmp : NULL );
if (used) *used = used_tmp;
break;
}
else
{
ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
if (used) *used = FALSE;
}
break;
if (used) *used = FALSE;
return wcstombs_utf8( flags, src, srclen, dst, dstlen );
case CP_UTF8:
if (defchar || used)
{
SetLastError( ERROR_INVALID_PARAMETER );
return 0;
}
if (flags & ~WC_FLAGSMASK)
{
SetLastError( ERROR_INVALID_FLAGS );
return 0;
}
ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
break;
return wcstombs_utf8( flags, src, srclen, dst, dstlen );
default:
if (!(table = get_codepage_table( page )))
{

View file

@ -1336,9 +1336,9 @@ static void test_file_write_read( void )
/* test invalid utf8 sequence */
lseek(tempfd, 5, SEEK_SET);
ret = _read(tempfd, btext, sizeof(btext));
todo_wine ok(ret == 10, "_read returned %d, expected 10\n", ret);
ok(ret == 10, "_read returned %d, expected 10\n", ret);
/* invalid char should be replaced by U+FFFD in MultiByteToWideChar */
todo_wine ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
ok(!memcmp(btext+ret-8, "\x62\x00\x7c\x01\x0d\x00\x0a\x00", 8), "btext is incorrect\n");
_close(tempfd);
}

View file

@ -2028,8 +2028,8 @@ static const struct unicode_to_utf8_test unicode_to_utf8[] = {
{ { '-',0xfeff,'-',0xfffe,'-',0 }, "-\xEF\xBB\xBF-\xEF\xBF\xBE-", STATUS_SUCCESS },
{ { 0xfeff,'-',0 }, "\xEF\xBB\xBF-", STATUS_SUCCESS },
{ { 0xfffe,'-',0 }, "\xEF\xBF\xBE-", STATUS_SUCCESS },
/* invalid code point */
{ { 0xffff,'-',0 }, "\xEF\xBF\xBF-", STATUS_SUCCESS },
/* invalid code points */
{ { 0xfffd, '-', 0xfffe, '-', 0xffff,'-',0 }, "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", STATUS_SUCCESS },
/* canonically equivalent representations -- no normalization should happen */
{ { '-',0x1e09,'-',0 }, "-\xE1\xB8\x89-", STATUS_SUCCESS },
{ { '-',0x0107,0x0327,'-',0 }, "-\xC4\x87\xCC\xA7-", STATUS_SUCCESS },
@ -2086,7 +2086,7 @@ static void test_RtlUnicodeToUTF8N(void)
const unsigned char special_expected[] = { 'X',0xc2,0x80,0xef,0xbf,0xbd,0 };
unsigned int input_len;
const unsigned int test_count = ARRAY_SIZE(unicode_to_utf8);
unsigned int i;
unsigned int i, ret;
if (!pRtlUnicodeToUTF8N)
{
@ -2227,6 +2227,14 @@ static void test_RtlUnicodeToUTF8N(void)
i, bytes_out, buffer, unicode_to_utf8[i].expected);
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
memset(buffer, 0x55, sizeof(buffer));
ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode),
buffer, sizeof(buffer), NULL, NULL );
ok( ret == strlen(unicode_to_utf8[i].expected), "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, ret, buffer, unicode_to_utf8[i].expected);
ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
/* same test but include the null terminator */
bytes_out = 0x55555555;
@ -2245,6 +2253,30 @@ static void test_RtlUnicodeToUTF8N(void)
i, bytes_out, buffer, unicode_to_utf8[i].expected);
ok(buffer[bytes_out] == 0x55,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
memset(buffer, 0x55, sizeof(buffer));
ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, -1, buffer, sizeof(buffer), NULL, NULL );
ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, ret, buffer, unicode_to_utf8[i].expected);
ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
SetLastError( 0xdeadbeef );
memset(buffer, 0x55, sizeof(buffer));
ret = WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS, unicode_to_utf8[i].unicode, -1,
buffer, sizeof(buffer), NULL, NULL );
if (unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
{
ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
ret = strlen(unicode_to_utf8[i].expected) + 1;
}
else
ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
"(test %d): got \"%.*s\", expected \"%s\"\n",
i, ret, buffer, unicode_to_utf8[i].expected);
ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
}
}
@ -2329,9 +2361,8 @@ static const struct utf8_to_unicode_test utf8_to_unicode[] = {
{ "-\xEF\xBB\xBF-\xEF\xBF\xBE-", { '-',0xfeff,'-',0xfffe,'-',0 }, STATUS_SUCCESS },
{ "\xEF\xBB\xBF-", { 0xfeff,'-',0 }, STATUS_SUCCESS },
{ "\xEF\xBF\xBE-", { 0xfffe,'-',0 }, STATUS_SUCCESS },
/* invalid code point */
/* 0xffff */
{ "\xEF\xBF\xBF-", { 0xffff,'-',0 }, STATUS_SUCCESS },
/* invalid code points */
{ "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", { 0xfffd,'-',0xfffe,'-',0xffff,'-',0 }, STATUS_SUCCESS },
/* canonically equivalent representations -- no normalization should happen */
{ "-\xE1\xB8\x89-", { '-',0x1e09,'-',0 }, STATUS_SUCCESS },
{ "-\xC4\x87\xCC\xA7-", { '-',0x0107,0x0327,'-',0 }, STATUS_SUCCESS },
@ -2388,7 +2419,7 @@ static void test_RtlUTF8ToUnicodeN(void)
const WCHAR special_expected[] = { 'X',0x80,0xd800,0xdc00,0 };
unsigned int input_len;
const unsigned int test_count = ARRAY_SIZE(utf8_to_unicode);
unsigned int i;
unsigned int i, ret;
if (!pRtlUTF8ToUnicodeN)
{
@ -2497,8 +2528,17 @@ static void test_RtlUTF8ToUnicodeN(void)
ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
"(test %d): got %s, expected %s\n",
i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
ok(buffer[bytes_out] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
memset(buffer, 0x55, sizeof(buffer));
ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, strlen(utf8_to_unicode[i].utf8),
buffer, ARRAY_SIZE(buffer) );
ok( ret == lstrlenW(utf8_to_unicode[i].expected), "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, utf8_to_unicode[i].expected, lstrlenW(utf8_to_unicode[i].expected) * sizeof(WCHAR)),
"(test %d): got %s, expected %s\n",
i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
ok(buffer[ret] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[ret]);
/* same test but include the null terminator */
bytes_out = 0x55555555;
@ -2515,8 +2555,36 @@ static void test_RtlUTF8ToUnicodeN(void)
ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
"(test %d): got %s, expected %s\n",
i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
ok(buffer[bytes_out] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
memset(buffer, 0x55, sizeof(buffer));
ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
"(test %d): got %s, expected %s\n",
i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
ok(buffer[ret] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[ret]);
SetLastError( 0xdeadbeef );
memset(buffer, 0x55, sizeof(buffer));
ret = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
if (utf8_to_unicode[i].status == STATUS_SOME_NOT_MAPPED)
{
ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
ret = lstrlenW(utf8_to_unicode[i].expected) + 1;
}
else
ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
"(test %d): got %s, expected %s\n",
i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
ok(buffer[ret] == 0x5555,
"(test %d): behind string: 0x%x\n", i, buffer[ret]);
}
}