kernel32: Use the Rtl UTF8 conversion functions.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
2024-11-01 07:37:02 +00:00 · 2019-12-03 12:34:00 +01:00 · 2019-12-03 12:34:00 +01:00 · d33504b9bb
commit d33504b9bb
parent f46fa9c92d
3 changed files with 141 additions and 37 deletions
--- a/dlls/kernel32/locale.c
+++ b/dlls/kernel32/locale.c
@ -1030,6 +1030,32 @@ static int utf7_mbstowcs(const char *src, int srclen, WCHAR *dst, int dstlen)
    return dest_index;
 }

+static int mbstowcs_utf8( DWORD flags, LPCSTR src, INT srclen, LPWSTR dst, INT dstlen )
+{
+    DWORD reslen;  /* filled in by RtlUTF8ToUnicodeN; divided by sizeof(WCHAR) before returning */
+    NTSTATUS status;
+    /* UTF-8 -> WCHAR helper: checks flags, converts via RtlUTF8ToUnicodeN, returns a WCHAR count or 0 */
+    if (flags & ~MB_FLAGSMASK)  /* any bit outside MB_FLAGSMASK is rejected */
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    if (!dstlen) dst = NULL;  /* zero dstlen: call the converter without an output buffer */
+    status = RtlUTF8ToUnicodeN( dst, dstlen * sizeof(WCHAR), &reslen, src, srclen );  /* destination size scaled from WCHARs to bytes */
+    if (status == STATUS_SOME_NOT_MAPPED)
+    {
+        if (flags & MB_ERR_INVALID_CHARS)  /* only this flag turns the not-mapped status into a failure */
+        {
+            SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+            return 0;
+        }
+    }  /* otherwise STATUS_SOME_NOT_MAPPED falls through and the reported length is returned */
+    else if (!set_ntstatus( status )) reslen = 0;  /* presumably set_ntstatus maps a failing status to the last error -- confirm */
+
+    return reslen / sizeof(WCHAR);  /* scale the reported length down to a WCHAR count */
+}
+
+
 /***********************************************************************
 *              MultiByteToWideChar   (KERNEL32.@)
 *
@ -1085,24 +1111,19 @@ INT WINAPI MultiByteToWideChar( UINT page, DWORD flags, LPCSTR src, INT srclen,
        }
        ret = utf7_mbstowcs( src, srclen, dst, dstlen );
        break;
+    case CP_UTF8:
+        return mbstowcs_utf8( flags, src, srclen, dst, dstlen );
    case CP_UNIXCP:
        if (unix_cptable)
        {
            ret = wine_cp_mbstowcs( unix_cptable, flags, src, srclen, dst, dstlen );
            break;
        }
-#ifdef __APPLE__
-        flags |= MB_COMPOSITE;  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        ret = mbstowcs_utf8( flags, src, srclen, dst, dstlen );
+#ifdef __APPLE__  /* work around broken Mac OS X filesystem that enforces decomposed Unicode */
+        if (ret && dstlen) ret = wine_compose_string( dst, ret );
 #endif
-        /* fall through */
-    case CP_UTF8:
-        if (flags & ~MB_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_mbstowcs( flags, src, srclen, dst, dstlen );
-        break;
+        return ret;
    default:
        if (!(table = get_codepage_table( page )))
        {
@ -1254,6 +1275,30 @@ static int utf7_wcstombs(const WCHAR *src, int srclen, char *dst, int dstlen)
    return dest_index;
 }

+static int wcstombs_utf8( DWORD flags, LPCWSTR src, INT srclen, LPSTR dst, INT dstlen )
+{
+    DWORD reslen;  /* filled in by RtlUnicodeToUTF8N; returned unscaled */
+    NTSTATUS status;
+    /* WCHAR -> UTF-8 helper: checks flags, converts via RtlUnicodeToUTF8N, returns reslen or 0 */
+    if (flags & ~WC_FLAGSMASK)  /* any bit outside WC_FLAGSMASK is rejected */
+    {
+        SetLastError( ERROR_INVALID_FLAGS );
+        return 0;
+    }
+    if (!dstlen) dst = NULL;  /* zero dstlen: call the converter without an output buffer */
+    status = RtlUnicodeToUTF8N( dst, dstlen, &reslen, src, srclen * sizeof(WCHAR) );  /* source length scaled from WCHARs to bytes */
+    if (status == STATUS_SOME_NOT_MAPPED)
+    {
+        if (flags & WC_ERR_INVALID_CHARS)  /* only this flag turns the not-mapped status into a failure */
+        {
+            SetLastError( ERROR_NO_UNICODE_TRANSLATION );
+            return 0;
+        }
+    }  /* otherwise STATUS_SOME_NOT_MAPPED falls through and the reported length is returned */
+    else if (!set_ntstatus( status )) reslen = 0;  /* presumably set_ntstatus maps a failing status to the last error -- confirm */
+    return reslen;
+}
+
 /***********************************************************************
 *              WideCharToMultiByte   (KERNEL32.@)
 *
@ -1329,26 +1374,17 @@ INT WINAPI WideCharToMultiByte( UINT page, DWORD flags, LPCWSTR src, INT srclen,
            ret = wine_cp_wcstombs( unix_cptable, flags, src, srclen, dst, dstlen,
                                    defchar, used ? &used_tmp : NULL );
            if (used) *used = used_tmp;
+            break;
        }
-        else
-        {
-            ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-            if (used) *used = FALSE;
-        }
-        break;
+        if (used) *used = FALSE;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
    case CP_UTF8:
        if (defchar || used)
        {
            SetLastError( ERROR_INVALID_PARAMETER );
            return 0;
        }
-        if (flags & ~WC_FLAGSMASK)
-        {
-            SetLastError( ERROR_INVALID_FLAGS );
-            return 0;
-        }
-        ret = wine_utf8_wcstombs( flags, src, srclen, dst, dstlen );
-        break;
+        return wcstombs_utf8( flags, src, srclen, dst, dstlen );
    default:
        if (!(table = get_codepage_table( page )))
        {
--- a/dlls/msvcrt/tests/file.c
+++ b/dlls/msvcrt/tests/file.c
@ -1336,9 +1336,9 @@ static void test_file_write_read( void )
      /* test invalid utf8 sequence */
      lseek(tempfd, 5, SEEK_SET);
      ret = _read(tempfd, btext, sizeof(btext));
-      todo_wine ok(ret == 10, "_read returned %d, expected 10\n", ret);
+      ok(ret == 10, "_read returned %d, expected 10\n", ret);
      /* invalid char should be replaced by U+FFFD in MultiByteToWideChar */
-      todo_wine ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
+      ok(!memcmp(btext, "\xfd\xff", 2), "invalid UTF8 character was not replaced by U+FFFD\n");
      ok(!memcmp(btext+ret-8, "\x62\x00\x7c\x01\x0d\x00\x0a\x00", 8), "btext is incorrect\n");
      _close(tempfd);
  }
--- a/dlls/ntdll/tests/rtlstr.c
+++ b/dlls/ntdll/tests/rtlstr.c
@ -2028,8 +2028,8 @@ static const struct unicode_to_utf8_test unicode_to_utf8[] = {
    { { '-',0xfeff,'-',0xfffe,'-',0 }, "-\xEF\xBB\xBF-\xEF\xBF\xBE-", STATUS_SUCCESS },
    { { 0xfeff,'-',0 }, "\xEF\xBB\xBF-", STATUS_SUCCESS },
    { { 0xfffe,'-',0 }, "\xEF\xBF\xBE-", STATUS_SUCCESS },
-    /* invalid code point */
-    { { 0xffff,'-',0 }, "\xEF\xBF\xBF-", STATUS_SUCCESS },
+    /* invalid code points */
+    { { 0xfffd, '-', 0xfffe, '-', 0xffff,'-',0 }, "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", STATUS_SUCCESS },
    /* canonically equivalent representations -- no normalization should happen */
    { { '-',0x1e09,'-',0 }, "-\xE1\xB8\x89-", STATUS_SUCCESS },
    { { '-',0x0107,0x0327,'-',0 }, "-\xC4\x87\xCC\xA7-", STATUS_SUCCESS },
@ -2086,7 +2086,7 @@ static void test_RtlUnicodeToUTF8N(void)
    const unsigned char special_expected[] = { 'X',0xc2,0x80,0xef,0xbf,0xbd,0 };
    unsigned int input_len;
    const unsigned int test_count = ARRAY_SIZE(unicode_to_utf8);
-    unsigned int i;
+    unsigned int i, ret;

    if (!pRtlUnicodeToUTF8N)
    {
@ -2227,6 +2227,14 @@ static void test_RtlUnicodeToUTF8N(void)
           i, bytes_out, buffer, unicode_to_utf8[i].expected);
        ok(buffer[bytes_out] == 0x55,
           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, lstrlenW(unicode_to_utf8[i].unicode),
+                                   buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);

        /* same test but include the null terminator */
        bytes_out = 0x55555555;
@ -2245,6 +2253,30 @@ static void test_RtlUnicodeToUTF8N(void)
           i, bytes_out, buffer, unicode_to_utf8[i].expected);
        ok(buffer[bytes_out] == 0x55,
           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, 0, unicode_to_utf8[i].unicode, -1, buffer, sizeof(buffer), NULL, NULL );
+        ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = WideCharToMultiByte( CP_UTF8, WC_ERR_INVALID_CHARS, unicode_to_utf8[i].unicode, -1,
+                                   buffer, sizeof(buffer), NULL, NULL );
+        if (unicode_to_utf8[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = strlen(unicode_to_utf8[i].expected) + 1;
+        }
+        else
+            ok( ret == strlen(unicode_to_utf8[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, unicode_to_utf8[i].expected, ret),
+           "(test %d): got \"%.*s\", expected \"%s\"\n",
+           i, ret, buffer, unicode_to_utf8[i].expected);
+        ok(buffer[ret] == 0x55, "(test %d): behind string: 0x%x\n", i, buffer[ret]);
    }
 }

@ -2329,9 +2361,8 @@ static const struct utf8_to_unicode_test utf8_to_unicode[] = {
    { "-\xEF\xBB\xBF-\xEF\xBF\xBE-", { '-',0xfeff,'-',0xfffe,'-',0 }, STATUS_SUCCESS },
    { "\xEF\xBB\xBF-", { 0xfeff,'-',0 }, STATUS_SUCCESS },
    { "\xEF\xBF\xBE-", { 0xfffe,'-',0 }, STATUS_SUCCESS },
-    /* invalid code point */
-       /* 0xffff */
-    { "\xEF\xBF\xBF-", { 0xffff,'-',0 }, STATUS_SUCCESS },
+    /* invalid code points */
+    { "\xEF\xBF\xBD-\xEF\xBF\xBE-\xEF\xBF\xBF-", { 0xfffd,'-',0xfffe,'-',0xffff,'-',0 }, STATUS_SUCCESS },
    /* canonically equivalent representations -- no normalization should happen */
    { "-\xE1\xB8\x89-", { '-',0x1e09,'-',0 }, STATUS_SUCCESS },
    { "-\xC4\x87\xCC\xA7-", { '-',0x0107,0x0327,'-',0 }, STATUS_SUCCESS },
@ -2388,7 +2419,7 @@ static void test_RtlUTF8ToUnicodeN(void)
    const WCHAR special_expected[] = { 'X',0x80,0xd800,0xdc00,0 };
    unsigned int input_len;
    const unsigned int test_count = ARRAY_SIZE(utf8_to_unicode);
-    unsigned int i;
+    unsigned int i, ret;

    if (!pRtlUTF8ToUnicodeN)
    {
@ -2497,8 +2528,17 @@ static void test_RtlUTF8ToUnicodeN(void)
        ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
           "(test %d): got %s, expected %s\n",
           i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, strlen(utf8_to_unicode[i].utf8),
+                                   buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected), "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, lstrlenW(utf8_to_unicode[i].expected) * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);

        /* same test but include the null terminator */
        bytes_out = 0x55555555;
@ -2515,8 +2555,36 @@ static void test_RtlUTF8ToUnicodeN(void)
        ok(!memcmp(buffer, utf8_to_unicode[i].expected, bytes_out),
           "(test %d): got %s, expected %s\n",
           i, wine_dbgstr_wn(buffer, bytes_out / sizeof(WCHAR)), wine_dbgstr_w(utf8_to_unicode[i].expected));
-        ok(buffer[bytes_out] == 0x5555,
-           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out]);
+        ok(buffer[bytes_out / sizeof(WCHAR)] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[bytes_out / sizeof(WCHAR)]);
+
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, 0, utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
+
+        SetLastError( 0xdeadbeef );
+        memset(buffer, 0x55, sizeof(buffer));
+        ret = MultiByteToWideChar( CP_UTF8, MB_ERR_INVALID_CHARS,
+                                   utf8_to_unicode[i].utf8, -1, buffer, ARRAY_SIZE(buffer) );
+        if (utf8_to_unicode[i].status == STATUS_SOME_NOT_MAPPED)
+        {
+            ok( ret == 0, "(test %d): wrong len %u\n", i, ret );
+            ok( GetLastError() == ERROR_NO_UNICODE_TRANSLATION, "(test %d): wrong error %u\n", i, GetLastError() );
+            ret = lstrlenW(utf8_to_unicode[i].expected) + 1;
+        }
+        else
+            ok( ret == lstrlenW(utf8_to_unicode[i].expected) + 1, "(test %d): wrong len %u\n", i, ret );
+
+        ok(!memcmp(buffer, utf8_to_unicode[i].expected, ret * sizeof(WCHAR)),
+           "(test %d): got %s, expected %s\n",
+           i, wine_dbgstr_wn(buffer, ret), wine_dbgstr_w(utf8_to_unicode[i].expected));
+        ok(buffer[ret] == 0x5555,
+           "(test %d): behind string: 0x%x\n", i, buffer[ret]);
    }
 }