ntdll: Reimplement IdnToUnicode() using the normalization table and the ntdll helpers.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Alexandre Julliard 2020-03-02 16:28:48 +01:00
parent 01237d0896
commit ab9fe967f1
5 changed files with 148 additions and 175 deletions

View file

@ -544,28 +544,6 @@ INT WINAPI GetGeoInfoA(GEOID geoid, GEOTYPE geotype, LPSTR data, int data_len, L
}
/* Parameters of the "xn--" label decoder used by IdnToUnicode() and adapt().
 * The values are the ones RFC 3492 specifies for Punycode. */
enum {
BASE = 36, /* radix of the digits; also the step of the threshold counter k */
TMIN = 1, /* lower clamp of the per-digit threshold t */
TMAX = 26, /* upper clamp of the per-digit threshold t */
SKEW = 38, /* added to delta in the divisor of the final bias formula in adapt() */
DAMP = 700, /* divisor applied to delta on the first adaptation */
INIT_BIAS = 72, /* bias each label starts decoding with */
INIT_N = 128 /* code point each label starts decoding with */
};
/* Compute the bias to use for the next encoded delta.
 *
 * delta     - difference between the decoder position after and before the
 *             delta that was just decoded
 * numpoints - number of code points in the label once the new one is counted
 * firsttime - TRUE for the first delta of a label, which is damped harder
 *
 * Returns the new bias value.
 */
static inline INT adapt(INT delta, INT numpoints, BOOL firsttime)
{
    const INT threshold = ((BASE - TMIN) * TMAX) / 2;
    INT scaled = 0;

    if (firsttime)
        delta /= DAMP;
    else
        delta /= 2;

    delta += delta / numpoints;

    /* bring delta below the threshold, counting BASE for every division */
    while (delta > threshold)
    {
        delta /= BASE - TMIN;
        scaled += BASE;
    }

    return scaled + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
}
/******************************************************************************
* IdnToAscii (KERNEL32.@)
*/
@ -589,154 +567,11 @@ INT WINAPI IdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHA
/******************************************************************************
* IdnToUnicode (KERNEL32.@)
*
* Convert the ASCII form of a domain name to Unicode, label by label.
*
* NOTE(review): this view has lost its diff markers. The first signature and
* the long body that follows it appear to be the implementation the commit
* removes; the second signature and the last three statements appear to be
* its replacement, which forwards to RtlIdnToUnicode().
*/
INT WINAPI IdnToUnicode(DWORD dwFlags, LPCWSTR lpASCIICharStr, INT cchASCIIChar,
LPWSTR lpUnicodeCharStr, INT cchUnicodeChar)
INT WINAPI IdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT dstlen )
{
INT i, label_start, label_end, out_label, out = 0;
WCHAR ch;
TRACE("%x %p %d %p %d\n", dwFlags, lpASCIICharStr, cchASCIIChar,
lpUnicodeCharStr, cchUnicodeChar);
/* each iteration handles one '.'-separated label */
for(label_start=0; label_start<cchASCIIChar;) {
INT n = INIT_N, pos = 0, old_pos, w, k, bias = INIT_BIAS, delim=0, digit, t;
out_label = out;
/* find the end of the label, validating characters and remembering the last '-' */
for(i=label_start; i<cchASCIIChar; i++) {
ch = lpASCIICharStr[i];
/* only ASCII is accepted, and a NUL only as the very last character */
if(ch>0x7f || (i!=cchASCIIChar-1 && !ch)) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
if(!ch || ch=='.')
break;
if(ch == '-')
delim = i;
if((dwFlags&IDN_USE_STD3_ASCII_RULES) == 0)
continue;
/* with IDN_USE_STD3_ASCII_RULES only letters, digits and '-' are allowed */
if((ch>='a' && ch<='z') || (ch>='A' && ch<='Z')
|| (ch>='0' && ch<='9') || ch=='-')
continue;
SetLastError(ERROR_INVALID_NAME);
return 0;
}
label_end = i;
/* last label may be empty */
if(label_start==label_end && ch) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* with IDN_USE_STD3_ASCII_RULES a label may not start or end with '-' */
if((dwFlags&IDN_USE_STD3_ASCII_RULES) && (lpASCIICharStr[label_start]=='-' ||
lpASCIICharStr[label_end-1]=='-')) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
if(label_end-label_start > 63) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* labels without the "xn--" prefix are copied as is, together with their separator */
if(label_end-label_start<4 ||
tolowerW(lpASCIICharStr[label_start])!='x' ||
tolowerW(lpASCIICharStr[label_start+1])!='n' ||
lpASCIICharStr[label_start+2]!='-' || lpASCIICharStr[label_start+3]!='-') {
if(label_end < cchASCIIChar)
label_end++;
if(!lpUnicodeCharStr) {
out += label_end-label_start;
}else if(out+label_end-label_start <= cchUnicodeChar) {
memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start,
(label_end-label_start)*sizeof(WCHAR));
out += label_end-label_start;
}else {
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
label_start = label_end;
continue;
}
/* the last '-' being the one that ends the prefix means there are no literal characters */
if(delim == label_start+3)
delim++;
/* copy the literal characters that sit between the prefix and the last '-' */
if(!lpUnicodeCharStr) {
out += delim-label_start-4;
}else if(out+delim-label_start-4 <= cchUnicodeChar) {
memcpy(lpUnicodeCharStr+out, lpASCIICharStr+label_start+4,
(delim-label_start-4)*sizeof(WCHAR));
out += delim-label_start-4;
}else {
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
/* skip the delimiter when literal characters were copied */
if(out != out_label)
delim++;
/* decode the remaining digits; each pass of this loop yields one code point */
for(i=delim; i<label_end;) {
old_pos = pos;
w = 1;
/* accumulate one variable-length integer into pos */
for(k=BASE; ; k+=BASE) {
ch = i<label_end ? tolowerW(lpASCIICharStr[i++]) : 0;
if((ch<'a' || ch>'z') && (ch<'0' || ch>'9')) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* 'a'..'z' map to 0..25 and '0'..'9' to 26..35 */
digit = ch<='9' ? ch-'0'+'z'-'a'+1 : ch-'a';
pos += digit*w;
t = k<=bias ? TMIN : k>=bias+TMAX ? TMAX : k-bias;
if(digit < t)
break;
w *= BASE-t;
}
bias = adapt(pos-old_pos, out-out_label+1, old_pos==0);
/* split the decoded value into a code point increment and an insert position */
n += pos/(out-out_label+1);
pos %= out-out_label+1;
if((dwFlags&IDN_ALLOW_UNASSIGNED)==0 &&
get_table_entry(nameprep_char_type, n)==1/*UNASSIGNED*/) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* NOTE(review): the capacity check below compares against cchASCIIChar while
 * every other check in this function uses cchUnicodeChar - looks like a typo,
 * confirm */
if(!lpUnicodeCharStr) {
out++;
}else if(out+1 <= cchASCIIChar) {
/* open a gap at the insert position inside the current label */
memmove(lpUnicodeCharStr+out_label+pos+1,
lpUnicodeCharStr+out_label+pos,
(out-out_label-pos)*sizeof(WCHAR));
lpUnicodeCharStr[out_label+pos] = n;
out++;
}else {
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
pos++;
}
/* a decoded label longer than 63 characters is rejected */
if(out-out_label > 63) {
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* copy the character that terminated the label ('.' or the final NUL) */
if(label_end < cchASCIIChar) {
if(!lpUnicodeCharStr) {
out++;
}else if(out+1 <= cchUnicodeChar) {
lpUnicodeCharStr[out++] = lpASCIICharStr[label_end];
}else {
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
}
label_start = label_end+1;
}
return out;
/* replacement body: forward to RtlIdnToUnicode(), which updates dstlen, and
 * return 0 when set_ntstatus() reports a failure */
NTSTATUS status = RtlIdnToUnicode( flags, src, srclen, dst, &dstlen );
if (!set_ntstatus( status )) return 0;
return dstlen;
}

View file

@ -4600,10 +4600,13 @@ static void test_IdnToUnicode(void)
/* 5 */
{ 64, L"aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", 0, 0 },
{ 8, L"xn--7va", IDN_ALLOW_UNASSIGNED, 2, 2, L"\x380" },
{ 8, L"xn--7va", 0, 0, 0, L"\x380" },
{ -1, L"xn----bm3an932a1l5d.xn--xvj", 0, 8, 0, L"\xd803\xde78\x46b5-\xa861.\x2e87" },
{ -1, L"xn--z123456789012345678901234567890123456789012345678901234-9te", 0,
57, 57, L"\xe4z123456789012345678901234567890123456789012345678901234" },
/* 10 */
{ -1, L"foo.bar", 0, 8, 8, L"foo.bar" },
{ -1, L"d.xn----dha", 0, 5, 5, L"d.\x00fc-" },
};
WCHAR buf[1024];
@ -4612,13 +4615,11 @@ static void test_IdnToUnicode(void)
for (i=0; i<ARRAY_SIZE(test_data); i++)
{
ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, NULL, 0);
todo_wine_if (i > 6)
ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret);
SetLastError(0xdeadbeef);
ret = pIdnToUnicode(test_data[i].flags, test_data[i].in, test_data[i].in_len, buf, ARRAY_SIZE(buf));
err = GetLastError();
todo_wine_if (i > 6)
ok(ret == test_data[i].ret || broken(ret == test_data[i].broken_ret), "%d: ret = %d\n", i, ret);
/* the expected error depends on ret; without the parentheses the condition
 * parsed as (err == ret) ? 0xdeadbeef : ERROR_INVALID_NAME, which is never 0 */
ok(err == (ret ? 0xdeadbeef : ERROR_INVALID_NAME), "%d: err = %d\n", i, err);
ok(!wcsncmp(test_data[i].out, buf, ret), "%d: buf = %s\n", i, wine_dbgstr_wn(buf, ret));
@ -4632,8 +4633,8 @@ static BOOL is_idn_error( const WCHAR *str )
for (p = wcstok( err, L" []" ); p; p = wcstok( NULL, L" []" ) )
{
if (*p == 'B' || !wcscmp( p, L"V8" )) continue; /* BiDi */
if (!wcscmp( p, L"V2" ) || !wcscmp( p, L"V3" )) continue; /* CheckHyphens */
if (!wcscmp( p, L"V7" )) continue; /* CheckJoiners */
if (!wcscmp( p, L"V2" )) continue; /* CheckHyphens */
if (!wcscmp( p, L"V5" )) continue; /* Combining marks */
return TRUE;
}
return FALSE;
@ -4705,7 +4706,7 @@ static void test_Idn(void)
error = columns[2];
SetLastError( 0xdeadbeef );
memset( dst, 0xcc, sizeof(dst) );
ret = pIdnToUnicode( 0, columns[0], -1, dst, ARRAY_SIZE(dst) );
ret = pIdnToUnicode( IDN_USE_STD3_ASCII_RULES, columns[0], -1, dst, ARRAY_SIZE(dst) );
for (i = 0; columns[0][i]; i++) if (columns[0][i] > 0x7f) break;
if (columns[0][i])
{
@ -4713,10 +4714,16 @@ static void test_Idn(void)
}
else if (!is_idn_error( error ))
{
ok( ret, "line %u: toUnicode failed for %s\n", line, debugstr_w(columns[0]) );
ok( ret, "line %u: toUnicode failed for %s expected %s\n", line,
debugstr_w(columns[0]), debugstr_w(expect) );
if (ret) ok( !wcscmp( dst, expect ), "line %u: got %s expected %s\n",
line, debugstr_w(dst), debugstr_w(expect) );
}
else
{
ok( !ret, "line %u: toUnicode didn't fail for %s got %s expected error %s\n",
line, debugstr_w(columns[0]), debugstr_w(dst), debugstr_w(error) );
}
}
fclose( f );
}

View file

@ -2238,3 +2238,132 @@ NTSTATUS WINAPI RtlIdnToNameprepUnicode( DWORD flags, const WCHAR *src, INT srcl
*dstlen = buflen;
return status;
}
/******************************************************************************
 *      RtlIdnToUnicode   (NTDLL.@)
 *
 * Convert the ASCII form of a domain name to Unicode. Labels that carry the
 * "xn--" prefix are decoded, all other labels are copied unchanged.
 *
 * PARAMS
 *  flags   [I]   IDN_* flags. IDN_USE_STD3_ASCII_RULES restricts the input to
 *                letters, digits and '-', and forbids a leading or trailing
 *                '-' in labels that are copied unchanged. The flags are also
 *                passed on to check_invalid_chars().
 *  src     [I]   Input string; only ASCII characters are accepted.
 *  srclen  [I]   Length of src in WCHARs, or -1 if src is null-terminated
 *                (the terminator is then part of the input).
 *  dst     [O]   Output buffer. Not written to when *dstlen is 0.
 *  dstlen  [I/O] In: size of dst in WCHARs, or 0 to only compute the length.
 *                Out: number of WCHARs produced. Left untouched on failure.
 *
 * RETURNS
 *  STATUS_SUCCESS, or
 *  STATUS_INVALID_PARAMETER          if src is NULL or srclen is below -1,
 *  STATUS_INVALID_IDN_NORMALIZATION  if the input is malformed,
 *  STATUS_BUFFER_TOO_SMALL           if dst cannot hold the result,
 *  or the failure status of load_norm_table().
 */
NTSTATUS WINAPI RtlIdnToUnicode( DWORD flags, const WCHAR *src, INT srclen, WCHAR *dst, INT *dstlen )
{
    const int max_int = 0x7fffffff;  /* upper bound for the int accumulators of the decoder */
    const struct norm_table *info;
    int i, buflen, start, end, out_label, out = 0;
    NTSTATUS status;
    UINT buffer[64];
    WCHAR ch;

    if (!src || srclen < -1) return STATUS_INVALID_PARAMETER;
    if (srclen == -1) srclen = strlenW( src ) + 1;

    TRACE( "%x %s %p %d\n", flags, debugstr_wn(src, srclen), dst, *dstlen );

    if ((status = load_norm_table( 13, &info ))) return status;

    /* each iteration handles one '.'-separated label */
    for (start = 0; start < srclen; )
    {
        int n = 0x80, bias = 72, pos = 0, old_pos, w, k, t, delim = 0, digit, delta;

        out_label = out;

        /* find the end of the label, validating characters and remembering the last '-' */
        for (i = start; i < srclen; i++)
        {
            ch = src[i];
            /* only ASCII is accepted, and a NUL only as the very last character */
            if (ch > 0x7f || (i != srclen - 1 && !ch)) return STATUS_INVALID_IDN_NORMALIZATION;
            if (!ch || ch == '.') break;
            if (ch == '-') delim = i;
            if (!(flags & IDN_USE_STD3_ASCII_RULES)) continue;
            if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') ||
                (ch >= '0' && ch <= '9') || ch == '-')
                continue;
            return STATUS_INVALID_IDN_NORMALIZATION;
        }
        end = i;

        /* last label may be empty */
        if (start == end && ch) return STATUS_INVALID_IDN_NORMALIZATION;

        /* labels without the "xn--" prefix are copied as is, together with their separator */
        if (end - start < 4 ||
            (src[start] != 'x' && src[start] != 'X') ||
            (src[start + 1] != 'n' && src[start + 1] != 'N') ||
            src[start + 2] != '-' || src[start + 3] != '-')
        {
            if (end - start > 63) return STATUS_INVALID_IDN_NORMALIZATION;
            /* an empty label has no last character: src[end - 1] would be the
             * previous separator, or lie before the buffer when start is 0 */
            if ((flags & IDN_USE_STD3_ASCII_RULES) && end > start &&
                (src[start] == '-' || src[end - 1] == '-'))
                return STATUS_INVALID_IDN_NORMALIZATION;
            if (end < srclen) end++;
            if (*dstlen)
            {
                if (out + end - start <= *dstlen)
                    memcpy( dst + out, src + start, (end - start) * sizeof(WCHAR));
                else return STATUS_BUFFER_TOO_SMALL;
            }
            out += end - start;
            start = end;
            continue;
        }

        /* the last '-' being the one that ends the prefix means there are no literal characters */
        if (delim == start + 3) delim++;

        /* collect the literal characters that sit between the prefix and the last '-' */
        buflen = 0;
        for (i = start + 4; i < delim && buflen < ARRAY_SIZE(buffer); i++) buffer[buflen++] = src[i];
        if (buflen) i++;  /* skip the delimiter */

        /* decode the remaining digits; each pass of this loop yields one code point */
        while (i < end)
        {
            old_pos = pos;
            w = 1;
            /* accumulate one variable-length integer into pos */
            for (k = BASE; ; k += BASE)
            {
                if (i >= end) return STATUS_INVALID_IDN_NORMALIZATION;
                ch = src[i++];
                if (ch >= 'a' && ch <= 'z') digit = ch - 'a';
                else if (ch >= 'A' && ch <= 'Z') digit = ch - 'A';
                else if (ch >= '0' && ch <= '9') digit = ch - '0' + 26;
                else return STATUS_INVALID_IDN_NORMALIZATION;
                /* fail instead of overflowing the signed accumulator */
                if (digit > (max_int - pos) / w) return STATUS_INVALID_IDN_NORMALIZATION;
                pos += digit * w;
                t = k <= bias ? TMIN : k >= bias + TMAX ? TMAX : k - bias;
                if (digit < t) break;
                if (w > max_int / (BASE - t)) return STATUS_INVALID_IDN_NORMALIZATION;
                w *= BASE - t;
            }

            /* adapt the bias for the next delta */
            delta = (pos - old_pos) / (!old_pos ? DAMP : 2);
            delta += delta / (buflen + 1);
            for (k = 0; delta > ((BASE - TMIN) * TMAX) / 2; k += BASE) delta /= BASE - TMIN;
            bias = k + ((BASE - TMIN + 1) * delta) / (delta + SKEW);

            /* split the decoded value into a code point increment and an insert
             * position; values above 0x10ffff cannot be stored as UTF-16 */
            if (pos / (buflen + 1) > 0x10ffff - n) return STATUS_INVALID_IDN_NORMALIZATION;
            n += pos / (buflen + 1);
            pos %= buflen + 1;

            if (buflen >= ARRAY_SIZE(buffer) - 1) return STATUS_INVALID_IDN_NORMALIZATION;
            memmove( buffer + pos + 1, buffer + pos, (buflen - pos) * sizeof(*buffer) );
            buffer[pos++] = n;
            buflen++;
        }

        if (check_invalid_chars( info, flags, buffer, buflen )) return STATUS_INVALID_IDN_NORMALIZATION;

        /* write the label out; code points from 0x10000 up take two WCHARs */
        for (i = 0; i < buflen; i++)
        {
            int len = 1 + (buffer[i] >= 0x10000);
            if (*dstlen)
            {
                if (out + len <= *dstlen) put_utf16( dst + out, buffer[i] );
                else return STATUS_BUFFER_TOO_SMALL;
            }
            out += len;
        }

        if (out - out_label > 63) return STATUS_INVALID_IDN_NORMALIZATION;

        /* copy the character that terminated the label ('.' or the final NUL) */
        if (end < srclen)
        {
            if (*dstlen)
            {
                if (out + 1 <= *dstlen) dst[out] = src[end];
                else return STATUS_BUFFER_TOO_SMALL;
            }
            out++;
        }
        start = end + 1;
    }
    *dstlen = out;
    return STATUS_SUCCESS;
}

View file

@ -724,6 +724,7 @@
@ stdcall RtlIdentifierAuthoritySid(ptr)
@ stdcall RtlIdnToAscii(long wstr long ptr ptr)
@ stdcall RtlIdnToNameprepUnicode(long wstr long ptr ptr)
@ stdcall RtlIdnToUnicode(long wstr long ptr ptr)
@ stdcall RtlImageDirectoryEntryToData(long long long ptr)
@ stdcall RtlImageNtHeader(long)
@ stdcall RtlImageRvaToSection(ptr long long)

View file

@ -2799,6 +2799,7 @@ NTSYSAPI NTSTATUS WINAPI RtlGUIDFromString(PUNICODE_STRING,GUID*);
NTSYSAPI PSID_IDENTIFIER_AUTHORITY WINAPI RtlIdentifierAuthoritySid(PSID);
NTSYSAPI NTSTATUS WINAPI RtlIdnToAscii(DWORD,const WCHAR*,INT,WCHAR*,INT*);
NTSYSAPI NTSTATUS WINAPI RtlIdnToNameprepUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*);
NTSYSAPI NTSTATUS WINAPI RtlIdnToUnicode(DWORD,const WCHAR*,INT,WCHAR*,INT*);
NTSYSAPI PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE,BOOL,WORD,ULONG *);
NTSYSAPI PIMAGE_NT_HEADERS WINAPI RtlImageNtHeader(HMODULE);
NTSYSAPI PIMAGE_SECTION_HEADER WINAPI RtlImageRvaToSection(const IMAGE_NT_HEADERS *,HMODULE,DWORD);