xmllite: Replace crln in input buffer with nl.

Signed-off-by: Jacek Caban <jacek@codeweavers.com>
Signed-off-by: Nikolay Sivov <nsivov@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Jacek Caban 2017-03-21 16:19:32 +01:00 committed by Alexandre Julliard
parent 2b5203bdcd
commit 48fff1b931
2 changed files with 60 additions and 51 deletions

View file

@ -178,6 +178,7 @@ typedef struct
UINT cur;
unsigned int allocated;
unsigned int written;
BOOL prev_cr;
} encoded_buffer;
typedef struct input_buffer input_buffer;
@ -687,6 +688,7 @@ static HRESULT init_encoded_buffer(xmlreaderinput *input, encoded_buffer *buffer
buffer->cur = 0;
buffer->allocated = initial_len;
buffer->written = 0;
buffer->prev_cr = FALSE;
return S_OK;
}
@ -952,6 +954,34 @@ static void readerinput_shrinkraw(xmlreaderinput *readerinput, int len)
buffer->cur = 0;
}
static void fixup_buffer_cr(encoded_buffer *buffer, int off)
{
BOOL prev_cr = buffer->prev_cr;
const WCHAR *src;
WCHAR *dest;
src = dest = (WCHAR*)buffer->data + off;
while ((const char*)src < buffer->data + buffer->written)
{
if (*src == '\r')
{
*dest++ = '\n';
src++;
prev_cr = TRUE;
continue;
}
if(prev_cr && *src == '\n')
src++;
else
*dest++ = *src++;
prev_cr = FALSE;
}
buffer->written = (char*)dest - buffer->data;
buffer->prev_cr = prev_cr;
*dest = 0;
}
/* note that raw buffer content is kept */
static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding enc)
{
@ -976,15 +1006,18 @@ static void readerinput_switchencoding(xmlreaderinput *readerinput, xml_encoding
readerinput_grow(readerinput, len);
memcpy(dest->data, src->data + src->cur, len);
dest->written += len*sizeof(WCHAR);
return;
}
else
{
dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
ptr = (WCHAR*)dest->data;
MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
ptr[dest_len] = 0;
dest->written += dest_len*sizeof(WCHAR);
}
dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
ptr = (WCHAR*)dest->data;
MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
ptr[dest_len] = 0;
dest->written += dest_len*sizeof(WCHAR);
fixup_buffer_cr(dest, 0);
}
/* shrinks parsed data a buffer begins with */
@ -1010,13 +1043,14 @@ static HRESULT reader_more(xmlreader *reader)
encoded_buffer *src = &readerinput->buffer->encoded;
encoded_buffer *dest = &readerinput->buffer->utf16;
UINT cp = readerinput->buffer->code_page;
int len, dest_len;
int len, dest_len, prev_len;
HRESULT hr;
WCHAR *ptr;
/* get some raw data from stream first */
hr = readerinput_growraw(readerinput);
len = readerinput_get_convlen(readerinput);
prev_len = dest->written / sizeof(WCHAR);
/* just copy for UTF-16 case */
if (cp == ~0)
@ -1024,18 +1058,20 @@ static HRESULT reader_more(xmlreader *reader)
readerinput_grow(readerinput, len);
memcpy(dest->data + dest->written, src->data + src->cur, len);
dest->written += len*sizeof(WCHAR);
return hr;
}
else
{
dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
ptr = (WCHAR*)(dest->data + dest->written);
MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
ptr[dest_len] = 0;
dest->written += dest_len*sizeof(WCHAR);
/* get rid of processed data */
readerinput_shrinkraw(readerinput, len);
}
dest_len = MultiByteToWideChar(cp, 0, src->data + src->cur, len, NULL, 0);
readerinput_grow(readerinput, dest_len);
ptr = (WCHAR*)(dest->data + dest->written);
MultiByteToWideChar(cp, 0, src->data + src->cur, len, ptr, dest_len);
ptr[dest_len] = 0;
dest->written += dest_len*sizeof(WCHAR);
/* get rid of processed data */
readerinput_shrinkraw(readerinput, len);
fixup_buffer_cr(dest, prev_len);
return hr;
}
@ -1974,28 +2010,6 @@ static HRESULT reader_parse_qname(xmlreader *reader, strval *prefix, strval *loc
return S_OK;
}
/* Applies normalization rules to a single char, used for attribute values.
Rules include 2 steps:
1) replacing \r\n with a single \n;
2) replacing all whitespace chars with ' '.
*/
static void reader_normalize_space(xmlreader *reader, WCHAR *ptr)
{
encoded_buffer *buffer = &reader->input->buffer->utf16;
if (!is_wchar_space(*ptr)) return;
if (*ptr == '\r' && *(ptr+1) == '\n')
{
int len = buffer->written - ((char*)ptr - buffer->data) - 2*sizeof(WCHAR);
memmove(ptr+1, ptr+2, len);
}
*ptr = ' ';
}
static WCHAR get_predefined_entity(const xmlreader *reader, const strval *name)
{
static const WCHAR entltW[] = {'l','t'};
@ -2171,7 +2185,8 @@ static HRESULT reader_parse_attvalue(xmlreader *reader, strval *value)
}
else
{
reader_normalize_space(reader, ptr);
/* replace all whitespace chars with ' ' */
if (is_wchar_space(*ptr)) *ptr = ' ';
reader_skipn(reader, 1);
}
ptr = reader_get_ptr(reader);
@ -2393,12 +2408,6 @@ static HRESULT reader_parse_cdata(xmlreader *reader)
}
else
{
/* Value normalization is not fully implemented, rules are:
- single '\r' -> '\n';
- sequence '\r\n' -> '\n', in this case value length changes;
*/
if (*ptr == '\r') *ptr = '\n';
reader_skipn(reader, 1);
ptr++;
}

View file

@ -1761,8 +1761,8 @@ static void test_readvaluechunk(void)
static struct test_entry cdata_tests[] = {
{ "<a><![CDATA[ ]]data ]]></a>", "", " ]]data ", S_OK },
{ "<a><![CDATA[<![CDATA[ data ]]]]></a>", "", "<![CDATA[ data ]]", S_OK },
{ "<a><![CDATA[\n \r\n \n\n ]]></a>", "", "\n \n \n\n ", S_OK, S_OK, TRUE },
{ "<a><![CDATA[\r \r\r\n \n\n ]]></a>", "", "\n \n\n \n\n ", S_OK, S_OK, TRUE },
{ "<a><![CDATA[\n \r\n \n\n ]]></a>", "", "\n \n \n\n ", S_OK, S_OK },
{ "<a><![CDATA[\r \r\r\n \n\n ]]></a>", "", "\n \n\n \n\n ", S_OK, S_OK },
{ "<a><![CDATA[\r\r \n\r \r \n\n ]]></a>", "", "\n\n \n\n \n \n\n ", S_OK },
{ NULL }
};
@ -1856,8 +1856,8 @@ static void test_read_cdata(void)
static struct test_entry text_tests[] = {
{ "<a>simple text</a>", "", "simple text", S_OK },
{ "<a>text ]]> text</a>", "", "", WC_E_CDSECTEND },
{ "<a>\n \r\n \n\n text</a>", "", "\n \n \n\n text", S_OK, S_OK, TRUE },
{ "<a>\r \r\r\n \n\n text</a>", "", "\n \n\n \n\n text", S_OK, S_OK, TRUE },
{ "<a>\n \r\n \n\n text</a>", "", "\n \n \n\n text", S_OK, S_OK },
{ "<a>\r \r\r\n \n\n text</a>", "", "\n \n\n \n\n text", S_OK, S_OK },
{ NULL }
};