Add ICONV_{GET,SET}_ILSEQ_INVALID iconvctl.

GNU iconv returns EILSEQ when a character that is valid in the input
codeset cannot be represented in the output codeset.  However, POSIX
requires iconv() to perform an implementation-defined conversion on such a
character.  So, Citrus iconv converts it to a special character which marks
it as invalid in the output codeset.

This is not a problem in most cases, but some software such as libxml2 depends
on GNU's behavior to determine whether a character is output as-is or in
another form such as a character entity (&#NNN;).
This commit is contained in:
Hiroki Sato 2013-11-25 01:26:06 +00:00
parent 1b57cec7d9
commit 7c5b23111c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=258537
5 changed files with 45 additions and 1 deletions

View file

@ -86,6 +86,8 @@ void iconv_set_relocation_prefix(const char *, const char *);
#define ICONV_SET_DISCARD_ILSEQ 4
#define ICONV_SET_HOOKS 5
#define ICONV_SET_FALLBACKS 6
#define ICONV_GET_ILSEQ_INVALID 128
#define ICONV_SET_ILSEQ_INVALID 129
typedef void (*iconv_unicode_char_hook) (unsigned int mbr, void *data);
typedef void (*iconv_wide_char_hook) (wchar_t wc, void *data);

View file

@ -99,6 +99,7 @@ struct _citrus_iconv_shared {
char *ci_convname;
bool ci_discard_ilseq;
struct iconv_hooks *ci_hooks;
bool ci_ilseq_invalid;
};
struct _citrus_iconv {

View file

@ -298,6 +298,12 @@ __bsd_iconvctl(iconv_t cd, int request, void *argument)
case ICONV_SET_FALLBACKS:
errno = EOPNOTSUPP;
return (-1);
case ICONV_GET_ILSEQ_INVALID:
*i = cv->cv_shared->ci_ilseq_invalid ? 1 : 0;
return (0);
case ICONV_SET_ILSEQ_INVALID:
cv->cv_shared->ci_ilseq_invalid = *i;
return (0);
default:
errno = EINVAL;
return (-1);

View file

@ -34,7 +34,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd November 7, 2009
.Dd November 25, 2013
.Dt ICONVCTL 3
.Os
.Sh NAME
@ -110,6 +110,31 @@ variable, which is passed to
via
.Fa argument
by its address.
.It ICONV_GET_ILSEQ_INVALID
Determines whether converting a character that is valid in the input buffer,
but for which an identical character does not exist in the target
codeset, fails with
.Er EILSEQ
or not.
The answer is stored in
.Fa argument ,
which is of
.Ft int * .
It will be set to 1 if this feature is enabled or set to 0 otherwise.
.It ICONV_SET_ILSEQ_INVALID
Sets whether converting a character that is valid in the input buffer,
but for which an identical character does not exist in the target
codeset, fails with
.Er EILSEQ
or not.
If the value pointed to by
.Fa argument ,
which is of
.Ft int * ,
is 1 the feature will be enabled,
and if it is 0 the feature will be disabled.
.El
.\" XXX: fallbacks are unimplemented and trying to set them will always
.\" return EOPNOTSUPP but definitions are provided for source-level

View file

@ -543,6 +543,16 @@ _citrus_iconv_std_iconv_convert(struct _citrus_iconv * __restrict cv,
ret = do_conv(is, &csid, &idx);
if (ret) {
if (ret == E_NO_CORRESPONDING_CHAR) {
/*
* GNU iconv returns EILSEQ when there is no
* corresponding character in the output codeset.
* Some software depends on this behavior even
* though it is against the POSIX specification.
*/
if (cv->cv_shared->ci_ilseq_invalid != 0) {
ret = EILSEQ;
goto err;
}
inval++;
szrout = 0;
if ((((flags & _CITRUS_ICONV_F_HIDE_INVALID) == 0) &&