libc: iconv: add mb_cur_min for encoder traits

A future commit will actually implement //IGNORE so that applications
using base iconv can, e.g., sanitize UTF-8 strings.  To do this, the
iconv_std module needs to be able to determine the minimum width for any
given encoding so that it can skip that many bytes in the input buffer.
This is mainly an issue for UTF-16 and UTF-32.

This commit bumps shlib versions to 5 for libiconv modules to reflect
the ABI change.  It also fixes OptionalObsoleteFiles to remove the
libiconv modules if WITHOUT_ICONV is in use.

Regarding _ENCODING_MB_CUR_MIN: note that this file
(citrus_stdenc_template.h) is included at the bottom of an encoding
*implementation*, so the implementation is free to #define the macro
before that include.  UTF1632 is a good example, as it redefines the
minimum to be a property of the encodinginfo, and the minimum is set to
2 or 4 bytes for UTF-16 and UTF-32 respectively.

Sponsored by:	Klara, Inc.
Differential Revision:	https://reviews.freebsd.org/D34344
This commit is contained in:
Kyle Evans 2022-02-22 01:12:27 -06:00
parent c1f46b8fcb
commit 2300a22c97
9 changed files with 100 additions and 2 deletions

View file

@ -52,6 +52,31 @@
# xargs -n1 | sort | uniq -d;
# done
# 20220811: new iconv encoder trait added
OLD_LIBS+=usr/lib/i18n/libBIG5.so.4
OLD_LIBS+=usr/lib/i18n/libDECHanyu.so.4
OLD_LIBS+=usr/lib/i18n/libEUC.so.4
OLD_LIBS+=usr/lib/i18n/libEUCTW.so.4
OLD_LIBS+=usr/lib/i18n/libGBK2K.so.4
OLD_LIBS+=usr/lib/i18n/libHZ.so.4
OLD_LIBS+=usr/lib/i18n/libISO2022.so.4
OLD_LIBS+=usr/lib/i18n/libJOHAB.so.4
OLD_LIBS+=usr/lib/i18n/libMSKanji.so.4
OLD_LIBS+=usr/lib/i18n/libUES.so.4
OLD_LIBS+=usr/lib/i18n/libUTF1632.so.4
OLD_LIBS+=usr/lib/i18n/libUTF7.so.4
OLD_LIBS+=usr/lib/i18n/libUTF8.so.4
OLD_LIBS+=usr/lib/i18n/libVIQR.so.4
OLD_LIBS+=usr/lib/i18n/libZW.so.4
OLD_LIBS+=usr/lib/i18n/libiconv_none.so.4
OLD_LIBS+=usr/lib/i18n/libiconv_std.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_646.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_none.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_parallel.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_serial.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_std.so.4
OLD_LIBS+=usr/lib/i18n/libmapper_zone.so.4
# 202208XX: raw socket layer removed
OLD_FILES+=usr/include/net/raw_cb.h

View file

@ -106,7 +106,7 @@
#include <string.h>
#include <unistd.h>
#define I18NMODULE_MAJOR 4
#define I18NMODULE_MAJOR 5
#include "citrus_namespace.h"
#include "citrus_bcs.h"

View file

@ -223,6 +223,7 @@
#define _stdenc_wctomb _citrus_stdenc_wctomb
#define _stdenc_put_state_reset _citrus_stdenc_put_state_reset
#define _stdenc_get_state_size _citrus_stdenc_get_state_size
#define _stdenc_get_mb_cur_min _citrus_stdenc_get_mb_cur_min
#define _stdenc_get_mb_cur_max _citrus_stdenc_get_mb_cur_max
#define _stdenc_get_state_desc _citrus_stdenc_get_state_desc
#define _STDENC_SDID_GENERIC _CITRUS_STDENC_SDID_GENERIC

View file

@ -115,6 +115,20 @@ _citrus_stdenc_get_state_size(struct _citrus_stdenc *ce)
return (ce->ce_traits->et_state_size);
}
/*
 * Report the minimum multibyte character width, in bytes, that the
 * encoder's traits record for this encoding.
 */
static __inline size_t
_citrus_stdenc_get_mb_cur_min(struct _citrus_stdenc *ce)
{
	const size_t min_width = ce->ce_traits->et_mb_cur_min;

	return (min_width);
}
/*
 * Report the maximum multibyte character width, in bytes, that the
 * encoder's traits record for this encoding.
 */
static __inline size_t
_citrus_stdenc_get_mb_cur_max(struct _citrus_stdenc *ce)
{
	const size_t max_width = ce->ce_traits->et_mb_cur_max;

	return (max_width);
}
static __inline int
_citrus_stdenc_get_state_desc(struct _citrus_stdenc * __restrict ce,
void * __restrict ps, int id,

View file

@ -149,6 +149,8 @@ struct _citrus_stdenc_traits {
/* version 0x00000001 */
size_t et_state_size;
size_t et_mb_cur_max;
/* version 0x00000005 */
size_t et_mb_cur_min;
};
struct _citrus_stdenc {

View file

@ -49,6 +49,11 @@
#define _CE_TO_EI(_ce_) (_TO_EI((_ce_)->ce_closure))
#define _TO_STATE(_ps_) ((_ENCODING_STATE*)(_ps_))
/*
 * Fallback for encodings that did not provide _ENCODING_MB_CUR_MIN before
 * this point: the minimum character width is taken to be a single byte.
 * The encoding-info argument is accepted but not used by this default.
 */
#if !defined(_ENCODING_MB_CUR_MIN)
#define _ENCODING_MB_CUR_MIN(_ei_)	1
#endif
/* ----------------------------------------------------------------------
* templates for public functions
*/
@ -87,6 +92,7 @@ _FUNCNAME(stdenc_init)(struct _citrus_stdenc * __restrict ce,
ce->ce_closure = ei;
et->et_state_size = sizeof(_ENCODING_STATE);
et->et_mb_cur_max = _ENCODING_MB_CUR_MAX(_CE_TO_EI(ce));
et->et_mb_cur_min = _ENCODING_MB_CUR_MIN(_CE_TO_EI(ce));
return (0);
}

View file

@ -2,7 +2,7 @@
.PATH: ${SRCTOP}/lib/libc/iconv
SHLIB_MAJOR= 4
SHLIB_MAJOR= 5
CFLAGS+= -I${SRCTOP}/lib/libc/iconv
CFLAGS+= -Dbool=_Bool

View file

@ -77,6 +77,7 @@ typedef struct {
/*
 * Encoding information for the UTF1632 module.  The width fields are
 * exposed to the generic stdenc template through _ENCODING_MB_CUR_MAX and
 * _ENCODING_MB_CUR_MIN.
 */
typedef struct {
/* NOTE(review): presumably the byte order to assume by default; confirm. */
int preffered_endian;
/* Maximum bytes per character; read by _ENCODING_MB_CUR_MAX. */
unsigned int cur_max;
/* Minimum bytes per character; read by _ENCODING_MB_CUR_MIN. */
unsigned int cur_min;
/* Bit flags; the _MODE_UTF32 bit is tested when the widths are chosen. */
uint32_t mode;
} _UTF1632EncodingInfo;
@ -84,6 +85,7 @@ typedef struct {
#define _ENCODING_INFO _UTF1632EncodingInfo
#define _ENCODING_STATE _UTF1632State
#define _ENCODING_MB_CUR_MAX(_ei_) ((_ei_)->cur_max)
#define _ENCODING_MB_CUR_MIN(_ei_) ((_ei_)->cur_min)
#define _ENCODING_IS_STATE_DEPENDENT 0
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_) 0
@ -390,6 +392,7 @@ _citrus_UTF1632_encoding_module_init(_UTF1632EncodingInfo * __restrict ei,
parse_variable(ei, var, lenvar);
ei->cur_min = ((ei->mode&_MODE_UTF32) == 0) ? 2 : 4;
ei->cur_max = ((ei->mode&_MODE_UTF32) == 0) ? 6 : 8;
/* 6: endian + surrogate */
/* 8: endian + normal */

View file

@ -2455,6 +2455,52 @@ OLD_FILES+=usr/bin/mkcsmapper
OLD_FILES+=usr/bin/mkesdb
OLD_FILES+=usr/include/_libiconv_compat.h
OLD_FILES+=usr/include/iconv.h
OLD_LIBS+=usr/lib/i18n/libBIG5.so.5
OLD_FILES+=usr/lib/i18n/libBIG5.so
OLD_LIBS+=usr/lib/i18n/libDECHanyu.so.5
OLD_FILES+=usr/lib/i18n/libDECHanyu.so
OLD_LIBS+=usr/lib/i18n/libEUC.so.5
OLD_FILES+=usr/lib/i18n/libEUC.so
OLD_LIBS+=usr/lib/i18n/libEUCTW.so.5
OLD_FILES+=usr/lib/i18n/libEUCTW.so
OLD_LIBS+=usr/lib/i18n/libGBK2K.so.5
OLD_FILES+=usr/lib/i18n/libGBK2K.so
OLD_LIBS+=usr/lib/i18n/libHZ.so.5
OLD_FILES+=usr/lib/i18n/libHZ.so
OLD_LIBS+=usr/lib/i18n/libISO2022.so.5
OLD_FILES+=usr/lib/i18n/libISO2022.so
OLD_LIBS+=usr/lib/i18n/libJOHAB.so.5
OLD_FILES+=usr/lib/i18n/libJOHAB.so
OLD_LIBS+=usr/lib/i18n/libMSKanji.so.5
OLD_FILES+=usr/lib/i18n/libMSKanji.so
OLD_LIBS+=usr/lib/i18n/libUES.so.5
OLD_FILES+=usr/lib/i18n/libUES.so
OLD_LIBS+=usr/lib/i18n/libUTF1632.so.5
OLD_FILES+=usr/lib/i18n/libUTF1632.so
OLD_LIBS+=usr/lib/i18n/libUTF7.so.5
OLD_FILES+=usr/lib/i18n/libUTF7.so
OLD_LIBS+=usr/lib/i18n/libUTF8.so.5
OLD_FILES+=usr/lib/i18n/libUTF8.so
OLD_LIBS+=usr/lib/i18n/libVIQR.so.5
OLD_FILES+=usr/lib/i18n/libVIQR.so
OLD_LIBS+=usr/lib/i18n/libZW.so.5
OLD_FILES+=usr/lib/i18n/libZW.so
OLD_LIBS+=usr/lib/i18n/libiconv_none.so.5
OLD_FILES+=usr/lib/i18n/libiconv_none.so
OLD_LIBS+=usr/lib/i18n/libiconv_std.so.5
OLD_FILES+=usr/lib/i18n/libiconv_std.so
OLD_LIBS+=usr/lib/i18n/libmapper_646.so.5
OLD_FILES+=usr/lib/i18n/libmapper_646.so
OLD_LIBS+=usr/lib/i18n/libmapper_none.so.5
OLD_FILES+=usr/lib/i18n/libmapper_none.so
OLD_LIBS+=usr/lib/i18n/libmapper_parallel.so.5
OLD_FILES+=usr/lib/i18n/libmapper_parallel.so
OLD_LIBS+=usr/lib/i18n/libmapper_serial.so.5
OLD_FILES+=usr/lib/i18n/libmapper_serial.so
OLD_LIBS+=usr/lib/i18n/libmapper_std.so.5
OLD_FILES+=usr/lib/i18n/libmapper_std.so
OLD_LIBS+=usr/lib/i18n/libmapper_zone.so.5
OLD_FILES+=usr/lib/i18n/libmapper_zone.so
OLD_FILES+=usr/share/man/man1/iconv.1.gz
OLD_FILES+=usr/share/man/man1/mkcsmapper.1.gz
OLD_FILES+=usr/share/man/man1/mkesdb.1.gz
@ -2468,6 +2514,7 @@ OLD_FILES+=usr/share/man/man3/iconv_open.3.gz
OLD_FILES+=usr/share/man/man3/iconv_open_into.3.gz
OLD_FILES+=usr/share/man/man3/iconvctl.3.gz
OLD_FILES+=usr/share/man/man3/iconvlist.3.gz
OLD_DIRS+=usr/lib/i18n
OLD_DIRS+=usr/share/i18n
OLD_DIRS+=usr/share/i18n/esdb
OLD_DIRS+=usr/share/i18n/esdb/ISO-2022