Add Unicode support to msdosfs and smbfs; original patches from imura,

bug fixes by Kuan-Chung Chiu <buganini at gmail dot com>.

Tested by me in production for several days at work.
This commit is contained in:
Kevin Lo 2011-11-18 03:05:20 +00:00
parent e8ca9d33da
commit 41f1dccceb
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=227650
16 changed files with 780 additions and 100 deletions

View file

@ -74,6 +74,18 @@ kiconv_add_xlat16_cspair(const char *tocode, const char *fromcode, int flag)
struct xlat16_table xt;
void *data;
char *p;
const char unicode[] = ENCODING_UNICODE;
if ((flag & KICONV_WCTYPE) == 0 &&
strcmp(unicode, tocode) != 0 &&
strcmp(unicode, fromcode) != 0 &&
kiconv_lookupconv(unicode) == 0) {
error = kiconv_add_xlat16_cspair(unicode, fromcode, flag);
if (error)
return (-1);
error = kiconv_add_xlat16_cspair(tocode, unicode, flag);
return (error);
}
if (kiconv_lookupcs(tocode, fromcode) == 0)
return (0);

View file

@ -2557,6 +2557,7 @@ libkern/fnmatch.c standard
libkern/gets.c standard
libkern/iconv.c optional libiconv
libkern/iconv_converter_if.m optional libiconv
libkern/iconv_ucs.c optional libiconv
libkern/iconv_xlat.c optional libiconv
libkern/iconv_xlat16.c optional libiconv
libkern/index.c standard

View file

@ -61,9 +61,9 @@
extern struct iconv_functions *msdosfs_iconv;
static int mbsadjpos(const char **, size_t, size_t, int, int, void *handle);
static u_int16_t dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
static u_char * dos2unixchr(const u_char **, size_t *, int, struct msdosfsmount *);
static u_int16_t unix2doschr(const u_char **, size_t *, struct msdosfsmount *);
static u_int16_t win2unixchr(u_int16_t, struct msdosfsmount *);
static u_char * win2unixchr(u_int16_t, struct msdosfsmount *);
static u_int16_t unix2winchr(const u_char **, size_t *, int, struct msdosfsmount *);
/*
@ -242,7 +242,7 @@ dos2unixfn(dn, un, lower, pmp)
{
size_t i;
int thislong = 0;
u_int16_t c;
u_char *c;
/*
* If first char of the filename is SLOT_E5 (0x05), then the real
@ -259,12 +259,10 @@ dos2unixfn(dn, un, lower, pmp)
for (i = 8; i > 0 && *dn != ' ';) {
c = dos2unixchr((const u_char **)&dn, &i, lower & LCASE_BASE,
pmp);
if (c & 0xff00) {
*un++ = c >> 8;
while (*c != '\0') {
*un++ = *c++;
thislong++;
}
*un++ = c;
thislong++;
}
dn += i;
@ -278,12 +276,10 @@ dos2unixfn(dn, un, lower, pmp)
for (i = 3; i > 0 && *dn != ' ';) {
c = dos2unixchr((const u_char **)&dn, &i,
lower & LCASE_EXT, pmp);
if (c & 0xff00) {
*un++ = c >> 8;
while (*c != '\0') {
*un++ = *c++;
thislong++;
}
*un++ = c;
thislong++;
}
}
*un++ = 0;
@ -652,8 +648,9 @@ win2unixfn(nbp, wep, chksum, pmp)
int chksum;
struct msdosfsmount *pmp;
{
u_char *c;
u_int8_t *cp;
u_int8_t *np, name[WIN_CHARS * 2 + 1];
u_int8_t *np, name[WIN_CHARS * 3 + 1];
u_int16_t code;
int i;
@ -686,10 +683,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
code = win2unixchr(code, pmp);
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
c = win2unixchr(code, pmp);
while (*c != '\0')
*np++ = *c++;
break;
}
cp += 2;
@ -705,10 +701,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
code = win2unixchr(code, pmp);
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
c = win2unixchr(code, pmp);
while (*c != '\0')
*np++ = *c++;
break;
}
cp += 2;
@ -724,10 +719,9 @@ win2unixfn(nbp, wep, chksum, pmp)
*np = '\0';
return -1;
default:
code = win2unixchr(code, pmp);
if (code & 0xff00)
*np++ = code >> 8;
*np++ = code;
c = win2unixchr(code, pmp);
while (*c != '\0')
*np++ = *c++;
break;
}
cp += 2;
@ -817,24 +811,22 @@ mbsadjpos(const char **instr, size_t inlen, size_t outlen, int weight, int flag,
/*
* Convert DOS char to Local char
*/
static u_int16_t
static u_char *
dos2unixchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *pmp)
{
u_char c;
char *outp, outbuf[3];
u_int16_t wc;
u_char c, *outp, outbuf[5];
size_t len, olen;
outp = outbuf;
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
olen = len = 2;
outp = outbuf;
olen = len = 4;
if (lower & (LCASE_BASE | LCASE_EXT))
msdosfs_iconv->convchr_case(pmp->pm_d2u, (const char **)instr,
ilen, &outp, &olen, KICONV_LOWER);
ilen, (char **)&outp, &olen, KICONV_LOWER);
else
msdosfs_iconv->convchr(pmp->pm_d2u, (const char **)instr,
ilen, &outp, &olen);
ilen, (char **)&outp, &olen);
len -= olen;
/*
@ -843,21 +835,21 @@ dos2unixchr(const u_char **instr, size_t *ilen, int lower, struct msdosfsmount *
if (len == 0) {
(*ilen)--;
(*instr)++;
return ('?');
*outp++ = '?';
}
wc = 0;
while(len--)
wc |= (*(outp - len - 1) & 0xff) << (len << 3);
return (wc);
} else {
(*ilen)--;
c = *(*instr)++;
c = dos2unix[c];
if (lower & (LCASE_BASE | LCASE_EXT))
c = u2l[c];
*outp++ = c;
outbuf[1] = '\0';
}
(*ilen)--;
c = *(*instr)++;
c = dos2unix[c];
if (lower & (LCASE_BASE | LCASE_EXT))
c = u2l[c];
return ((u_int16_t)c);
*outp = '\0';
outp = outbuf;
return (outp);
}
/*
@ -940,23 +932,21 @@ unix2doschr(const u_char **instr, size_t *ilen, struct msdosfsmount *pmp)
/*
* Convert Windows char to Local char
*/
static u_int16_t
static u_char *
win2unixchr(u_int16_t wc, struct msdosfsmount *pmp)
{
u_char *inp, *outp, inbuf[3], outbuf[3];
u_char *inp, *outp, inbuf[3], outbuf[5];
size_t ilen, olen, len;
if (wc == 0)
return (0);
outp = outbuf;
if (pmp->pm_flags & MSDOSFSMNT_KICONV && msdosfs_iconv) {
inbuf[0] = (u_char)(wc>>8);
inbuf[1] = (u_char)wc;
inbuf[2] = '\0';
ilen = olen = len = 2;
ilen = 2;
olen = len = 4;
inp = inbuf;
outp = outbuf;
msdosfs_iconv->convchr(pmp->pm_w2u, (const char **)&inp, &ilen,
(char **)&outp, &olen);
len -= olen;
@ -964,21 +954,15 @@ win2unixchr(u_int16_t wc, struct msdosfsmount *pmp)
/*
* return '?' if failed to convert
*/
if (len == 0) {
wc = '?';
return (wc);
}
wc = 0;
while(len--)
wc |= (*(outp - len - 1) & 0xff) << (len << 3);
return (wc);
if (len == 0)
*outp++ = '?';
} else {
*outp++ = (wc & 0xff00) ? '?' : (u_char)(wc & 0xff);
}
if (wc & 0xff00)
wc = '?';
return (wc);
*outp = '\0';
outp = outbuf;
return (outp);
}
/*

View file

@ -34,6 +34,7 @@
#include <sys/vnode.h>
#include <sys/mbuf.h>
#include <sys/mount.h>
#include <sys/endian.h>
#ifdef USE_MD5_HASH
#include <sys/md5.h>
@ -393,6 +394,10 @@ smbfs_smb_setpattr(struct smbnode *np, u_int16_t attr, struct timespec *mtime,
if (error)
break;
mb_put_uint8(mbp, SMB_DT_ASCII);
if (SMB_UNICODE_STRINGS(SSTOVC(ssp))) {
mb_put_padbyte(mbp);
mb_put_uint8(mbp, 0); /* 1st byte of NULL Unicode char */
}
mb_put_uint8(mbp, 0);
smb_rq_bend(rqp);
error = smb_rq_simple(rqp);
@ -909,6 +914,10 @@ smbfs_smb_search(struct smbfs_fctx *ctx)
mb_put_uint16le(mbp, 0); /* context length */
ctx->f_flags &= ~SMBFS_RDD_FINDFIRST;
} else {
if (SMB_UNICODE_STRINGS(vcp)) {
mb_put_padbyte(mbp);
mb_put_uint8(mbp, 0);
}
mb_put_uint8(mbp, 0); /* file name length */
mb_put_uint8(mbp, SMB_DT_VARIABLE);
mb_put_uint16le(mbp, SMB_SKEYLEN);
@ -1069,7 +1078,7 @@ smbfs_smb_trans2find2(struct smbfs_fctx *ctx)
mb_put_uint32le(mbp, 0); /* resume key */
mb_put_uint16le(mbp, flags);
if (ctx->f_rname)
mb_put_mem(mbp, ctx->f_rname, strlen(ctx->f_rname) + 1, MB_MSYSTEM);
mb_put_mem(mbp, ctx->f_rname, ctx->f_rnamelen + 1, MB_MSYSTEM);
else
mb_put_uint8(mbp, 0); /* resume file name */
#if 0
@ -1152,7 +1161,10 @@ static int
smbfs_findopenLM2(struct smbfs_fctx *ctx, struct smbnode *dnp,
const char *wildcard, int wclen, int attr, struct smb_cred *scred)
{
ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
ctx->f_name = malloc(SMB_MAXFNAMELEN * 2, M_SMBFSDATA, M_WAITOK);
} else
ctx->f_name = malloc(SMB_MAXFNAMELEN, M_SMBFSDATA, M_WAITOK);
if (ctx->f_name == NULL)
return ENOMEM;
ctx->f_infolevel = SMB_DIALECT(SSTOVC(ctx->f_ssp)) < SMB_DIALECT_NTLM0_12 ?
@ -1231,7 +1243,10 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx, int limit)
SMBERROR("unexpected info level %d\n", ctx->f_infolevel);
return EINVAL;
}
nmlen = min(size, SMB_MAXFNAMELEN);
if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
nmlen = min(size, SMB_MAXFNAMELEN * 2);
} else
nmlen = min(size, SMB_MAXFNAMELEN);
cp = ctx->f_name;
error = md_get_mem(mbp, cp, nmlen, MB_MSYSTEM);
if (error)
@ -1245,8 +1260,12 @@ smbfs_findnextLM2(struct smbfs_fctx *ctx, int limit)
return EBADRPC;
}
}
if (nmlen && cp[nmlen - 1] == 0)
nmlen--;
if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
if (nmlen > 1 && cp[nmlen - 1] == 0 && cp[nmlen - 2] == 0)
nmlen -= 2;
} else
if (nmlen && cp[nmlen - 1] == 0)
nmlen--;
if (nmlen == 0)
return EBADRPC;
@ -1330,10 +1349,17 @@ smbfs_findnext(struct smbfs_fctx *ctx, int limit, struct smb_cred *scred)
error = smbfs_findnextLM2(ctx, limit);
if (error)
return error;
if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
(ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
ctx->f_name[1] == '.'))
continue;
if (SMB_UNICODE_STRINGS(SSTOVC(ctx->f_ssp))) {
if ((ctx->f_nmlen == 2 &&
*(u_int16_t *)ctx->f_name == htole16(0x002e)) ||
(ctx->f_nmlen == 4 &&
*(u_int32_t *)ctx->f_name == htole32(0x002e002e)))
continue;
} else
if ((ctx->f_nmlen == 1 && ctx->f_name[0] == '.') ||
(ctx->f_nmlen == 2 && ctx->f_name[0] == '.' &&
ctx->f_name[1] == '.'))
continue;
break;
}
smbfs_fname_tolocal(SSTOVC(ctx->f_ssp), ctx->f_name, &ctx->f_nmlen,

View file

@ -130,7 +130,10 @@ smb_fphelp(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *np,
return smb_put_dmem(mbp, vcp, "\\", 2, caseopt);*/
while (i--) {
np = *--npp;
error = mb_put_uint8(mbp, '\\');
if (SMB_UNICODE_STRINGS(vcp))
error = mb_put_uint16le(mbp, '\\');
else
error = mb_put_uint8(mbp, '\\');
if (error)
break;
error = smb_put_dmem(mbp, vcp, np->n_name, np->n_nmlen, caseopt);
@ -148,6 +151,11 @@ smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *dnp,
int caseopt = SMB_CS_NONE;
int error;
if (SMB_UNICODE_STRINGS(vcp)) {
error = mb_put_padbyte(mbp);
if (error)
return error;
}
if (SMB_DIALECT(vcp) < SMB_DIALECT_LANMAN1_0)
caseopt |= SMB_CS_UPPER;
if (dnp != NULL) {
@ -156,7 +164,10 @@ smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *dnp,
return error;
}
if (name) {
error = mb_put_uint8(mbp, '\\');
if (SMB_UNICODE_STRINGS(vcp))
error = mb_put_uint16le(mbp, '\\');
else
error = mb_put_uint8(mbp, '\\');
if (error)
return error;
error = smb_put_dmem(mbp, vcp, name, nmlen, caseopt);
@ -164,6 +175,8 @@ smbfs_fullpath(struct mbchain *mbp, struct smb_vc *vcp, struct smbnode *dnp,
return error;
}
error = mb_put_uint8(mbp, 0);
if (SMB_UNICODE_STRINGS(vcp) && error == 0)
error = mb_put_uint8(mbp, 0);
return error;
}
@ -191,6 +204,17 @@ smbfs_fname_tolocal(struct smb_vc *vcp, char *name, int *nmlen, int caseopt)
error = iconv_conv_case
(vcp->vc_tolocal, (const char **)&ibuf, &ilen, &obuf, &olen, copt);
if (error && SMB_UNICODE_STRINGS(vcp)) {
/*
* If using unicode, leaving a file name as it was when
* convert fails will cause a problem because the file name
* will contain NULL.
* Here, put '?' and give converted file name.
*/
*obuf = '?';
olen--;
error = 0;
}
if (!error) {
*nmlen = sizeof(outbuf) - olen;
memcpy(name, outbuf, *nmlen);

View file

@ -127,6 +127,21 @@ mb_reserve(struct mbchain *mbp, int size)
return bpos;
}
/*
 * Append a single zero byte to the chain when the current write position
 * (data start of the current mbuf plus its length) sits on an odd address;
 * otherwise append nothing.
 *
 * Returns 0 when no padding is needed, or the result of mb_put_mem().
 */
int
mb_put_padbyte(struct mbchain *mbp)
{
	char pad = 0;
	caddr_t wrpos = mtod(mbp->mb_cur, caddr_t) + mbp->mb_cur->m_len;

	/* even address: already aligned, nothing to add */
	if (((unsigned long)wrpos & 1) == 0)
		return 0;
	return mb_put_mem(mbp, (caddr_t)&pad, 1, MB_MSYSTEM);
}
int
mb_put_uint8(struct mbchain *mbp, uint8_t x)
{

View file

@ -377,6 +377,18 @@ iconv_sysctl_cslist(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern_iconv, OID_AUTO, cslist, CTLFLAG_RD | CTLTYPE_OPAQUE,
NULL, 0, iconv_sysctl_cslist, "S,xlat", "registered charset pairs");
int
iconv_add(const char *converter, const char *to, const char *from)
{
struct iconv_converter_class *dcp;
struct iconv_cspair *csp;
if (iconv_lookupconv(converter, &dcp) != 0)
return EINVAL;
return iconv_register_cspair(to, from, dcp, NULL, &csp);
}
/*
* Add new charset pair
*/

540
sys/libkern/iconv_ucs.c Normal file
View file

@ -0,0 +1,540 @@
/*-
* Copyright (c) 2003, 2005 Ryuichiro Imura
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/iconv.h>
#include "iconv_converter_if.h"
/*
* "UCS" converter
*/
/* Bits stored in iconv_ucs.convtype. */
#define KICONV_UCS_COMBINE 0x1
#define KICONV_UCS_FROM_UTF8 0x2
#define KICONV_UCS_TO_UTF8 0x4
#define KICONV_UCS_FROM_LE 0x8
#define KICONV_UCS_TO_LE 0x10
#define KICONV_UCS_FROM_UTF16 0x20
#define KICONV_UCS_TO_UTF16 0x40
#define KICONV_UCS_UCS4 0x80
#define ENCODING_UTF16 "UTF-16BE"
#define ENCODING_UTF8 "UTF-8"
/*
 * Encodings this converter handles itself, with the convtype bits to set
 * when the encoding appears as the source (from_flag) or the destination
 * (to_flag) of a conversion.  The list ends with a NULL name.
 */
static struct {
	const char *name;
	int from_flag, to_flag;
} unicode_family[] = {
	{
		.name = "UTF-8",
		.from_flag = KICONV_UCS_FROM_UTF8,
		.to_flag = KICONV_UCS_TO_UTF8
	},
	{
		.name = "UCS-2LE",
		.from_flag = KICONV_UCS_FROM_LE,
		.to_flag = KICONV_UCS_TO_LE
	},
	{
		.name = "UTF-16BE",
		.from_flag = KICONV_UCS_FROM_UTF16,
		.to_flag = KICONV_UCS_TO_UTF16
	},
	{
		.name = "UTF-16LE",
		.from_flag = KICONV_UCS_FROM_UTF16 | KICONV_UCS_FROM_LE,
		.to_flag = KICONV_UCS_TO_UTF16 | KICONV_UCS_TO_LE
	},
	{ .name = NULL, .from_flag = 0, .to_flag = 0 }
};
static uint32_t utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen);
static u_char *ucs4_to_utf8(uint32_t ucs4, char * dst, size_t *utf8width, size_t dstlen);
static uint32_t encode_surrogate(uint32_t code);
static uint32_t decode_surrogate(const u_char *ucs);
#ifdef MODULE_DEPEND
MODULE_DEPEND(iconv_ucs, libiconv, 2, 2, 2);
#endif
/*
* UCS converter instance
*/
struct iconv_ucs {
KOBJ_FIELDS;
/* bitmask of KICONV_UCS_* flags, computed in iconv_ucs_open() */
int convtype;
/* the csp argument given to iconv_ucs_open() */
struct iconv_cspair * d_csp;
/* the cspf argument, recorded only when the source is UTF-8 or little-endian */
struct iconv_cspair * d_cspf;
/* nested handle: source charset -> ENCODING_UNICODE (NULL when unused) */
void * f_ctp;
/* nested handle: ENCODING_UNICODE -> destination charset (NULL when unused) */
void * t_ctp;
/* handle opened on KICONV_WCTYPE_NAME, passed to towlower()/towupper() */
void * ctype;
};
/*
 * Create a UCS converter instance.
 *
 * dcp   converter class, used as the kobj class for the new instance
 * csp   charset pair supplying the destination name (and the source name
 *       when cspf is NULL)
 * cspf  optional second pair; when non-NULL its cp_from is the source name
 *       and the instance runs in "combine" mode (source -> ENCODING_UNICODE
 *       -> destination)
 * dpp   receives the new instance
 *
 * Always returns 0.  Results of the nested iconv_open() calls are not
 * checked here; iconv_ucs_conv() and iconv_ucs_close() test the handles
 * for NULL before using them.
 * NOTE(review): this assumes iconv_open() leaves the handle untouched on
 * failure - confirm.
 */
static int
iconv_ucs_open(struct iconv_converter_class *dcp,
    struct iconv_cspair *csp, struct iconv_cspair *cspf, void **dpp)
{
	struct iconv_ucs *dp;
	int i;
	const char *from, *to;

	dp = (struct iconv_ucs *)kobj_create((struct kobj_class*)dcp, M_ICONV, M_WAITOK);
	to = csp->cp_to;
	from = cspf ? cspf->cp_from : csp->cp_from;

	/* Work out which encodings are handled natively on each side. */
	dp->convtype = 0;
	if (cspf)
		dp->convtype |= KICONV_UCS_COMBINE;
	for (i = 0; unicode_family[i].name; i++) {
		if (strcmp(from, unicode_family[i].name) == 0)
			dp->convtype |= unicode_family[i].from_flag;
		if (strcmp(to, unicode_family[i].name) == 0)
			dp->convtype |= unicode_family[i].to_flag;
	}
	/* Surrogate pairs are only handled when the pivot encoding is UTF-16. */
	if (strcmp(ENCODING_UNICODE, ENCODING_UTF16) == 0)
		dp->convtype |= KICONV_UCS_UCS4;
	else
		dp->convtype &= ~KICONV_UCS_UCS4;

	/*
	 * In combine mode, open a nested converter for each side that is
	 * neither UTF-8 nor little-endian.
	 */
	dp->f_ctp = dp->t_ctp = NULL;
	if (dp->convtype & KICONV_UCS_COMBINE) {
		if ((dp->convtype & KICONV_UCS_FROM_UTF8) == 0 &&
		    (dp->convtype & KICONV_UCS_FROM_LE) == 0) {
			iconv_open(ENCODING_UNICODE, from, &dp->f_ctp);
		}
		if ((dp->convtype & KICONV_UCS_TO_UTF8) == 0 &&
		    (dp->convtype & KICONV_UCS_TO_LE) == 0) {
			iconv_open(to, ENCODING_UNICODE, &dp->t_ctp);
		}
	}

	/* Case-mapping table, needed only when one side is UTF-8. */
	dp->ctype = NULL;
	if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_TO_UTF8))
		iconv_open(KICONV_WCTYPE_NAME, ENCODING_UTF8, &dp->ctype);

	dp->d_csp = csp;
	/*
	 * iconv_ucs_close() branches on d_cspf, so give it a defined value on
	 * every path instead of depending on how the object was allocated.
	 */
	dp->d_cspf = NULL;
	if (dp->convtype & (KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE)) {
		if (cspf) {
			dp->d_cspf = cspf;
			cspf->cp_refcount++;
		} else
			csp->cp_refcount++;
	}
	if (dp->convtype & (KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE))
		csp->cp_refcount++;
	*dpp = (void*)dp;
	return 0;
}
/*
 * Destroy a UCS converter instance: close whichever nested handles were
 * opened, drop the cspair reference counts taken by iconv_ucs_open(), and
 * free the object.  Always returns 0.
 */
static int
iconv_ucs_close(void *data)
{
	struct iconv_ucs *dp = data;
	const int from_mask = KICONV_UCS_FROM_UTF8 | KICONV_UCS_FROM_LE;
	const int to_mask = KICONV_UCS_TO_UTF8 | KICONV_UCS_TO_LE;

	if (dp->f_ctp != NULL)
		iconv_close(dp->f_ctp);
	if (dp->t_ctp != NULL)
		iconv_close(dp->t_ctp);
	if (dp->ctype != NULL)
		iconv_close(dp->ctype);

	/* source-side reference: held on d_cspf if recorded, else on d_csp */
	if (dp->d_cspf != NULL)
		dp->d_cspf->cp_refcount--;
	else if ((dp->convtype & from_mask) != 0)
		dp->d_csp->cp_refcount--;

	/* destination-side reference */
	if ((dp->convtype & to_mask) != 0)
		dp->d_csp->cp_refcount--;

	kobj_delete((struct kobj*)data, M_ICONV);
	return 0;
}
/*
 * Convert characters from *inbuf to *outbuf through ENCODING_UNICODE.
 *
 * d2p           instance created by iconv_ucs_open()
 * inbuf         in/out: source cursor, advanced past the consumed bytes
 * inbytesleft   in/out: remaining source bytes
 * outbuf        in/out: destination cursor, advanced past the produced bytes
 * outbytesleft  in/out: remaining destination space
 * convchar      when 1, stop after one character
 * casetype      KICONV_FROM_LOWER/KICONV_FROM_UPPER are applied on the input
 *               side, KICONV_LOWER/KICONV_UPPER on the output side
 *
 * Returns 0 on success (also when any buffer pointer is NULL), -1 on
 * invalid or unrepresentable input or a short buffer, or the non-zero
 * result of a nested iconv_convchr_case().  On failure the cursors and
 * counters reflect only the characters that were fully converted.
 */
static int
iconv_ucs_conv(void *d2p, const char **inbuf,
    size_t *inbytesleft, char **outbuf, size_t *outbytesleft,
    int convchar, int casetype)
{
	struct iconv_ucs *dp = (struct iconv_ucs*)d2p;
	int ret = 0, i;
	size_t in, on, ir, or, inlen, outlen, ucslen;
	const char *src, *p;
	char *dst;
	u_char ucs[4], *q;
	uint32_t code;

	if (inbuf == NULL || *inbuf == NULL || outbuf == NULL || *outbuf == NULL)
		return 0;
	ir = in = *inbytesleft;
	or = on = *outbytesleft;
	src = *inbuf;
	dst = *outbuf;

	while (ir > 0 && or > 0) {
		/*
		 * The first half of conversion.
		 * (convert any code into ENCODING_UNICODE)
		 */
		code = 0;
		p = src;
		if (dp->convtype & KICONV_UCS_FROM_UTF8) {
			/* convert UTF-8 to ENCODING_UNICODE */
			inlen = 0;
			code = utf8_to_ucs4(p, &inlen, ir);
			if (code == 0) {
				ret = -1;
				break;
			}
			if (casetype == KICONV_FROM_LOWER && dp->ctype) {
				code = towlower(code, dp->ctype);
			} else if (casetype == KICONV_FROM_UPPER && dp->ctype) {
				code = towupper(code, dp->ctype);
			}
			if ((code >= 0xd800 && code < 0xe000) || code >= 0x110000 ||
			    (inlen == 4 && code < 0x10000)) {
				/*
				 * Surrogate range, beyond Unicode, or a 4-byte
				 * form of a value that fits in fewer bytes
				 * (which would underflow in encode_surrogate()).
				 */
				ret = -1;
				break;
			}
			if (inlen == 4) {
				if (dp->convtype & KICONV_UCS_UCS4) {
					ucslen = 4;
					code = encode_surrogate(code);
				} else {
					/* can't handle with ucs-2 */
					ret = -1;
					break;
				}
			} else {
				ucslen = 2;
			}
			/* store the code big-endian into ucs[] */
			for (q = ucs, i = (int)ucslen - 1 ; i >= 0 ; i--)
				*q++ = (code >> (i << 3)) & 0xff;
		} else if (dp->convtype & KICONV_UCS_COMBINE && dp->f_ctp) {
			/* convert local code to ENCODING_UNICODE */
			ucslen = 4;
			inlen = ir;
			q = ucs;
			ret = iconv_convchr_case(dp->f_ctp, &p, &inlen, (char **)&q,
			    &ucslen, casetype & (KICONV_FROM_LOWER | KICONV_FROM_UPPER));
			if (ret)
				break;
			/* turn "bytes left" into "bytes used" */
			inlen = ir - inlen;
			ucslen = 4 - ucslen;
		} else {
			/* src code is a proper subset of ENCODING_UNICODE */
			if (ir < 2) {
				/* truncated code unit: do not read past the input */
				ret = -1;
				break;
			}
			q = ucs;
			if (dp->convtype & KICONV_UCS_FROM_LE) {
				*q = *(p + 1);
				*(q + 1) = *p;
				p += 2;
			} else {
				*q = *p++;
				*(q + 1) = *p++;
			}
			if ((*q & 0xfc) == 0xd8) {
				/* high surrogate: a second unit must follow */
				if (dp->convtype & KICONV_UCS_UCS4 &&
				    dp->convtype & KICONV_UCS_FROM_UTF16) {
					inlen = ucslen = 4;
				} else {
					/* invalid unicode */
					ret = -1;
					break;
				}
			} else {
				inlen = ucslen = 2;
			}
			if (ir < inlen) {
				ret = -1;
				break;
			}
			if (ucslen == 4) {
				q += 2;
				if (dp->convtype & KICONV_UCS_FROM_LE) {
					*q = *(p + 1);
					*(q + 1) = *p;
				} else {
					*q = *p++;
					*(q + 1) = *p;
				}
				if ((*q & 0xfc) != 0xdc) {
					/* invalid unicode */
					ret = -1;
					break;
				}
			}
		}

		/*
		 * The second half of conversion.
		 * (convert ENCODING_UNICODE into any code)
		 */
		p = (const char *)ucs;
		if (dp->convtype & KICONV_UCS_TO_UTF8) {
			if (ucslen == 4 && dp->convtype & KICONV_UCS_UCS4) {
				/* decode surrogate pair */
				code = decode_surrogate(ucs);
			} else {
				code = (ucs[0] << 8) | ucs[1];
			}
			if (casetype == KICONV_LOWER && dp->ctype) {
				code = towlower(code, dp->ctype);
			} else if (casetype == KICONV_UPPER && dp->ctype) {
				code = towupper(code, dp->ctype);
			}
			outlen = 0;
			if (ucs4_to_utf8(code, dst, &outlen, or) == NULL) {
				ret = -1;
				break;
			}
			src += inlen;
			ir -= inlen;
			dst += outlen;
			or -= outlen;
		} else if (dp->convtype & KICONV_UCS_COMBINE && dp->t_ctp) {
			/* the nested converter advances dst and or itself */
			ret = iconv_convchr_case(dp->t_ctp, &p, &ucslen, &dst,
			    &or, casetype & (KICONV_LOWER | KICONV_UPPER));
			if (ret)
				break;
			src += inlen;
			ir -= inlen;
		} else {
			/* dst code is a proper subset of ENCODING_UNICODE */
			if (or < ucslen) {
				ret = -1;
				break;
			}
			/*
			 * Reject a surrogate pair the destination cannot hold
			 * before consuming input or writing output, so the
			 * counters stay accurate on failure.
			 */
			if (ucslen == 4 &&
			    ((dp->convtype & KICONV_UCS_UCS4) == 0 ||
			    (dp->convtype & KICONV_UCS_TO_UTF16) == 0)) {
				ret = -1;
				break;
			}
			src += inlen;
			ir -= inlen;
			or -= ucslen;
			if (dp->convtype & KICONV_UCS_TO_LE) {
				*dst++ = *(p + 1);
				*dst++ = *p;
				p += 2;
			} else {
				*dst++ = *p++;
				*dst++ = *p++;
			}
			if (ucslen == 4) {
				if (dp->convtype & KICONV_UCS_TO_LE) {
					*dst++ = *(p + 1);
					*dst++ = *p;
				} else {
					*dst++ = *p++;
					*dst++ = *p;
				}
			}
		}

		if (convchar == 1)
			break;
	}

	/* report progress back through the caller's cursors and counters */
	*inbuf += in - ir;
	*outbuf += on - or;
	*inbytesleft -= in - ir;
	*outbytesleft -= on - or;
	return (ret);
}
/*
 * Converter-class init hook: register both directions between
 * ENCODING_UTF8 and ENCODING_UNICODE with the converter named
 * ENCODING_UNICODE.  Returns the first non-zero iconv_add() result,
 * or 0 when both registrations succeed.
 */
static int
iconv_ucs_init(struct iconv_converter_class *dcp)
{
	int error;

	/* UTF-8 -> ENCODING_UNICODE */
	error = iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, ENCODING_UTF8);
	if (error == 0) {
		/* ENCODING_UNICODE -> UTF-8 */
		error = iconv_add(ENCODING_UNICODE, ENCODING_UTF8,
		    ENCODING_UNICODE);
	}
	return (error);
}
/*
 * Converter-class teardown hook.  It performs no work and always
 * returns 0.
 */
static int
iconv_ucs_done(struct iconv_converter_class *dcp)
{
	(void)dcp;	/* unused */
	return 0;
}
/*
 * Report the name of this converter class, which is the
 * ENCODING_UNICODE string.
 */
static const char *
iconv_ucs_name(struct iconv_converter_class *dcp)
{
	const char *name = ENCODING_UNICODE;

	(void)dcp;	/* unused */
	return (name);
}
/*
 * Method table binding the iconv_converter interface methods to the
 * implementations in this file.
 */
static kobj_method_t iconv_ucs_methods[] = {
KOBJMETHOD(iconv_converter_open, iconv_ucs_open),
KOBJMETHOD(iconv_converter_close, iconv_ucs_close),
KOBJMETHOD(iconv_converter_conv, iconv_ucs_conv),
KOBJMETHOD(iconv_converter_init, iconv_ucs_init),
KOBJMETHOD(iconv_converter_done, iconv_ucs_done),
KOBJMETHOD(iconv_converter_name, iconv_ucs_name),
/* end-of-table marker */
{0, 0}
};
/*
 * Declare the "ucs" converter with struct iconv_ucs as its instance size.
 * NOTE(review): presumably this macro ties iconv_ucs_methods to the class
 * and registers it with the iconv framework - confirm against sys/iconv.h.
 */
KICONV_CONVERTER(ucs, sizeof(struct iconv_ucs));
/*
 * Decode one UTF-8 sequence of 1 to 4 bytes.
 *
 * src        start of the sequence
 * utf8width  set to the number of bytes consumed, on success only
 * srclen     number of readable bytes at src
 *
 * Returns the decoded value, or 0 when the input is empty, truncated, has
 * an illegal lead or continuation byte, or is an overlong form (a value
 * encoded in more bytes than it needs).  A NUL byte also decodes to 0, so
 * callers cannot tell it apart from an error.
 */
static uint32_t
utf8_to_ucs4(const char *src, size_t *utf8width, size_t srclen)
{
	/* smallest value that legitimately needs a w-byte sequence */
	static const uint32_t min_code[] = { 0, 0, 0x80, 0x800, 0x10000 };
	const unsigned char *s = (const unsigned char *)src;
	size_t i, w;
	uint32_t ucs4;

	if (srclen == 0)
		return (0);

	/*
	 * The lead byte gives the sequence width and the top payload bits.
	 */
	if ((s[0] & 0x80) == 0) {
		/*
		 * utf-8: 0xxxxxxx
		 * ucs-4: 00000000 00000000 00000000 0xxxxxxx
		 */
		w = 1;
		ucs4 = s[0] & 0x7f;
	} else if ((s[0] & 0xe0) == 0xc0) {
		/*
		 * utf-8: 110xxxxx 10yyyyyy
		 * ucs-4: 00000000 00000000 00000xxx xxyyyyyy
		 */
		w = 2;
		ucs4 = s[0] & 0x1f;
	} else if ((s[0] & 0xf0) == 0xe0) {
		/*
		 * utf-8: 1110xxxx 10yyyyyy 10zzzzzz
		 * ucs-4: 00000000 00000000 xxxxyyyy yyzzzzzz
		 */
		w = 3;
		ucs4 = s[0] & 0x0f;
	} else if ((s[0] & 0xf8) == 0xf0) {
		/*
		 * utf-8: 11110www 10xxxxxx 10yyyyyy 10zzzzzz
		 * ucs-4: 00000000 000wwwxx xxxxyyyy yyzzzzzz
		 */
		w = 4;
		ucs4 = s[0] & 0x07;
	} else {
		/* stray continuation byte or a lead byte for 5+ bytes */
		return (0);
	}

	if (srclen < w)
		return (0);

	/*
	 * Fold in the continuation bytes, 6 payload bits each.
	 */
	for (i = 1 ; i < w ; i++) {
		if ((s[i] & 0xc0) != 0x80) {
			/* invalid: leading 2 bits are not "10" */
			return (0);
		}
		ucs4 = (ucs4 << 6) | (s[i] & 0x3f);
	}

	/*
	 * Reject overlong forms: they alias shorter encodings, and a 4-byte
	 * form below 0x10000 would later be treated as a surrogate pair.
	 */
	if (ucs4 < min_code[w])
		return (0);

	*utf8width = w;
	return (ucs4);
}
/*
 * Encode one code point as UTF-8.
 *
 * ucs4       value to encode
 * dst        output buffer
 * utf8width  set to the number of bytes written, on success only
 * dstlen     space available at dst
 *
 * Returns dst (as u_char *) on success, or NULL when the value is above
 * U+10FFFF or dst is too small for the encoding.
 * NOTE(review): values in the surrogate range 0xd800-0xdfff are still
 * encoded as 3 bytes; confirm whether callers depend on that.
 */
static u_char *
ucs4_to_utf8(uint32_t ucs4, char *dst, size_t *utf8width, size_t dstlen)
{
	u_char lead, *p;
	size_t i, w;

	/*
	 * determine utf-8 width and leading bits
	 */
	if (ucs4 < 0x80) {
		w = 1;
		lead = 0;	/* "0" */
	} else if (ucs4 < 0x800) {
		w = 2;
		lead = 0xc0;	/* "110" */
	} else if (ucs4 < 0x10000) {
		w = 3;
		lead = 0xe0;	/* "1110" */
	} else if (ucs4 < 0x110000) {
		w = 4;
		lead = 0xf0;	/* "11110" */
	} else {
		/* beyond the last Unicode code point */
		return (NULL);
	}

	if (dstlen < w)
		return (NULL);

	/*
	 * construct utf-8, filling continuation bytes from the end
	 */
	p = (u_char *)dst;
	for (i = w - 1 ; i >= 1 ; i--) {
		/* low 6 bits, tagged with the "10" continuation prefix */
		p[i] = (ucs4 & 0x3f) | 0x80;
		ucs4 >>= 6;
	}
	p[0] = ucs4 | lead;

	*utf8width = w;
	return (p);
}
/*
 * Pack a supplementary code point into a UTF-16 surrogate pair: the high
 * surrogate lands in the upper 16 bits of the result and the low surrogate
 * in the lower 16 bits.
 */
static uint32_t
encode_surrogate(uint32_t code)
{
	uint32_t offset = code - 0x10000;
	uint32_t high = 0xd800 | ((offset >> 10) & 0x3ff);
	uint32_t low = 0xdc00 | (offset & 0x3ff);

	return ((high << 16) | low);
}
/*
 * Rebuild a code point from four bytes holding a big-endian UTF-16
 * surrogate pair (high surrogate in ucs[0..1], low in ucs[2..3]).  Only the
 * ten payload bits of each half are used; the surrogate tag bits are not
 * checked.
 */
static uint32_t
decode_surrogate(const u_char *ucs)
{
	uint32_t high10 = ((uint32_t)(ucs[0] & 0x3) << 8) | ucs[1];
	uint32_t low10 = ((uint32_t)(ucs[2] & 0x3) << 8) | ucs[3];

	return (((high10 << 10) | low10) + 0x10000);
}

View file

@ -3,12 +3,13 @@
.PATH: ${.CURDIR}/../../libkern ${.CURDIR}/../../sys
KMOD= libiconv
SRCS= iconv.c iconv_xlat.c iconv_xlat16.c
SRCS= iconv.c iconv_ucs.c iconv_xlat.c iconv_xlat16.c
SRCS+= iconv.h
SRCS+= iconv_converter_if.c iconv_converter_if.h
MFILES= libkern/iconv_converter_if.m
EXPORT_SYMS= iconv_open \
EXPORT_SYMS= iconv_add \
iconv_open \
iconv_close \
iconv_conv \
iconv_conv_case \

View file

@ -11,6 +11,7 @@ EXPORT_SYMS= mb_init \
mb_detach \
mb_fixhdr \
mb_reserve \
mb_put_padbyte \
mb_put_uint8 \
mb_put_uint16be \
mb_put_uint16le \

View file

@ -444,13 +444,29 @@ smb_vc_create(struct smb_vcspec *vcspec,
goto fail;
if (vcspec->servercs[0]) {
error = (int)iconv_open(vcspec->servercs, vcspec->localcs,
&vcp->vc_toserver);
&vcp->vc_cp_toserver);
if (error)
goto fail;
error = (int)iconv_open(vcspec->localcs, vcspec->servercs,
&vcp->vc_tolocal);
&vcp->vc_cp_tolocal);
if (error)
goto fail;
vcp->vc_toserver = vcp->vc_cp_toserver;
vcp->vc_tolocal = vcp->vc_cp_tolocal;
iconv_add(ENCODING_UNICODE, ENCODING_UNICODE, SMB_UNICODE_NAME);
iconv_add(ENCODING_UNICODE, SMB_UNICODE_NAME, ENCODING_UNICODE);
error = (int)iconv_open(SMB_UNICODE_NAME, vcspec->localcs,
&vcp->vc_ucs_toserver);
if (!error) {
error = (int)iconv_open(vcspec->localcs, SMB_UNICODE_NAME,
&vcp->vc_ucs_tolocal);
}
if (error) {
if (vcp->vc_ucs_toserver)
iconv_close(vcp->vc_ucs_toserver);
vcp->vc_ucs_toserver = NULL;
vcp->vc_ucs_tolocal = NULL;
}
}
error = (int)smb_iod_create(vcp);
if (error)
@ -486,9 +502,17 @@ smb_vc_free(struct smb_connobj *cp)
if (vcp->vc_toupper)
iconv_close(vcp->vc_toupper);
if (vcp->vc_tolocal)
iconv_close(vcp->vc_tolocal);
vcp->vc_tolocal = NULL;
if (vcp->vc_toserver)
iconv_close(vcp->vc_toserver);
vcp->vc_toserver = NULL;
if (vcp->vc_cp_tolocal)
iconv_close(vcp->vc_cp_tolocal);
if (vcp->vc_cp_toserver)
iconv_close(vcp->vc_cp_toserver);
if (vcp->vc_ucs_tolocal)
iconv_close(vcp->vc_ucs_tolocal);
if (vcp->vc_ucs_toserver)
iconv_close(vcp->vc_ucs_toserver);
smb_co_done(VCTOCP(vcp));
smb_sl_destroy(&vcp->vc_stlock);
free(vcp, M_SMBCONN);

View file

@ -242,6 +242,10 @@ struct smb_vc {
void * vc_toupper; /* local charset */
void * vc_toserver; /* local charset to server one */
void * vc_tolocal; /* server charset to local one */
void * vc_cp_toserver; /* local charset to server one (using CodePage) */
void * vc_cp_tolocal; /* server charset to local one (using CodePage) */
void * vc_ucs_toserver; /* local charset to server one (using UCS-2) */
void * vc_ucs_tolocal; /* server charset to local one (using UCS-2) */
int vc_number; /* number of this VC from the client side */
int vc_genid;
uid_t vc_uid; /* user id of connection */
@ -272,6 +276,8 @@ struct smb_vc {
#define SMB_UNICODE_STRINGS(vcp) ((vcp)->vc_hflags2 & SMB_FLAGS2_UNICODE)
#define SMB_UNICODE_NAME "UCS-2LE"
/*
* smb_share structure describes connection to the given SMB share (tree).
* Connection to share is always built on top of the VC.

View file

@ -121,9 +121,17 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
u_int8_t wc, stime[8], sblen;
u_int16_t dindex, tw, tw1, swlen, bc;
int error, maxqsz;
int unicode = SMB_UNICODE_STRINGS(vcp);
void * servercharset = vcp->vc_toserver;
void * localcharset = vcp->vc_tolocal;
if (smb_smb_nomux(vcp, scred, __func__) != 0)
return EINVAL;
/* Disable Unicode for SMB_COM_NEGOTIATE requests */
if (unicode) {
vcp->vc_toserver = vcp->vc_cp_toserver;
vcp->vc_tolocal = vcp->vc_cp_tolocal;
}
vcp->vc_hflags = 0;
vcp->vc_hflags2 = 0;
vcp->obj.co_flags &= ~(SMBV_ENCRYPT);
@ -180,7 +188,7 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
SMBERROR("Unexpected length of security blob (%d)\n", sblen);
break;
}
error = md_get_uint16(mdp, &bc);
error = md_get_uint16le(mdp, &bc);
if (error)
break;
if (sp->sv_caps & SMB_CAP_EXT_SECURITY)
@ -193,6 +201,13 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
}
if (sp->sv_sm & SMB_SM_SIGS_REQUIRE)
vcp->vc_hflags2 |= SMB_FLAGS2_SECURITY_SIGNATURE;
if (vcp->vc_ucs_toserver &&
sp->sv_caps & SMB_CAP_UNICODE) {
/*
* They do Unicode.
*/
vcp->obj.co_flags |= SMBV_UNICODE;
}
vcp->vc_hflags2 |= SMB_FLAGS2_KNOWS_LONG_NAMES;
if (dp->d_id == SMB_DIALECT_NTLM0_12 &&
sp->sv_maxtx < 4096 &&
@ -200,7 +215,13 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
vcp->obj.co_flags |= SMBV_WIN95;
SMBSDEBUG("Win95 detected\n");
}
} else if (dp->d_id > SMB_DIALECT_CORE) {
error = 0;
break;
}
vcp->vc_hflags2 &= ~(SMB_FLAGS2_EXT_SEC|SMB_FLAGS2_DFS|
SMB_FLAGS2_ERR_STATUS|SMB_FLAGS2_UNICODE);
unicode = 0;
if (dp->d_id > SMB_DIALECT_CORE) {
md_get_uint16le(mdp, &tw);
sp->sv_sm = tw;
md_get_uint16le(mdp, &tw);
@ -217,7 +238,7 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
if (swlen > SMB_MAXCHALLENGELEN)
break;
md_get_uint16(mdp, NULL); /* mbz */
if (md_get_uint16(mdp, &bc) != 0)
if (md_get_uint16le(mdp, &bc) != 0)
break;
if (bc < swlen)
break;
@ -259,6 +280,12 @@ smb_smb_negotiate(struct smb_vc *vcp, struct smb_cred *scred)
SMBSDEBUG("MAXTX = %d\n", sp->sv_maxtx);
}
bad:
/* Restore Unicode conversion state */
if (unicode) {
vcp->vc_toserver = servercharset;
vcp->vc_tolocal = localcharset;
vcp->vc_hflags2 |= SMB_FLAGS2_UNICODE;
}
smb_rq_done(rqp);
return error;
}
@ -273,9 +300,13 @@ smb_smb_ssnsetup(struct smb_vc *vcp, struct smb_cred *scred)
smb_uniptr unipp, ntencpass = NULL;
char *pp, *up, *pbuf, *encpass;
int error, plen, uniplen, ulen, upper;
u_int32_t caps = 0;
upper = 0;
if (vcp->obj.co_flags & SMBV_UNICODE)
caps |= SMB_CAP_UNICODE;
again:
vcp->vc_smbuid = SMB_UID_UNKNOWN;
@ -374,8 +405,7 @@ smb_smb_ssnsetup(struct smb_vc *vcp, struct smb_cred *scred)
} else {
mb_put_uint16le(mbp, uniplen);
mb_put_uint32le(mbp, 0); /* reserved */
mb_put_uint32le(mbp, vcp->obj.co_flags & SMBV_UNICODE ?
SMB_CAP_UNICODE : 0);
mb_put_uint32le(mbp, caps);
smb_rq_wend(rqp);
smb_rq_bstart(rqp);
mb_put_mem(mbp, pp, plen, MB_MSYSTEM);
@ -477,24 +507,13 @@ smb_smb_treeconnect(struct smb_share *ssp, struct smb_cred *scred)
upper = 0;
again:
#if 0
/* Disable Unicode for SMB_COM_TREE_CONNECT_ANDX requests */
if (SSTOVC(ssp)->vc_hflags2 & SMB_FLAGS2_UNICODE) {
vcp = SSTOVC(ssp);
if (vcp->vc_toserver) {
iconv_close(vcp->vc_toserver);
/* Use NULL until UTF-8 -> ASCII works */
vcp->vc_toserver = NULL;
}
if (vcp->vc_tolocal) {
iconv_close(vcp->vc_tolocal);
/* Use NULL until ASCII -> UTF-8 works*/
vcp->vc_tolocal = NULL;
}
vcp->vc_toserver = vcp->vc_cp_toserver;
vcp->vc_tolocal = vcp->vc_cp_tolocal;
vcp->vc_hflags2 &= ~SMB_FLAGS2_UNICODE;
}
#endif
ssp->ss_tid = SMB_TID_UNKNOWN;
error = smb_rq_alloc(SSTOCP(ssp), SMB_COM_TREE_CONNECT_ANDX, scred, &rqp);
@ -559,6 +578,15 @@ smb_smb_treeconnect(struct smb_share *ssp, struct smb_cred *scred)
ssp->ss_tid = rqp->sr_rptid;
ssp->ss_vcgenid = vcp->vc_genid;
ssp->ss_flags |= SMBS_CONNECTED;
/*
* If the server can speak Unicode then switch
* our converters to do Unicode <--> Local
*/
if (vcp->obj.co_flags & SMBV_UNICODE) {
vcp->vc_toserver = vcp->vc_ucs_toserver;
vcp->vc_tolocal = vcp->vc_ucs_tolocal;
vcp->vc_hflags2 |= SMB_FLAGS2_UNICODE;
}
bad:
if (encpass)
free(encpass, M_SMBTEMP);

View file

@ -350,6 +350,8 @@ smb_put_dmem(struct mbchain *mbp, struct smb_vc *vcp, const char *src,
}
mbp->mb_copy = smb_copy_iconv;
mbp->mb_udata = dp;
if (SMB_UNICODE_STRINGS(vcp))
mb_put_padbyte(mbp);
return mb_put_mem(mbp, src, size, MB_MCUSTOM);
}
@ -362,6 +364,8 @@ smb_put_dstring(struct mbchain *mbp, struct smb_vc *vcp, const char *src,
error = smb_put_dmem(mbp, vcp, src, strlen(src), caseopt);
if (error)
return error;
if (SMB_UNICODE_STRINGS(vcp))
return mb_put_uint16le(mbp, 0);
return mb_put_uint8(mbp, 0);
}

View file

@ -47,6 +47,7 @@
#define KICONV_FROM_UPPER 8 /* toupper source character, then convert */
#define KICONV_WCTYPE 16 /* towlower/towupper characters */
#define ENCODING_UNICODE "UTF-16BE"
#define KICONV_WCTYPE_NAME "_wctype"
/*
@ -85,7 +86,6 @@ struct iconv_add_out {
__BEGIN_DECLS
#define ENCODING_UNICODE "UTF-16BE"
#define KICONV_VENDOR_MICSFT 1 /* Microsoft Vendor Code for quirk */
int kiconv_add_xlat_table(const char *, const char *, const u_char *);
@ -162,6 +162,7 @@ int iconv_convchr(void *handle, const char **inbuf,
size_t *inbytesleft, char **outbuf, size_t *outbytesleft);
int iconv_convchr_case(void *handle, const char **inbuf,
size_t *inbytesleft, char **outbuf, size_t *outbytesleft, int casetype);
int iconv_add(const char *converter, const char *to, const char *from);
char* iconv_convstr(void *handle, char *dst, const char *src);
void* iconv_convmem(void *handle, void *dst, const void *src, int size);
int iconv_vfs_refcount(const char *fsname);

View file

@ -67,6 +67,7 @@ struct mbuf *mb_detach(struct mbchain *mbp);
int mb_fixhdr(struct mbchain *mbp);
caddr_t mb_reserve(struct mbchain *mbp, int size);
int mb_put_padbyte(struct mbchain *mbp);
int mb_put_uint8(struct mbchain *mbp, u_int8_t x);
int mb_put_uint16be(struct mbchain *mbp, u_int16_t x);
int mb_put_uint16le(struct mbchain *mbp, u_int16_t x);