24275: fixes for multibyte characters on Solaris

2024-07-21 10:14:19 +00:00 · 2007-12-17 17:11:29 +00:00 · 2007-12-17 17:11:29 +00:00 · 20607774dc
parent 1ac4f6a77f
commit 20607774dc
4 changed files with 43 additions and 9 deletions
--- a/5
+++ b/5
@ -1,5 +1,10 @@
 2007-12-17  Peter Stephenson  <pws@csr.com>

+	* 24275: Src/builtin.c, Src/Zle/zle_utils.c,
+	Test/D07multibyte.ztst: Solaris returns the full character
+	length from mbrlen() etc. even if the call started in the middle
+	of a character; it silently converts bad characters to a '?'.
+
 	* unposted: Config/version.mk: 4.3.4-dev-5.

 	* unposted: Src/lex.c: minor typo
--- a/Src/Zle/zle_utils.c
+++ b/Src/Zle/zle_utils.c
@ -294,6 +294,16 @@ stringaszleline(char *instr, int incs, int *outll, int *outsz, int *outcs)
 		 * (certainly true for Unicode and unlikely to be false
 		 * in any non-pathological multibyte representation). */
 		cnt = 1;
+	    } else if (cnt > ll) {
+		/*
+		 * Some multibyte implementations return the
+		 * full length of a previous incomplete character
+		 * instead of the remaining length.
+		 * This is paranoia: it only applies if we start
+		 * midway through a multibyte character, which
+		 * presumably can't happen.
+		 */
+		cnt = ll;
 	    }

 	    if (outcs) {
@ -843,6 +853,12 @@ showmsg(char const *msg)
 		cnt = 1;
 		/* FALL THROUGH */
 	    default:
+		/*
+		 * Paranoia: only needed if we start in the middle of a
+		 * multibyte character, and only in some implementations.
+		 */
+		if (cnt > ulen)
+		    cnt = ulen;
 		n = wcs_nicechar(c, &width, NULL);
 		break;
 	    }
--- a/Src/builtin.c
+++ b/Src/builtin.c
@ -4927,7 +4927,7 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 		    break;
 		}
 		*bptr = (char) val;
-#ifdef MULTIBYTE_SUPPORT	
+#ifdef MULTIBYTE_SUPPORT
 		if (isset(MULTIBYTE)) {
 		    ret = mbrlen(bptr++, 1, &mbs);
 		    if (ret == MB_INVALID)
@ -4954,8 +4954,8 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 		    eof = 1;
 		    break;
 		}
-	    
-#ifdef MULTIBYTE_SUPPORT	
+
+#ifdef MULTIBYTE_SUPPORT
 		if (isset(MULTIBYTE)) {
 		    while (val > 0) {
 			ret = mbrlen(bptr, val, &mbs);
@ -4970,6 +4970,10 @@ bin_read(char *name, char **args, Options ops, UNUSED(int func))
 			    }
 			    else if (ret == 0) /* handle null as normal char */
 				ret = 1;
+			    else if (ret > val) {
+				/* Some mbrlen()s return the full char len */
+				ret = val;
+			    }
 			    nchars--;
 			    val -= ret;
 			    bptr += ret;
--- a/Test/D07multibyte.ztst
+++ b/Test/D07multibyte.ztst
@ -388,9 +388,18 @@
 # This also isn't strictly multibyte and is here to reduce the
 # likelihood of a "can't do character set conversion" error.
  testfn() { (LC_ALL=C; print $'\u00e9') }
-  repeat 4 testfn
-1:error handling in Unicode quoting
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
-?testfn: character not in range
+  repeat 4 testfn 2>&1 | while read line; do
+    if [[ $line = *"character not in range"* ]]; then
+      print OK
+    elif [[ $line = "?" ]]; then
+      print OK
+    else
+      print Failed: no error message and no question mark
+    fi
+  done
+  true
+0:error handling in Unicode quoting
+>OK
+>OK
+>OK
+>OK