From cce8d6fdb4d7170a73763586daf6ac4f6b8fce2c Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Fri, 16 May 2008 14:03:30 +0100 Subject: [PATCH 1/3] mailsplit and mailinfo: gracefully handle NUL characters The function fgets() has a big problem with NUL characters: it reads them, but nobody will know if the NUL comes from the file stream, or was appended at the end of the line. So implement a custom read_line_with_nul() function. Noticed by Tommy Thorn. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- builtin-mailinfo.c | 24 +++++++++++++----------- builtin-mailsplit.c | 27 +++++++++++++++++++++++---- builtin.h | 1 + t/t5100-mailinfo.sh | 9 +++++++++ t/t5100/nul | Bin 0 -> 91 bytes 5 files changed, 46 insertions(+), 15 deletions(-) create mode 100644 t/t5100/nul diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index 11f154b31f..f0c420976f 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -641,7 +641,7 @@ static void decode_transfer_encoding(char *line, unsigned linesize) } } -static int handle_filter(char *line, unsigned linesize); +static int handle_filter(char *line, unsigned linesize, int linelen); static int find_boundary(void) { @@ -669,7 +669,7 @@ static int handle_boundary(void) "can't recover\n"); exit(1); } - handle_filter(newline, sizeof(newline)); + handle_filter(newline, sizeof(newline), strlen(newline)); /* skip to the next boundary */ if (!find_boundary()) @@ -759,14 +759,14 @@ static int handle_commit_msg(char *line, unsigned linesize) return 0; } -static int handle_patch(char *line) +static int handle_patch(char *line, int len) { - fputs(line, patchfile); + fwrite(line, 1, len, patchfile); patch_lines++; return 0; } -static int handle_filter(char *line, unsigned linesize) +static int handle_filter(char *line, unsigned linesize, int linelen) { static int filter = 0; @@ -779,7 +779,7 @@ static int handle_filter(char *line, unsigned linesize) break; filter++; case 1: - if (!handle_patch(line)) + if (!handle_patch(line, linelen)) break; filter++; default: @@ -794,6 +794,7 @@ static void handle_body(void) int rc = 0; static char newline[2000]; static char *np = newline; + int len = strlen(line); /* Skip up to the first boundary */ if (content_top->boundary) { @@ -807,7 +808,8 @@ static void handle_body(void) /* flush any leftover */ if ((transfer_encoding == TE_BASE64) && (np != newline)) { - handle_filter(newline, sizeof(newline)); + handle_filter(newline, sizeof(newline), + strlen(newline)); } if (!handle_boundary()) return; @@ -824,7 +826,7 @@ static void handle_body(void) /* binary data most likely doesn't have newlines */ if (message_type != TYPE_TEXT) { - rc = handle_filter(line, sizeof(newline)); + rc = handle_filter(line, sizeof(line), len); break; } @@ -841,7 +843,7 @@ static void handle_body(void) /* should be sitting on a new line */ *(++np) = 0; op++; - rc = handle_filter(newline, sizeof(newline)); + rc = handle_filter(newline, sizeof(newline), np - newline); np = newline; } } while (*op != 0); @@ -851,12 +853,12 @@ static void handle_body(void) break; } default: - rc = handle_filter(line, sizeof(newline)); + rc = handle_filter(line, sizeof(line), len); } if (rc) /* nothing left to filter */ break; - } while (fgets(line, sizeof(line), fin)); + } while ((len = read_line_with_nul(line, sizeof(line), fin))); return; } diff --git a/builtin-mailsplit.c b/builtin-mailsplit.c index 46b27cdaea..e4d977bafb 100644 --- a/builtin-mailsplit.c +++ b/builtin-mailsplit.c @@ -45,6 +45,25 @@ static int is_from_line(const char *line, int len) /* Could be as small as 64, enough to hold a Unix "From " line. */ static char buf[4096]; +/* We cannot use fgets() because our lines can contain NULs */ +int read_line_with_nul(char *buf, int size, FILE *in) +{ + int len = 0, c; + + for (;;) { + c = getc(in); + buf[len++] = c; + if (c == EOF || c == '\n' || len + 1 >= size) + break; + } + + if (c == EOF) + len--; + buf[len] = '\0'; + + return len; +} + /* Called with the first line (potentially partial) * already in buf[] -- normally that should begin with * the Unix "From " line. Write it into the specified @@ -70,19 +89,19 @@ static int split_one(FILE *mbox, const char *name, int allow_bare) * "From " and having something that looks like a date format. */ for (;;) { - int is_partial = (buf[len-1] != '\n'); + int is_partial = len && buf[len-1] != '\n'; - if (fputs(buf, output) == EOF) + if (fwrite(buf, 1, len, output) != len) die("cannot write output"); - if (fgets(buf, sizeof(buf), mbox) == NULL) { + len = read_line_with_nul(buf, sizeof(buf), mbox); + if (len == 0) { if (feof(mbox)) { status = 1; break; } die("cannot read mbox"); } - len = strlen(buf); if (!is_partial && !is_bare && is_from_line(buf, len)) break; /* done with one message */ } diff --git a/builtin.h b/builtin.h index 95126fd0c1..48f1332001 100644 --- a/builtin.h +++ b/builtin.h @@ -9,6 +9,7 @@ extern const char git_usage_string[]; extern void list_common_cmds_help(void); extern void help_unknown_cmd(const char *cmd); extern void prune_packed_objects(int); +extern int read_line_with_nul(char *buf, int size, FILE *file); extern int cmd_add(int argc, const char **argv, const char *prefix); extern int cmd_annotate(int argc, const char **argv, const char *prefix); diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index d6c55c1157..5a4610b860 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -25,4 +25,13 @@ do diff ../t5100/info$mail info$mail" done +test_expect_success 'respect NULs' ' + + git mailsplit -d3 -o. ../t5100/nul && + cmp ../t5100/nul 001 && + (cat 001 | git mailinfo msg patch) && + test 4 = $(wc -l < patch) + +' + test_done diff --git a/t/t5100/nul b/t/t5100/nul new file mode 100644 index 0000000000000000000000000000000000000000..3d40691787b855cc0133514a19052492eb853d21 GIT binary patch literal 91 zcmW;6y$ygM5C%|6a#MT@Tm%~v2e7kZ0t`Q)fHOej_C}MJcXX*}a!Gh_N`s3x>;_}@ mA68>55i?ULDS Date: Sun, 25 May 2008 01:16:05 -0700 Subject: [PATCH 2/3] mailinfo: apply the same fix not to lose NULs in BASE64 and QP codepaths Signed-off-by: Junio C Hamano --- builtin-mailinfo.c | 46 ++++++++++++++++++++++------------------- t/t5100-mailinfo.sh | 9 ++++++++ t/t5100/nul-b64.expect | Bin 0 -> 1672 bytes t/t5100/nul-b64.in | 37 +++++++++++++++++++++++++++++++++ 4 files changed, 71 insertions(+), 21 deletions(-) create mode 100644 t/t5100/nul-b64.expect create mode 100644 t/t5100/nul-b64.in diff --git a/builtin-mailinfo.c b/builtin-mailinfo.c index f0c420976f..e1e094f29e 100644 --- a/builtin-mailinfo.c +++ b/builtin-mailinfo.c @@ -434,6 +434,7 @@ static int read_one_header_line(char *line, int sz, FILE *in) static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int rfc2047) { + char *otbegin = ot; char *otend = ot + otsize; int c; while ((c = *in++) != 0 && (in <= ep)) { @@ -453,13 +454,14 @@ static int decode_q_segment(char *in, char *ot, unsigned otsize, char *ep, int r *ot++ = c; } *ot = 0; - return 0; + return (ot - otbegin); } static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep) { /* Decode in..ep, possibly in-place to ot */ int c, pos = 0, acc = 0; + char *otbegin = ot; char *otend = ot + otsize; while ((c = *in++) != 0 && (in <= ep)) { @@ -505,7 +507,7 @@ static int decode_b_segment(char *in, char *ot, unsigned otsize, char *ep) } } *ot = 0; - return 0; + return (ot - otbegin); } /* @@ -623,21 +625,20 @@ static void decode_header(char *it, unsigned itsize) convert_to_utf8(it, itsize, ""); } -static void decode_transfer_encoding(char *line, unsigned linesize) +static int decode_transfer_encoding(char *line, unsigned linesize, int inputlen) { char *ep; switch (transfer_encoding) { case TE_QP: - ep = line + strlen(line); - decode_q_segment(line, line, linesize, ep, 0); - break; + ep = line + inputlen; + return decode_q_segment(line, line, linesize, ep, 0); case TE_BASE64: - ep = line + strlen(line); - decode_b_segment(line, line, linesize, ep); - break; + ep = line + inputlen; + return decode_b_segment(line, line, linesize, ep); case TE_DONTCARE: - break; + default: + return inputlen; } } @@ -806,17 +807,19 @@ static void handle_body(void) /* process any boundary lines */ if (content_top->boundary && is_multipart_boundary(line)) { /* flush any leftover */ - if ((transfer_encoding == TE_BASE64) && - (np != newline)) { + if (np != newline) handle_filter(newline, sizeof(newline), - strlen(newline)); - } + np - newline); if (!handle_boundary()) return; } /* Unwrap transfer encoding */ - decode_transfer_encoding(line, sizeof(line)); + len = decode_transfer_encoding(line, sizeof(line), len); + if (len < 0) { + error("Malformed input line"); + return; + } switch (transfer_encoding) { case TE_BASE64: @@ -830,13 +833,13 @@ static void handle_body(void) break; } - /* this is a decoded line that may contain + /* + * This is a decoded line that may contain * multiple new lines. Pass only one chunk * at a time to handle_filter() */ - do { - while (*op != '\n' && *op != 0) + while (op < line + len && *op != '\n') *np++ = *op++; *np = *op; if (*np != 0) { @@ -846,9 +849,10 @@ static void handle_body(void) rc = handle_filter(newline, sizeof(newline), np - newline); np = newline; } - } while (*op != 0); - /* the partial chunk is saved in newline and - * will be appended by the next iteration of fgets + } while (op < line + len); + /* + * The partial chunk is saved in newline and will be + * appended by the next iteration of read_line_with_nul(). */ break; } diff --git a/t/t5100-mailinfo.sh b/t/t5100-mailinfo.sh index 5a4610b860..a8b78ebf7d 100755 --- a/t/t5100-mailinfo.sh +++ b/t/t5100-mailinfo.sh @@ -34,4 +34,13 @@ test_expect_success 'respect NULs' ' ' +test_expect_success 'Preserve NULs out of MIME encoded message' ' + + git mailsplit -d5 -o. ../t5100/nul-b64.in && + cmp ../t5100/nul-b64.in 00001 && + git mailinfo msg patch <00001 && + cmp ../t5100/nul-b64.expect patch + +' + test_done diff --git a/t/t5100/nul-b64.expect b/t/t5100/nul-b64.expect new file mode 100644 index 0000000000000000000000000000000000000000..d7d680f631b14ea75adf34ba5052043be311e72f GIT binary patch literal 1672 zcmbVNU2hvV5cD&D#XdIB-Bn6#D}5=PAbkkX1_pxu0E%3l7T}Q=ei#kVAKxJ2!erMlL({H?ed*FN8ZMXNgw`v^6 zbG_PZHgsO|@Id2k*uAAqyFNT9`ZGjcNY+#qvcZK&scyA!9>}06HkotbNZQIO$%vnc zo#ctWmb9|Bk`k+zMU+cK8S%`SIAt;=rjp2M?88{~!O6Fx7g}_Qk|uhLniKo1#;4#) zUNveSV}O46t|b=Kd@0UpD$#37dKLBqS=RM2&Os9c&LPm(paDR&A`0*sAtWjAASl4Vk)#?PKWiGr2Toq>0*+UrSU z>qM8){T1Rtr~`XBoX*2v)aIu!ZY~%%7YvX$Wl6gFr&;}5rKHb}Oa{Tjz%Z_%S)AC* zQoFRWz@b%;?%IKT@K?j_zrI+%O5M#3)6~k!_E}h6QDlIS3;Y64C4{SO$Z~`O;Rg7m zn2aPsGjtMbB)&rTbFFGAz905TalPWPP6c6`=#N}o)vVPVIKYUpH{XN@^d-g`JDo8R zwn23lb_j*HPk?w%&gJ$zTajFZhaaz0~aZgG2xSqX!RdvmwX8d_kJ1yHxFw5Ye zyfEH5SM&CG(y9GPPUhl(*qnP3D279 +Date: Sun, 25 May 2008 00:38:18 -0700 +Subject: [PATCH] second +Content-Transfer-Encoding: base64 + +LS0tCiBmaWxlIHwgIEJpbiAxMzU3IC0+IDEzNTcgYnl0ZXMKIDEgZmlsZXMgY2hhbmdlZCwg +MCBpbnNlcnRpb25zKCspLCAwIGRlbGV0aW9ucygtKQoKZGlmZiAtLWdpdCBhL2ZpbGUgYi9m +aWxlCmluZGV4IDc3MzYxZDguLjllMDJiZTYgMTAwNjQ0Ci0tLSBhL2ZpbGUKKysrIGIvZmls +ZQpAQCAtMSwxMiArMSwxMiBAQAogTG9yZW0gaXBzdW0gZG9sb3Igc2l0IGFtZXQsIGNvbnNl +Y3RldHVlciBhZGlwaXNjaW5nIGVsaXQuIFN1c3BlbmRpc3NlCiBzaXQgYW1ldCB0dXJwaXMg +ZWdldCBlc3QgY3Vyc3VzIGxhb3JlZXQuIEFsaXF1YW0gbWF1cmlzLiBQcmFlc2VudAotdm9s +dXRwYXQuIFByb2luIGluIHB1cnVzLiBOdWxsYSB1cm5hIHNhcGllbiwgZGFwaWJ1cyBzaXQg +YW1ldCwKK3ZvbHV0cGF0LiBQcm9pbiBpbiBwdXJ1cy4gTnVsbGEgdXJuYSBzYXBpZW4sIGRh +cGkAdXMgc2l0IGFtZXQsCiBoZW5kcmVyaXQgbmVjLCB0ZW1wdXMgZXUsIG1pLiBVdCBwb3J0 +YSwgbGVvIGlkIHRpbmNpZHVudCB1bGxhbWNvcnBlciwKLXZlbGl0IGZlbGlzIHRyaXN0aXF1 +ZSBhbnRlLCBhdCBsb2JvcnRpcyBkaWFtIHBlZGUgdXQgZHVpLiBQcm9pbiBhYwordmVsaXQg +ZmVsaXMgdHJpc3RpcXVlIGFudGUsIGF0IGxvAG9ydGlzIGRpYW0gcGVkZSB1dCBkdWkuIFBy +b2luIGFjCiBsZWN0dXMuIERvbmVjIGF0IG1hc3NhIGFjIGlwc3VtIGhlbmRyZXJpdCBzb2xs +aWNpdHVkaW4uIE5hbSBkaWN0dW0KIG5pc2kgc2VkIG1pLiBEdWlzIHNlZCBhbnRlLiBVdCB2 +aXRhZSBlc3QgdXQgZHVpIHVsdHJpY2llcyBkaWduaXNzaW0uCiAKLUluIHZlbCBvZGlvIGVn +ZXQgbmlzbCBjb252YWxsaXMgdm9sdXRwYXQuIE1vcmJpIHZpdGFlIG5pYmguIE51bGxhbQor +SW4gdmVsIG9kaW8gZWdldCBuaXNsIGNvbnZhbGxpcyB2b2x1dHBhdC4gTW9yAGkgdml0YWUg +bmkAaC4gTnVsbGFtCiBhY2N1bXNhbiwgZG9sb3IgcXVpcyBhbGlxdWFtIHNjZWxlcmlzcXVl +LCBlbGl0IGVuaW0gY29uZGltZW50dW0KIG1hdXJpcywgbm9uIHRyaXN0aXF1ZSBtYXVyaXMg +dHVycGlzIGV0IG1hdXJpcy4gVXQgbm9uIG5pc2wuIE5hbSBkaWFtCiBtaSwgc2VtcGVyIHBv +c3VlcmUsIGVsZWlmZW5kIHV0LCBhdWN0b3IgdmVsLCBlcmF0LiBTZWQgcG9zdWVyZQpAQCAt +MTYsNyArMTYsNyBAQCBzZWQgZXN0LiBFdGlhbSBkaWFtIGZlbGlzLCBmZXJtZW50dW0gZWdl +dCwgYWRpcGlzY2luZyBhdCwgcG9zdWVyZSBpbiwKIGR1aS4gRXRpYW0gbHVjdHVzLgogCiBO +dWxsYSBpZCBhdWd1ZS4gTmFtIGlhY3VsaXMgYWNjdW1zYW4gbmlzaS4gU3VzcGVuZGlzc2Ug +cG90ZW50aS4gTnVuYwotdmFyaXVzIGF1Z3VlIG5lYyBvcmNpLiBVdCBjb25kaW1lbnR1bSBk +b2xvciBzYWdpdHRpcyBuaWJoLiBTdXNwZW5kaXNzZQordmFyaXVzIGF1Z3VlIG5lYyBvcmNp +LiBVdCBjb25kaW1lbnR1bSBkb2xvciBzYWdpdHRpcyBuaQBoLiBTdXNwZW5kaXNzZQogdGVt +cG9yIGxlY3R1cyBzZWQgbWFnbmEuIFN1c3BlbmRpc3NlIHBvdGVudGkuIE51bGxhbSB0ZW1w +b3IgaXBzdW0uIFNlZAogbW9sZXN0aWUgdGVsbHVzLiBQaGFzZWxsdXMgbGlndWxhLiBJbiB2 +ZWhpY3VsYSB1bHRyaWNlcwogbmlzaS4gU3VzcGVuZGlzc2UgZmVsaXMgYXVndWUsIHBlbGxl +bnRlc3F1ZSBhdCwgZGljdHVtIHZpdmVycmEsCi0tIAoxLjUuNS4xLjU0MC5nNTc3ODAKCg== From edc5594153d7246694c1ec5bdb8ccdaa97bf7daf Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Sun, 25 May 2008 01:23:55 -0700 Subject: [PATCH 3/3] mailsplit: minor clean-up in read_line_with_nul() Signed-off-by: Junio C Hamano --- builtin-mailsplit.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/builtin-mailsplit.c b/builtin-mailsplit.c index e4d977bafb..ae2b4cb21b 100644 --- a/builtin-mailsplit.c +++ b/builtin-mailsplit.c @@ -52,13 +52,12 @@ int read_line_with_nul(char *buf, int size, FILE *in) for (;;) { c = getc(in); + if (c == EOF) + break; buf[len++] = c; - if (c == EOF || c == '\n' || len + 1 >= size) + if (c == '\n' || len + 1 >= size) break; } - - if (c == EOF) - len--; buf[len] = '\0'; return len;