Merge branch 'jc/latin-1'

Some platforms no longer understand "latin-1" that is still seen in
the wild in e-mail headers; replace them with "iso-8859-1" that is
more widely known when conversion fails from/to it.

* jc/latin-1:
  utf8: accept "latin-1" as ISO-8859-1
  utf8: refactor code to decide fallback encoding
This commit is contained in:
Junio C Hamano 2017-01-10 15:24:22 -08:00
commit f01c1ff4f7

36
utf8.c
View file

@ -489,6 +489,28 @@ char *reencode_string_iconv(const char *in, size_t insz, iconv_t conv, int *outs
return out;
}
static const char *fallback_encoding(const char *name)
{
/*
* Some platforms do not have the variously spelled variants of
* UTF-8, so let's fall back to trying the most official
* spelling. We do so only as a fallback in case the platform
* does understand the user's spelling, but not our official
* one.
*/
if (is_encoding_utf8(name))
return "UTF-8";
/*
* Even though latin-1 is still seen in e-mail
* headers, some platforms only install ISO-8859-1.
*/
if (!strcasecmp(name, "latin-1"))
return "ISO-8859-1";
return name;
}
char *reencode_string_len(const char *in, int insz,
const char *out_encoding, const char *in_encoding,
int *outsz)
@ -501,17 +523,9 @@ char *reencode_string_len(const char *in, int insz,
conv = iconv_open(out_encoding, in_encoding);
if (conv == (iconv_t) -1) {
/*
* Some platforms do not have the variously spelled variants of
* UTF-8, so let's fall back to trying the most official
* spelling. We do so only as a fallback in case the platform
* does understand the user's spelling, but not our official
* one.
*/
if (is_encoding_utf8(in_encoding))
in_encoding = "UTF-8";
if (is_encoding_utf8(out_encoding))
out_encoding = "UTF-8";
in_encoding = fallback_encoding(in_encoding);
out_encoding = fallback_encoding(out_encoding);
conv = iconv_open(out_encoding, in_encoding);
if (conv == (iconv_t) -1)
return NULL;