notepad: Improve encoding detection when opening files.

This commit is contained in:
Alexander Scott-Johns 2009-06-29 22:24:59 +01:00 committed by Alexandre Julliard
parent 42729bc1c1
commit 8b6b7b2c39
2 changed files with 98 additions and 7 deletions

View file

@ -26,6 +26,7 @@
#include <windows.h> #include <windows.h>
#include <commdlg.h> #include <commdlg.h>
#include <shlwapi.h> #include <shlwapi.h>
#include <winternl.h>
#include "main.h" #include "main.h"
#include "dialog.h" #include "dialog.h"
@ -37,6 +38,13 @@ static const WCHAR helpfileW[] = { 'n','o','t','e','p','a','d','.','h','l','p',0
static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam); static INT_PTR WINAPI DIALOG_PAGESETUP_DlgProc(HWND hDlg, UINT msg, WPARAM wParam, LPARAM lParam);
/* Swap bytes of WCHAR buffer (big-endian <-> little-endian). */
static inline void byteswap_wide_string(LPWSTR str, UINT num)
{
UINT i;
for (i = 0; i < num; i++) str[i] = RtlUshortByteSwap(str[i]);
}
VOID ShowLastError(void) VOID ShowLastError(void)
{ {
DWORD error = GetLastError(); DWORD error = GetLastError();
@ -195,6 +203,43 @@ BOOL DoCloseFile(void)
return(TRUE); return(TRUE);
} }
static inline ENCODING detect_encoding_of_buffer(const void* buffer, int size)
{
static const char bom_utf8[] = { 0xef, 0xbb, 0xbf };
if (size >= sizeof(bom_utf8) && !memcmp(buffer, bom_utf8, sizeof(bom_utf8)))
return ENCODING_UTF8;
else
{
int flags = IS_TEXT_UNICODE_SIGNATURE |
IS_TEXT_UNICODE_REVERSE_SIGNATURE |
IS_TEXT_UNICODE_ODD_LENGTH;
IsTextUnicode(buffer, size, &flags);
if (flags & IS_TEXT_UNICODE_SIGNATURE)
return ENCODING_UTF16LE;
else if (flags & IS_TEXT_UNICODE_REVERSE_SIGNATURE)
return ENCODING_UTF16BE;
else
return ENCODING_ANSI;
}
}
/* Similar to SetWindowTextA, but uses a CP_UTF8 encoded input, not CP_ACP.
* lpTextInUtf8 should be NUL-terminated and not include the BOM.
*
* Returns FALSE on failure, TRUE on success, like SetWindowTextA/W.
*/
static BOOL SetWindowTextUtf8(HWND hwnd, LPCSTR lpTextInUtf8)
{
BOOL ret;
int lenW = MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, NULL, 0);
LPWSTR textW = HeapAlloc(GetProcessHeap(), 0, lenW * sizeof(WCHAR));
if (!textW)
return FALSE;
MultiByteToWideChar(CP_UTF8, 0, lpTextInUtf8, -1, textW, lenW);
ret = SetWindowTextW(hwnd, textW);
HeapFree(GetProcessHeap(), 0, textW);
return ret;
}
void DoOpenFile(LPCWSTR szFileName) void DoOpenFile(LPCWSTR szFileName)
{ {
@ -203,6 +248,8 @@ void DoOpenFile(LPCWSTR szFileName)
LPSTR pTemp; LPSTR pTemp;
DWORD size; DWORD size;
DWORD dwNumRead; DWORD dwNumRead;
ENCODING enc;
BOOL succeeded;
WCHAR log[5]; WCHAR log[5];
/* Close any files and prompt to save changes */ /* Close any files and prompt to save changes */
@ -224,9 +271,9 @@ void DoOpenFile(LPCWSTR szFileName)
ShowLastError(); ShowLastError();
return; return;
} }
size++;
pTemp = HeapAlloc(GetProcessHeap(), 0, size); /* Extra memory for (WCHAR)'\0'-termination. */
pTemp = HeapAlloc(GetProcessHeap(), 0, size+2);
if (!pTemp) if (!pTemp)
{ {
CloseHandle(hFile); CloseHandle(hFile);
@ -243,12 +290,48 @@ void DoOpenFile(LPCWSTR szFileName)
} }
CloseHandle(hFile); CloseHandle(hFile);
pTemp[dwNumRead] = 0;
if((size -1) >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe) size = dwNumRead;
SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1); pTemp[size] = 0; /* make sure it's (char)'\0'-terminated */
else pTemp[size+1] = 0; /* make sure it's (WCHAR)'\0'-terminated */
SetWindowTextA(Globals.hEdit, pTemp);
enc = detect_encoding_of_buffer(pTemp, size);
/* SetWindowTextUtf8 and SetWindowTextA try to allocate memory, so we
* check if they succeed.
*/
switch (enc)
{
case ENCODING_UTF16BE:
byteswap_wide_string((WCHAR*) pTemp, size/sizeof(WCHAR));
/* fall through */
case ENCODING_UTF16LE:
if (size >= 2 && (BYTE)pTemp[0] == 0xff && (BYTE)pTemp[1] == 0xfe)
succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp + 1);
else
succeeded = SetWindowTextW(Globals.hEdit, (LPWSTR)pTemp);
break;
case ENCODING_UTF8:
if (size >= 3 && (BYTE)pTemp[0] == 0xef && (BYTE)pTemp[1] == 0xbb &&
(BYTE)pTemp[2] == 0xbf)
succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp+3);
else
succeeded = SetWindowTextUtf8(Globals.hEdit, pTemp);
break;
default:
succeeded = SetWindowTextA(Globals.hEdit, pTemp);
break;
}
if (!succeeded)
{
ShowLastError();
HeapFree(GetProcessHeap(), 0, pTemp);
return;
}
HeapFree(GetProcessHeap(), 0, pTemp); HeapFree(GetProcessHeap(), 0, pTemp);

View file

@ -25,6 +25,14 @@
#define MAX_STRING_LEN 255 #define MAX_STRING_LEN 255
typedef enum
{
ENCODING_ANSI,
ENCODING_UTF16LE,
ENCODING_UTF16BE,
ENCODING_UTF8
} ENCODING;
typedef struct typedef struct
{ {
HANDLE hInstance; HANDLE hInstance;