okular/xpdf/CharCodeToUnicode.cc
Albert Astals Cid 3abd57ea99 - Put xpdf 3.00 in the xpdf, goo and fofi drawers to bring the PDF 1.5 support that version has to kpdf
- Remove not used files of xpdf (images, etc) that we don't need to distribute

Please test whether your kpdf behaves the same as older versions (it should only have somewhat better support for PDF 1.5)

svn path=/trunk/kdegraphics/kpdf/; revision=339928
2004-08-23 00:05:01 +00:00

533 lines
12 KiB
C++

//========================================================================
//
// CharCodeToUnicode.cc
//
// Copyright 2001-2003 Glyph & Cog, LLC
//
//========================================================================
#include <aconf.h>
#ifdef USE_GCC_PRAGMAS
#pragma implementation
#endif
#include <stdio.h>
#include <string.h>
#include "gmem.h"
#include "gfile.h"
#include "GString.h"
#include "Error.h"
#include "GlobalParams.h"
#include "PSTokenizer.h"
#include "CharCodeToUnicode.h"
//------------------------------------------------------------------------
// Capacity of the fixed-size Unicode array in CharCodeToUnicodeString.
#define maxUnicodeString 8
// One entry of the "string map": a char code whose mapping is a sequence
// of Unicode values rather than a single value.
struct CharCodeToUnicodeString {
// the char code this entry describes
CharCode c;
// the Unicode values for that char code
Unicode u[maxUnicodeString];
// number of values recorded for this entry
int len;
};
//------------------------------------------------------------------------
// Character source that reads from an in-memory, NUL-terminated string.
// <data> points to a char* cursor; the cursor is advanced past each
// character that is returned.  Returns the next character as a value in
// the range 0..255, or EOF once the terminating NUL is reached (the
// cursor is then left where it is).
static int getCharFromString(void *data) {
  char **cursor = (char **)data;
  char *p = *cursor;

  if (!*p) {
    return EOF;
  }
  // Convert through unsigned char, exactly as fgetc() does.  Where plain
  // char is signed, a direct conversion would yield negative values for
  // bytes >= 0x80 and would make byte 0xFF compare equal to EOF.
  int c = (unsigned char)*p;
  *cursor = p + 1;
  return c;
}
// Character source that reads from a stdio stream: <data> is the FILE*
// to read from, and the result is whatever fgetc() returns for it.
static int getCharFromFile(void *data) {
  FILE *stream = (FILE *)data;
  int ch = fgetc(stream);
  return ch;
}
//------------------------------------------------------------------------
// Reads a cidToUnicode file and returns a new CharCodeToUnicode tagged
// with a copy of <collection>, or NULL if the file cannot be opened.
//
// Line k of the file (counting from zero) supplies the hex Unicode value
// for code k.  A line that does not start with a hex number is reported
// through error() and recorded as 0.
CharCodeToUnicode *CharCodeToUnicode::parseCIDToUnicode(GString *fileName,
                                                        GString *collection) {
  FILE *in = fopen(fileName->getCString(), "r");
  if (!in) {
    error(-1, "Couldn't open cidToUnicode file '%s'",
          fileName->getCString());
    return NULL;
  }

  CharCode capacity = 32768;
  CharCode count = 0;
  Unicode *table = (Unicode *)gmalloc(capacity * sizeof(Unicode));
  char lineBuf[64];
  Unicode value;

  while (getLine(lineBuf, sizeof(lineBuf), in)) {
    // double the table whenever it is full
    if (count == capacity) {
      capacity *= 2;
      table = (Unicode *)grealloc(table, capacity * sizeof(Unicode));
    }
    if (sscanf(lineBuf, "%x", &value) != 1) {
      error(-1, "Bad line (%d) in cidToUnicode file '%s'",
            (int)(count + 1), fileName->getCString());
      value = 0;
    }
    table[count] = value;
    ++count;
  }
  fclose(in);

  // the constructor is asked to copy the table, so the scratch buffer is
  // released here
  CharCodeToUnicode *result =
      new CharCodeToUnicode(collection->copy(), table, count, gTrue,
                            NULL, 0, 0);
  gfree(table);
  return result;
}
// Reads a unicodeToUnicode file and returns a new CharCodeToUnicode
// tagged with a copy of <fileName>, or NULL if the file cannot be opened.
//
// Each line holds a source value followed by one or more replacement
// values, all in hex and separated by whitespace; at most
// maxUnicodeString replacements are read per line.  A single replacement
// is stored directly in the map; a longer sequence is stored in the
// string map (and the direct map entry is set to 0).  Lines that cannot
// be used are reported through error() and skipped.
CharCodeToUnicode *CharCodeToUnicode::parseUnicodeToUnicode(
                                                    GString *fileName) {
  // Largest Unicode code point.  Source values above this are rejected:
  // they are not valid keys, and the table-doubling loop below would
  // otherwise wrap <size> around to 0 and never terminate (or request an
  // absurd allocation) for values near the top of the integer range.
  const Unicode maxCodePoint = 0x10ffff;
  FILE *f;
  Unicode *mapA;
  CharCodeToUnicodeString *sMapA;
  CharCode size, oldSize, len, sMapSizeA, sMapLenA;
  char buf[256];
  char *tok;
  Unicode u0;
  Unicode uBuf[maxUnicodeString];
  CharCodeToUnicode *ctu;
  int line, n, i;

  if (!(f = fopen(fileName->getCString(), "r"))) {
    error(-1, "Couldn't open unicodeToUnicode file '%s'",
          fileName->getCString());
    return NULL;
  }

  size = 4096;
  mapA = (Unicode *)gmalloc(size * sizeof(Unicode));
  memset(mapA, 0, size * sizeof(Unicode));
  len = 0;
  sMapA = NULL;
  sMapSizeA = sMapLenA = 0;

  line = 0;
  while (getLine(buf, sizeof(buf), f)) {
    ++line;

    // first token: the source value
    if (!(tok = strtok(buf, " \t\r\n")) ||
        sscanf(tok, "%x", &u0) != 1) {
      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
            line, fileName->getCString());
      continue;
    }
    if (u0 > maxCodePoint) {
      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
            line, fileName->getCString());
      continue;
    }

    // remaining tokens: the replacement values
    n = 0;
    while (n < maxUnicodeString) {
      if (!(tok = strtok(NULL, " \t\r\n"))) {
        break;
      }
      if (sscanf(tok, "%x", &uBuf[n]) != 1) {
        error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
              line, fileName->getCString());
        break;
      }
      ++n;
    }
    if (n < 1) {
      error(-1, "Bad line (%d) in unicodeToUnicode file '%s'",
            line, fileName->getCString());
      continue;
    }

    // grow the direct map (by doubling) until u0 fits, zero-filling the
    // newly added part
    if (u0 >= size) {
      oldSize = size;
      while (u0 >= size) {
        size *= 2;
      }
      mapA = (Unicode *)grealloc(mapA, size * sizeof(Unicode));
      memset(mapA + oldSize, 0, (size - oldSize) * sizeof(Unicode));
    }

    if (n == 1) {
      mapA[u0] = uBuf[0];
    } else {
      mapA[u0] = 0;
      if (sMapLenA == sMapSizeA) {
        sMapSizeA += 16;
        sMapA = (CharCodeToUnicodeString *)
                  grealloc(sMapA, sMapSizeA * sizeof(CharCodeToUnicodeString));
      }
      sMapA[sMapLenA].c = u0;
      for (i = 0; i < n; ++i) {
        sMapA[sMapLenA].u[i] = uBuf[i];
      }
      sMapA[sMapLenA].len = n;
      ++sMapLenA;
    }

    // len tracks one past the highest source value seen
    if (u0 >= len) {
      len = u0 + 1;
    }
  }
  fclose(f);

  // mapA is copied by the constructor and therefore freed here; sMapA is
  // stored as-is by the constructor and must not be freed here
  ctu = new CharCodeToUnicode(fileName->copy(), mapA, len, gTrue,
                              sMapA, sMapLenA, sMapSizeA);
  gfree(mapA);
  return ctu;
}
// Creates an untagged 256-entry map.  The constructor is asked to copy
// the table, so the first 256 entries of <toUnicode> are read.
CharCodeToUnicode *CharCodeToUnicode::make8BitToUnicode(Unicode *toUnicode) {
  CharCodeToUnicode *ctu =
      new CharCodeToUnicode(NULL, toUnicode, 256, gTrue, NULL, 0, 0);
  return ctu;
}
// Creates a fresh, untagged CharCodeToUnicode and fills it by parsing the
// ToUnicode CMap text held in <buf>.  <nBits> is forwarded unchanged to
// parseCMap1().
CharCodeToUnicode *CharCodeToUnicode::parseCMap(GString *buf, int nBits) {
  CharCodeToUnicode *result = new CharCodeToUnicode(NULL);
  // getCharFromString advances this cursor through the string
  char *cursor = buf->getCString();
  result->parseCMap1(&getCharFromString, &cursor, nBits);
  return result;
}
// Parses the ToUnicode CMap text held in <buf> and adds its mappings to
// this object.  <nBits> is forwarded unchanged to parseCMap1().
void CharCodeToUnicode::mergeCMap(GString *buf, int nBits) {
  // getCharFromString advances this cursor through the string
  char *cursor = buf->getCString();
  parseCMap1(&getCharFromString, &cursor, nBits);
}
// Parses ToUnicode CMap text and adds every mapping found to this
// object.  Input characters are obtained by calling getCharFunc(data).
// <nBits> is the width of the source char codes: every source code token
// must consist of exactly nBits/4 hex digits between '<' and '>'.
//
// Recognised operators:
//   /Name usecmap                -- look the named file up through
//                                   globalParams->findToUnicodeFile() and
//                                   parse it recursively
//   beginbfchar ... endbfchar    -- <code> <unicode> pairs
//   beginbfrange ... endbfrange  -- <lo> <hi> <unicode>  or
//                                   <lo> <hi> [ <unicode> ... ]
// Problems are reported through error(); depending on the problem the
// offending entry is skipped or the rest of the block is abandoned.
void CharCodeToUnicode::parseCMap1(int (*getCharFunc)(void *), void *data,
                                   int nBits) {
  PSTokenizer *pst;
  char tok1[256], tok2[256], tok3[256];
  int nDigits, n1, n2, n3;
  CharCode i;
  CharCode code1, code2;
  GString *name;
  FILE *f;

  nDigits = nBits / 4;
  pst = new PSTokenizer(getCharFunc, data);

  // tok1 always holds the token that preceded tok2, so that the operand
  // of "usecmap" is still available when the operator itself is read
  pst->getToken(tok1, sizeof(tok1), &n1);
  while (pst->getToken(tok2, sizeof(tok2), &n2)) {
    if (!strcmp(tok2, "usecmap")) {
      if (tok1[0] == '/') {
        name = new GString(tok1 + 1);
        if ((f = globalParams->findToUnicodeFile(name))) {
          parseCMap1(&getCharFromFile, f, nBits);
          fclose(f);
        } else {
          error(-1, "Couldn't find ToUnicode CMap file for '%s'",
                name->getCString());
        }
        delete name;
      }
      pst->getToken(tok1, sizeof(tok1), &n1);

    } else if (!strcmp(tok2, "beginbfchar")) {
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfchar")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfchar")) {
          error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
          break;
        }
        if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        // strip the closing '>' of both tokens
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (sscanf(tok1 + 1, "%x", &code1) != 1) {
          error(-1, "Illegal entry in bfchar block in ToUnicode CMap");
          continue;
        }
        // The hex string lies between '<' and '>', so its length is
        // n2 - 2, as in the bfrange branches below.  Passing n2 - 1 made
        // every 4-digit value look longer than 4 digits, which sent plain
        // single-value mappings into the linearly searched string map.
        addMapping(code1, tok2 + 1, n2 - 2, 0);
      }
      pst->getToken(tok1, sizeof(tok1), &n1);

    } else if (!strcmp(tok2, "beginbfrange")) {
      while (pst->getToken(tok1, sizeof(tok1), &n1)) {
        if (!strcmp(tok1, "endbfrange")) {
          break;
        }
        if (!pst->getToken(tok2, sizeof(tok2), &n2) ||
            !strcmp(tok2, "endbfrange") ||
            !pst->getToken(tok3, sizeof(tok3), &n3) ||
            !strcmp(tok3, "endbfrange")) {
          error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
          break;
        }
        if (!(n1 == 2 + nDigits && tok1[0] == '<' && tok1[n1 - 1] == '>' &&
              n2 == 2 + nDigits && tok2[0] == '<' && tok2[n2 - 1] == '>')) {
          error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        tok1[n1 - 1] = tok2[n2 - 1] = '\0';
        if (sscanf(tok1 + 1, "%x", &code1) != 1 ||
            sscanf(tok2 + 1, "%x", &code2) != 1) {
          error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
          continue;
        }
        if (!strcmp(tok3, "[")) {
          // array form: one destination string per code in the range
          i = 0;
          while (pst->getToken(tok1, sizeof(tok1), &n1) &&
                 code1 + i <= code2) {
            if (!strcmp(tok1, "]")) {
              break;
            }
            if (tok1[0] == '<' && tok1[n1 - 1] == '>') {
              tok1[n1 - 1] = '\0';
              addMapping(code1 + i, tok1 + 1, n1 - 2, 0);
            } else {
              error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
            }
            ++i;
          }
        } else if (tok3[0] == '<' && tok3[n3 - 1] == '>') {
          // single-start form: code1 + k is mapped with offset k
          tok3[n3 - 1] = '\0';
          for (i = 0; code1 <= code2; ++code1, ++i) {
            addMapping(code1, tok3 + 1, n3 - 2, i);
          }
        } else {
          error(-1, "Illegal entry in bfrange block in ToUnicode CMap");
        }
      }
      pst->getToken(tok1, sizeof(tok1), &n1);

    } else {
      // not an operator handled here: it becomes the "previous token"
      strcpy(tok1, tok2);
    }
  }
  delete pst;
}
// Records the mapping for <code>.  <uStr> is a string of hex digits and
// <n> the number of digits to consider.
//
// With n <= 4 the string is parsed as one number and <code> maps to that
// number plus <offset>.  Otherwise the string is split into groups of
// four hex digits, giving n/4 Unicode values (at most maxUnicodeString
// are kept); <offset> is added to the last value kept, and the sequence
// is appended to the string map.
//
// The direct map is grown as needed so that <code> fits.
void CharCodeToUnicode::addMapping(CharCode code, char *uStr, int n,
                                   int offset) {
  CharCode oldLen, i;
  Unicode u;
  char uHex[5];
  int j, len;

  if (code > 0xffffff) {
    // Arbitrary upper limit.  The rounding expression below can wrap
    // around for codes near the top of CharCode's range, which would
    // leave the map too small and turn map[code] into an out-of-bounds
    // write.  Such codes are dropped silently because a single bfrange
    // may produce a very large number of them.
    return;
  }

  if (code >= mapLen) {
    oldLen = mapLen;
    // round up to a multiple of 256 that is strictly greater than code
    mapLen = (code + 256) & ~255;
    map = (Unicode *)grealloc(map, mapLen * sizeof(Unicode));
    for (i = oldLen; i < mapLen; ++i) {
      map[i] = 0;
    }
  }

  if (n <= 4) {
    if (sscanf(uStr, "%x", &u) != 1) {
      error(-1, "Illegal entry in ToUnicode CMap");
      return;
    }
    map[code] = u + offset;
  } else {
    if (sMapLen >= sMapSize) {
      sMapSize = sMapSize + 16;
      sMap = (CharCodeToUnicodeString *)
               grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
    }
    map[code] = 0;
    sMap[sMapLen].c = code;
    // The stored length must never exceed the capacity of u[]: it is
    // used as an index just below and as a loop bound by readers.
    len = n / 4;
    if (len > maxUnicodeString) {
      len = maxUnicodeString;
    }
    sMap[sMapLen].len = len;
    for (j = 0; j < len; ++j) {
      strncpy(uHex, uStr + j*4, 4);
      uHex[4] = '\0';
      if (sscanf(uHex, "%x", &sMap[sMapLen].u[j]) != 1) {
        error(-1, "Illegal entry in ToUnicode CMap");
        // do not leave freshly reallocated memory uninitialised
        sMap[sMapLen].u[j] = 0;
      }
    }
    sMap[sMapLen].u[len - 1] += offset;
    ++sMapLen;
  }
}
// Creates an object with a zero-filled 256-entry map, an empty string
// map and a reference count of 1.  <tagA> is stored as the tag; the
// destructor deletes it.
CharCodeToUnicode::CharCodeToUnicode(GString *tagA) {
  tag = tagA;

  mapLen = 256;
  map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
  memset(map, 0, mapLen * sizeof(Unicode));

  sMap = NULL;
  sMapLen = 0;
  sMapSize = 0;

  refCnt = 1;
#if MULTITHREADED
  gInitMutex(&mutex);
#endif
}
// Creates an object from ready-made tables, with a reference count of 1.
//   tagA      stored as the tag; the destructor deletes it
//   mapA      direct map with mapLenA entries; duplicated when <copyMap>
//             is true, otherwise the pointer itself is stored and later
//             released by the destructor
//   sMapA     string map, always stored as-is (and released by the
//             destructor) together with its length and capacity
CharCodeToUnicode::CharCodeToUnicode(GString *tagA, Unicode *mapA,
                                     CharCode mapLenA, GBool copyMap,
                                     CharCodeToUnicodeString *sMapA,
                                     int sMapLenA, int sMapSizeA) {
  tag = tagA;

  mapLen = mapLenA;
  if (!copyMap) {
    map = mapA;
  } else {
    map = (Unicode *)gmalloc(mapLen * sizeof(Unicode));
    memcpy(map, mapA, mapLen * sizeof(Unicode));
  }

  sMap = sMapA;
  sMapLen = sMapLenA;
  sMapSize = sMapSizeA;

  refCnt = 1;
#if MULTITHREADED
  gInitMutex(&mutex);
#endif
}
// Releases the tag, the direct map and (when present) the string map.
CharCodeToUnicode::~CharCodeToUnicode() {
  // deleting a null pointer is a no-op, so no check is needed here
  delete tag;
  gfree(map);
  if (sMap != NULL) {
    gfree(sMap);
  }
#if MULTITHREADED
  gDestroyMutex(&mutex);
#endif
}
// Adds one reference.  In MULTITHREADED builds the counter is updated
// while holding the object's mutex.
void CharCodeToUnicode::incRefCnt() {
#if MULTITHREADED
  gLockMutex(&mutex);
#endif
  refCnt += 1;
#if MULTITHREADED
  gUnlockMutex(&mutex);
#endif
}
void CharCodeToUnicode::decRefCnt() {
GBool done;
#if MULTITHREADED
gLockMutex(&mutex);
#endif
done = --refCnt == 0;
#if MULTITHREADED
gUnlockMutex(&mutex);
#endif
if (done) {
delete this;
}
}
// Returns true when this object has a tag and that tag compares equal
// to <tagA>; an untagged object never matches.
GBool CharCodeToUnicode::match(GString *tagA) {
  if (!tag) {
    return gFalse;
  }
  return tag->cmp(tagA) == 0;
}
// Sets the mapping for char code <c> to the <len> Unicode values in <u>.
// A single value is stored directly in the map; any other length is
// appended to the string map (and the direct map entry is set to 0).
// At most maxUnicodeString values are kept.  Codes outside the current
// map are ignored.
void CharCodeToUnicode::setMapping(CharCode c, Unicode *u, int len) {
  int i;

  if (c >= mapLen) {
    // map[c] would be out of bounds, and mapToUnicode() rejects such
    // codes before consulting either table anyway
    return;
  }
  if (len == 1) {
    map[c] = u[0];
  } else {
    map[c] = 0;
    if (sMapLen == sMapSize) {
      sMapSize += 8;
      sMap = (CharCodeToUnicodeString *)
               grealloc(sMap, sMapSize * sizeof(CharCodeToUnicodeString));
    }
    // Only maxUnicodeString values fit into an entry, so the recorded
    // length is limited as well; readers use it as a loop bound.
    if (len > maxUnicodeString) {
      len = maxUnicodeString;
    }
    sMap[sMapLen].c = c;
    sMap[sMapLen].len = len;
    for (i = 0; i < len; ++i) {
      sMap[sMapLen].u[i] = u[i];
    }
    ++sMapLen;
  }
}
// Looks up char code <c> and writes its Unicode values to <u>.
// Returns the number of values written, or 0 when <c> has no mapping.
//
// A non-zero entry in the direct map yields exactly one value.
// Otherwise the string map is searched in order and the first entry for
// <c> is used; at most <size> of its values are written.
int CharCodeToUnicode::mapToUnicode(CharCode c, Unicode *u, int size) {
  if (c >= mapLen) {
    return 0;
  }

  const Unicode direct = map[c];
  if (direct) {
    u[0] = direct;
    return 1;
  }

  for (int idx = 0; idx < sMapLen; ++idx) {
    if (sMap[idx].c != c) {
      continue;
    }
    int copied = 0;
    while (copied < sMap[idx].len && copied < size) {
      u[copied] = sMap[idx].u[copied];
      ++copied;
    }
    return copied;
  }
  return 0;
}
//------------------------------------------------------------------------
// Creates a cache with <sizeA> slots, all of them empty.
CharCodeToUnicodeCache::CharCodeToUnicodeCache(int sizeA) {
  size = sizeA;
  cache = (CharCodeToUnicode **)gmalloc(size * sizeof(CharCodeToUnicode *));
  for (int slot = size; slot-- > 0; ) {
    cache[slot] = NULL;
  }
}
// Drops the cache's reference on every occupied slot, then frees the
// slot array.
CharCodeToUnicodeCache::~CharCodeToUnicodeCache() {
  for (int slot = 0; slot < size; ++slot) {
    CharCodeToUnicode *entry = cache[slot];
    if (entry) {
      entry->decRefCnt();
    }
  }
  gfree(cache);
}
// Searches the cache for an entry whose tag matches <tag>.  On a hit the
// entry is moved to slot 0 (entries in front of it shift back by one),
// its reference count is incremented, and it is returned.  Returns NULL
// on a miss.
CharCodeToUnicode *CharCodeToUnicodeCache::getCharCodeToUnicode(GString *tag) {
  // a hit in the front slot needs no reordering
  CharCodeToUnicode *front = cache[0];
  if (front && front->match(tag)) {
    front->incRefCnt();
    return front;
  }

  for (int pos = 1; pos < size; ++pos) {
    CharCodeToUnicode *hit = cache[pos];
    if (!hit || !hit->match(tag)) {
      continue;
    }
    // shift slots 0 .. pos-1 back by one and put the hit in front
    int dst = pos;
    while (dst >= 1) {
      cache[dst] = cache[dst - 1];
      --dst;
    }
    cache[0] = hit;
    hit->incRefCnt();
    return hit;
  }
  return NULL;
}
// Inserts <ctu> at the front of the cache and takes a reference on it.
// The entry in the last slot, if any, loses the cache's reference, and
// all other entries move back by one slot.
void CharCodeToUnicodeCache::add(CharCodeToUnicode *ctu) {
  CharCodeToUnicode *evicted = cache[size - 1];
  if (evicted) {
    evicted->decRefCnt();
  }
  int slot = size - 1;
  while (slot >= 1) {
    cache[slot] = cache[slot - 1];
    --slot;
  }
  cache[0] = ctu;
  ctu->incRefCnt();
}