mirror of
https://invent.kde.org/graphics/okular
synced 2024-10-01 05:33:33 +00:00
Simplify textentity memory management
After trying to measure the effect of switching TinyTextEntity from a custom SBO string type to just QString and getting a barely measurable gain, TinyTextEntity kind of looked like TextEntity, so merge those two. Also reduce the number of new/delete calls around TextEntities. Implement a string pool for some tiny strings, plus various memory optimizations.
This commit is contained in:
parent
7c6b5fb2fc
commit
2e008d437c
|
@ -62,7 +62,7 @@ static void createTextPage(const QVector<QString> &text, const QVector<Okular::N
|
|||
{
|
||||
tp = new Okular::TextPage();
|
||||
for (int i = 0; i < text.size(); i++) {
|
||||
tp->append(text[i], new Okular::NormalizedRect(rect[i]));
|
||||
tp->append(text[i], rect[i]);
|
||||
}
|
||||
|
||||
// The Page::setTextPage method invokes the layout analysis algorithms tested by some tests here
|
||||
|
|
|
@ -374,9 +374,7 @@ TextEntity::List Page::words(const RegularAreaRect *area, TextPage::TextAreaIncl
|
|||
}
|
||||
|
||||
for (auto &retI : ret) {
|
||||
const TextEntity *orig = retI;
|
||||
retI = new TextEntity(orig->text(), new Okular::NormalizedRect(orig->transformedArea(d->rotationMatrix())));
|
||||
delete orig;
|
||||
retI = TextEntity(retI.text(), Okular::NormalizedRect(retI.transformedArea(d->rotationMatrix())));
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
|
|
@ -106,7 +106,7 @@ Okular::TextPage *TextDocumentGeneratorPrivate::createTextPage(int pageNumber) c
|
|||
text = QStringLiteral("\n");
|
||||
}
|
||||
|
||||
textPage->append(text, new Okular::NormalizedRect(rect.left(), rect.top(), rect.right(), rect.bottom()));
|
||||
textPage->append(text, Okular::NormalizedRect(rect.left(), rect.top(), rect.right(), rect.bottom()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -14,6 +14,7 @@
|
|||
#include "misc.h"
|
||||
#include "page.h"
|
||||
#include "page_p.h"
|
||||
#include <unordered_set>
|
||||
|
||||
#include <cstring>
|
||||
|
||||
|
@ -22,6 +23,30 @@
|
|||
|
||||
using namespace Okular;
|
||||
|
||||
// Many of the strings are being reused; especially
|
||||
// those less than 2 letters are very common
|
||||
// Use the implicit shared bits from QString to
|
||||
// not keep multiple same strings around, but just up the
|
||||
// refcount a bit
|
||||
// The main reason for '2' is that most calls here happen
|
||||
// in auxiliary threads that are following a document
|
||||
// and keeping the pool thread_local gives quite a bit
|
||||
// of advantage here.
|
||||
// Some calls, though, come from the main thread, so we
|
||||
// shouldn't keep all the long strings allocated in the main
|
||||
// thread around forever.
|
||||
// '2' has been chosen by random testing, and guesswork.
|
||||
// Returns a QString equal to str.
// Strings longer than two characters are returned as-is; shorter ones
// (including the empty string) are routed through a per-thread set so that
// every request for the same short string yields a copy of one pooled element.
static QString fromPool(const QString &str)
|
||||
{
|
||||
// Longer strings bypass the pool entirely and are never stored in it.
if (str.length() > 2) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// One pool per thread; nothing in this function ever removes entries from it.
thread_local std::unordered_set<QString> pool;
|
||||
// insert() yields an iterator to the already-present equal element, or to the
// newly inserted one; the bool half of the pair (success) is not used.
auto [iterator, success] = pool.insert(str);
|
||||
// Return by value: the caller gets a copy of the pooled element.
return *iterator;
|
||||
}
|
||||
|
||||
class SearchPoint
|
||||
{
|
||||
public:
|
||||
|
@ -31,11 +56,11 @@ public:
|
|||
{
|
||||
}
|
||||
|
||||
/** The TinyTextEntity containing the first character of the match. */
|
||||
TextList::ConstIterator it_begin;
|
||||
/** The TextEntity containing the first character of the match. */
|
||||
TextEntity::List::ConstIterator it_begin;
|
||||
|
||||
/** The TinyTextEntity containing the last character of the match. */
|
||||
TextList::ConstIterator it_end;
|
||||
/** The TextEntity containing the last character of the match. */
|
||||
TextEntity::List::ConstIterator it_end;
|
||||
|
||||
/** The index of the first character of the match in (*it_begin)->text().
|
||||
* Satisfies 0 <= offset_begin < (*it_begin)->text().length().
|
||||
|
@ -106,105 +131,27 @@ static bool doesConsumeY(const NormalizedRect &first, const NormalizedRect &seco
|
|||
return segmentsOverlap(first.top, first.bottom, second.top, second.bottom, threshold);
|
||||
}
|
||||
|
||||
/*
|
||||
Rationale behind TinyTextEntity:
|
||||
|
||||
instead of storing directly a QString for the text of an entity,
|
||||
we store the UTF-16 data and their length. This way, we save about
|
||||
4 int's wrt a QString, and we can create a new string from that
|
||||
raw data (that's the only penalty of that).
|
||||
Even better, if the string we need to store has at most
|
||||
MaxStaticChars characters, then we store those in place of the QChar*
|
||||
that would be used (with new[] + free[]) for the data.
|
||||
*/
|
||||
class TinyTextEntity
|
||||
{
|
||||
static const int MaxStaticChars = sizeof(void *) / sizeof(QChar);
|
||||
|
||||
public:
|
||||
TinyTextEntity(const QString &text, const NormalizedRect &rect)
|
||||
: area(rect)
|
||||
{
|
||||
Q_ASSERT_X(!text.isEmpty(), "TinyTextEntity", "empty string");
|
||||
Q_ASSERT_X(sizeof(d) == sizeof(void *), "TinyTextEntity", "internal storage is wider than QChar*, fix it!");
|
||||
length = text.length();
|
||||
switch (length) {
|
||||
#if QT_POINTER_SIZE >= 8
|
||||
case 4:
|
||||
d.qc[3] = text.at(3).unicode();
|
||||
// fall through
|
||||
case 3:
|
||||
d.qc[2] = text.at(2).unicode();
|
||||
#endif
|
||||
// fall through
|
||||
case 2:
|
||||
d.qc[1] = text.at(1).unicode();
|
||||
// fall through
|
||||
case 1:
|
||||
d.qc[0] = text.at(0).unicode();
|
||||
break;
|
||||
default:
|
||||
d.data = new QChar[length];
|
||||
std::memcpy(d.data, text.constData(), length * sizeof(QChar));
|
||||
}
|
||||
}
|
||||
|
||||
~TinyTextEntity()
|
||||
{
|
||||
if (length > MaxStaticChars) {
|
||||
delete[] d.data;
|
||||
}
|
||||
}
|
||||
|
||||
inline QString text() const
|
||||
{
|
||||
return length <= MaxStaticChars ? QString::fromRawData((const QChar *)&d.qc[0], length) : QString::fromRawData(d.data, length);
|
||||
}
|
||||
|
||||
inline NormalizedRect transformedArea(const QTransform &matrix) const
|
||||
{
|
||||
NormalizedRect transformed_area = area;
|
||||
transformed_area.transform(matrix);
|
||||
return transformed_area;
|
||||
}
|
||||
|
||||
NormalizedRect area;
|
||||
|
||||
private:
|
||||
Q_DISABLE_COPY(TinyTextEntity)
|
||||
|
||||
union {
|
||||
QChar *data;
|
||||
ushort qc[MaxStaticChars];
|
||||
} d;
|
||||
int length;
|
||||
};
|
||||
|
||||
TextEntity::TextEntity(const QString &text, NormalizedRect *area)
|
||||
: m_text(text)
|
||||
TextEntity::TextEntity(const QString &text, const NormalizedRect &area)
|
||||
: m_text(fromPool(text))
|
||||
, m_area(area)
|
||||
, d(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
TextEntity::~TextEntity()
|
||||
{
|
||||
delete m_area;
|
||||
}
|
||||
TextEntity::~TextEntity() = default;
|
||||
|
||||
QString TextEntity::text() const
|
||||
{
|
||||
return m_text;
|
||||
}
|
||||
|
||||
NormalizedRect *TextEntity::area() const
|
||||
NormalizedRect TextEntity::area() const
|
||||
{
|
||||
return m_area;
|
||||
}
|
||||
|
||||
NormalizedRect TextEntity::transformedArea(const QTransform &matrix) const
|
||||
{
|
||||
NormalizedRect transformed_area = *m_area;
|
||||
NormalizedRect transformed_area = m_area;
|
||||
transformed_area.transform(matrix);
|
||||
return transformed_area;
|
||||
}
|
||||
|
@ -217,7 +164,6 @@ TextPagePrivate::TextPagePrivate()
|
|||
TextPagePrivate::~TextPagePrivate()
|
||||
{
|
||||
qDeleteAll(m_searchPoints);
|
||||
qDeleteAll(m_words);
|
||||
}
|
||||
|
||||
TextPage::TextPage()
|
||||
|
@ -228,14 +174,7 @@ TextPage::TextPage()
|
|||
TextPage::TextPage(const TextEntity::List &words)
|
||||
: d(new TextPagePrivate())
|
||||
{
|
||||
TextEntity::List::ConstIterator it = words.constBegin(), itEnd = words.constEnd();
|
||||
for (; it != itEnd; ++it) {
|
||||
TextEntity *e = *it;
|
||||
if (!e->text().isEmpty()) {
|
||||
d->m_words.append(new TinyTextEntity(e->text(), *e->area()));
|
||||
}
|
||||
delete e;
|
||||
}
|
||||
d->m_words = words;
|
||||
}
|
||||
|
||||
TextPage::~TextPage()
|
||||
|
@ -243,30 +182,27 @@ TextPage::~TextPage()
|
|||
delete d;
|
||||
}
|
||||
|
||||
void TextPage::append(const QString &text, NormalizedRect *area)
|
||||
void TextPage::append(const QString &text, const NormalizedRect &area)
|
||||
{
|
||||
if (!text.isEmpty()) {
|
||||
if (!d->m_words.isEmpty()) {
|
||||
TinyTextEntity *lastEntity = d->m_words.last();
|
||||
const QString concatText = lastEntity->text() + text.normalized(QString::NormalizationForm_KC);
|
||||
TextEntity &lastEntity = d->m_words.last();
|
||||
const QString concatText = lastEntity.text() + text.normalized(QString::NormalizationForm_KC);
|
||||
if (concatText != concatText.normalized(QString::NormalizationForm_KC)) {
|
||||
// If this happens it means that the new text + old one have combined, for example A and ◌̊ form Å
|
||||
NormalizedRect newArea = *area | lastEntity->area;
|
||||
delete area;
|
||||
delete lastEntity;
|
||||
NormalizedRect newArea = area | lastEntity.area();
|
||||
d->m_words.removeLast();
|
||||
d->m_words.append(new TinyTextEntity(concatText.normalized(QString::NormalizationForm_KC), newArea));
|
||||
d->m_words.append(TextEntity(concatText.normalized(QString::NormalizationForm_KC), newArea));
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
d->m_words.append(new TinyTextEntity(text.normalized(QString::NormalizationForm_KC), *area));
|
||||
d->m_words.append(TextEntity(text.normalized(QString::NormalizationForm_KC), area));
|
||||
}
|
||||
delete area;
|
||||
}
|
||||
|
||||
struct WordWithCharacters {
|
||||
WordWithCharacters(TinyTextEntity *w, const TextList &c)
|
||||
WordWithCharacters(const TextEntity &w, const TextEntity::List &c)
|
||||
: word(w)
|
||||
, characters(c)
|
||||
{
|
||||
|
@ -274,16 +210,16 @@ struct WordWithCharacters {
|
|||
|
||||
inline QString text() const
|
||||
{
|
||||
return word->text();
|
||||
return word.text();
|
||||
}
|
||||
|
||||
inline const NormalizedRect &area() const
|
||||
inline NormalizedRect area() const
|
||||
{
|
||||
return word->area;
|
||||
return word.area();
|
||||
}
|
||||
|
||||
TinyTextEntity *word;
|
||||
TextList characters;
|
||||
TextEntity word;
|
||||
TextEntity::List characters;
|
||||
};
|
||||
typedef QList<WordWithCharacters> WordsWithCharacters;
|
||||
|
||||
|
@ -452,14 +388,14 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
}
|
||||
}
|
||||
|
||||
TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
TextList::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd;
|
||||
TextEntity::List::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
TextEntity::List::ConstIterator start = it, end = itEnd, tmpIt = it; //, tmpItEnd = itEnd;
|
||||
const MergeSide side = d->m_page ? (MergeSide)d->m_page->totalOrientation() : MergeRight;
|
||||
|
||||
NormalizedRect tmp;
|
||||
// case 2(a)
|
||||
for (; it != itEnd; ++it) {
|
||||
tmp = (*it)->area;
|
||||
tmp = it->area();
|
||||
if (tmp.contains(startC.x, startC.y)) {
|
||||
start = it;
|
||||
}
|
||||
|
@ -473,7 +409,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
if (start == it && end == itEnd) {
|
||||
for (; it != itEnd; ++it) {
|
||||
// is there any text rectangle within the start_end rect
|
||||
tmp = (*it)->area;
|
||||
tmp = it->area();
|
||||
if (start_end.intersects(tmp)) {
|
||||
break;
|
||||
}
|
||||
|
@ -495,7 +431,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
// selection type 01
|
||||
if (startC.y <= endC.y) {
|
||||
for (; it != itEnd; ++it) {
|
||||
rect = (*it)->area;
|
||||
rect = it->area();
|
||||
bool flagV = !rect.isBottom(startC);
|
||||
|
||||
if (flagV && rect.isRight(startC)) {
|
||||
|
@ -511,7 +447,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
int distance = scaleX + scaleY + 100;
|
||||
|
||||
for (; it != itEnd; ++it) {
|
||||
rect = (*it)->area;
|
||||
rect = it->area();
|
||||
|
||||
if (rect.isBottomOrLevel(startC) && rect.isRight(startC)) {
|
||||
QRect entRect = rect.geometry(scaleX, scaleY);
|
||||
|
@ -545,7 +481,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
|
||||
if (startC.y <= endC.y) {
|
||||
for (; itEnd >= it; itEnd--) {
|
||||
rect = (*itEnd)->area;
|
||||
rect = itEnd->area();
|
||||
bool flagV = !rect.isTop(endC);
|
||||
|
||||
if (flagV && rect.isLeft(endC)) {
|
||||
|
@ -558,7 +494,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
else {
|
||||
int distance = scaleX + scaleY + 100;
|
||||
for (; itEnd >= it; itEnd--) {
|
||||
rect = (*itEnd)->area;
|
||||
rect = itEnd->area();
|
||||
|
||||
if (rect.isTopOrLevel(endC) && rect.isLeft(endC)) {
|
||||
QRect entRect = rect.geometry(scaleX, scaleY);
|
||||
|
@ -606,7 +542,7 @@ RegularAreaRect *TextPage::textArea(TextSelection *sel) const
|
|||
}
|
||||
|
||||
for (; start <= end; start++) {
|
||||
ret->appendShape((*start)->transformedArea(matrix), side);
|
||||
ret->appendShape(start->transformedArea(matrix), side);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -619,9 +555,9 @@ RegularAreaRect *TextPage::findText(int searchID, const QString &query, SearchDi
|
|||
if (d->m_words.isEmpty() || query.isEmpty() || (area && area->isNull())) {
|
||||
return nullptr;
|
||||
}
|
||||
TextList::ConstIterator start;
|
||||
TextEntity::List::ConstIterator start;
|
||||
int start_offset = 0;
|
||||
TextList::ConstIterator end;
|
||||
TextEntity::List::ConstIterator end;
|
||||
const QMap<int, SearchPoint *>::const_iterator sIt = d->m_searchPoints.constFind(searchID);
|
||||
if (sIt == d->m_searchPoints.constEnd()) {
|
||||
// if no previous run of this search is found, then set it to start
|
||||
|
@ -671,7 +607,7 @@ RegularAreaRect *TextPage::findText(int searchID, const QString &query, SearchDi
|
|||
// we have a '-' just followed by a '\n' character
|
||||
// check if the string contains a '-' character
|
||||
// if the '-' is the last entry
|
||||
static int stringLengthAdaptedWithHyphen(const QString &str, TextList::ConstIterator it, TextList::ConstIterator textListEnd)
|
||||
static int stringLengthAdaptedWithHyphen(const QString &str, TextEntity::List::ConstIterator it, TextEntity::List::ConstIterator textListEnd)
|
||||
{
|
||||
const int len = str.length();
|
||||
|
||||
|
@ -683,14 +619,14 @@ static int stringLengthAdaptedWithHyphen(const QString &str, TextList::ConstIter
|
|||
// validity check of it + 1
|
||||
if ((it + 1) != textListEnd) {
|
||||
// 1. if the next character is '\n'
|
||||
const QString &lookahedStr = (*(it + 1))->text();
|
||||
const QString &lookahedStr = (it + 1)->text();
|
||||
if (lookahedStr.startsWith(QLatin1Char('\n'))) {
|
||||
return len - 1;
|
||||
}
|
||||
|
||||
// 2. if the next word is in a different line or not
|
||||
const NormalizedRect &hyphenArea = (*it)->area;
|
||||
const NormalizedRect &lookaheadArea = (*(it + 1))->area;
|
||||
const NormalizedRect &hyphenArea = it->area();
|
||||
const NormalizedRect &lookaheadArea = (it + 1)->area();
|
||||
|
||||
// lookahead to check whether both the '-' rect and next character rect overlap
|
||||
if (!doesConsumeY(hyphenArea, lookaheadArea, 70)) {
|
||||
|
@ -712,9 +648,9 @@ RegularAreaRect *TextPagePrivate::searchPointToArea(const SearchPoint *sp)
|
|||
const QTransform matrix = pagePrivate ? pagePrivate->rotationMatrix() : QTransform();
|
||||
RegularAreaRect *ret = new RegularAreaRect;
|
||||
|
||||
for (TextList::ConstIterator it = sp->it_begin;; it++) {
|
||||
const TinyTextEntity *curEntity = *it;
|
||||
ret->append(curEntity->transformedArea(matrix));
|
||||
for (TextEntity::List::ConstIterator it = sp->it_begin;; it++) {
|
||||
const TextEntity &curEntity = *it;
|
||||
ret->append(curEntity.transformedArea(matrix));
|
||||
|
||||
if (it == sp->it_end) {
|
||||
break;
|
||||
|
@ -725,7 +661,7 @@ RegularAreaRect *TextPagePrivate::searchPointToArea(const SearchPoint *sp)
|
|||
return ret;
|
||||
}
|
||||
|
||||
RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QString &_query, TextComparisonFunction comparer, TextList::ConstIterator start, int start_offset, TextList::ConstIterator end)
|
||||
RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QString &_query, TextComparisonFunction comparer, TextEntity::List::ConstIterator start, int start_offset, TextEntity::List::ConstIterator end)
|
||||
{
|
||||
// normalize query search all unicode (including glyphs)
|
||||
const QString query = _query.normalized(QString::NormalizationForm_KC);
|
||||
|
@ -734,15 +670,15 @@ RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QS
|
|||
// queryLeft is the length of the query we have left to match
|
||||
int j = 0, queryLeft = query.length();
|
||||
|
||||
TextList::ConstIterator it = start;
|
||||
TextEntity::List::ConstIterator it = start;
|
||||
int offset = start_offset;
|
||||
|
||||
TextList::ConstIterator it_begin = TextList::ConstIterator();
|
||||
TextEntity::List::ConstIterator it_begin = TextEntity::List::ConstIterator();
|
||||
int offset_begin = 0; // dummy initial value to suppress compiler warnings
|
||||
|
||||
while (it != end) {
|
||||
const TinyTextEntity *curEntity = *it;
|
||||
const QString &str = curEntity->text();
|
||||
const TextEntity &curEntity = *it;
|
||||
const QString &str = curEntity.text();
|
||||
const int strLen = str.length();
|
||||
const int adjustedLen = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd());
|
||||
// adjustedLen <= strLen
|
||||
|
@ -753,7 +689,7 @@ RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QS
|
|||
continue;
|
||||
}
|
||||
|
||||
if (it_begin == TextList::ConstIterator()) {
|
||||
if (it_begin == TextEntity::List::ConstIterator()) {
|
||||
it_begin = it;
|
||||
offset_begin = offset;
|
||||
}
|
||||
|
@ -782,7 +718,7 @@ RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QS
|
|||
queryLeft = query.length();
|
||||
it = it_begin;
|
||||
offset = offset_begin + 1;
|
||||
it_begin = TextList::ConstIterator();
|
||||
it_begin = TextEntity::List::ConstIterator();
|
||||
} else {
|
||||
// we have a match
|
||||
// move the current position in the query
|
||||
|
@ -825,7 +761,7 @@ RegularAreaRect *TextPagePrivate::findTextInternalForward(int searchID, const QS
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const QString &_query, TextComparisonFunction comparer, TextList::ConstIterator start, int start_offset, TextList::ConstIterator end)
|
||||
RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const QString &_query, TextComparisonFunction comparer, TextEntity::List::ConstIterator start, int start_offset, TextEntity::List::ConstIterator end)
|
||||
{
|
||||
// normalize query to search all unicode (including glyphs)
|
||||
const QString query = _query.normalized(QString::NormalizationForm_KC);
|
||||
|
@ -835,10 +771,10 @@ RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const Q
|
|||
// queryLeft is the length of the query we have left
|
||||
int j = query.length(), queryLeft = query.length();
|
||||
|
||||
TextList::ConstIterator it = start;
|
||||
TextEntity::List::ConstIterator it = start;
|
||||
int offset = start_offset;
|
||||
|
||||
TextList::ConstIterator it_begin = TextList::ConstIterator();
|
||||
TextEntity::List::ConstIterator it_begin = TextEntity::List::ConstIterator();
|
||||
int offset_begin = 0; // dummy initial value to suppress compiler warnings
|
||||
|
||||
while (true) {
|
||||
|
@ -849,8 +785,8 @@ RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const Q
|
|||
it--;
|
||||
}
|
||||
|
||||
const TinyTextEntity *curEntity = *it;
|
||||
const QString &str = curEntity->text();
|
||||
const TextEntity &curEntity = *it;
|
||||
const QString &str = curEntity.text();
|
||||
const int strLen = str.length();
|
||||
const int adjustedLen = stringLengthAdaptedWithHyphen(str, it, m_words.constEnd());
|
||||
// adjustedLen <= strLen
|
||||
|
@ -859,7 +795,7 @@ RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const Q
|
|||
offset = strLen;
|
||||
}
|
||||
|
||||
if (it_begin == TextList::ConstIterator()) {
|
||||
if (it_begin == TextEntity::List::ConstIterator()) {
|
||||
it_begin = it;
|
||||
offset_begin = offset;
|
||||
}
|
||||
|
@ -890,7 +826,7 @@ RegularAreaRect *TextPagePrivate::findTextInternalBackward(int searchID, const Q
|
|||
queryLeft = query.length();
|
||||
it = it_begin;
|
||||
offset = offset_begin - 1;
|
||||
it_begin = TextList::ConstIterator();
|
||||
it_begin = TextEntity::List::ConstIterator();
|
||||
} else {
|
||||
// we have a match
|
||||
// move the current position in the query
|
||||
|
@ -943,24 +879,24 @@ QString TextPage::text(const RegularAreaRect *area, TextAreaInclusionBehaviour b
|
|||
return QString();
|
||||
}
|
||||
|
||||
TextList::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
TextEntity::List::ConstIterator it = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
QString ret;
|
||||
if (area) {
|
||||
for (; it != itEnd; ++it) {
|
||||
if (b == AnyPixelTextAreaInclusionBehaviour) {
|
||||
if (area->intersects((*it)->area)) {
|
||||
ret += (*it)->text();
|
||||
if (area->intersects(it->area())) {
|
||||
ret += it->text();
|
||||
}
|
||||
} else {
|
||||
NormalizedPoint center = (*it)->area.center();
|
||||
NormalizedPoint center = it->area().center();
|
||||
if (area->contains(center.x, center.y)) {
|
||||
ret += (*it)->text();
|
||||
ret += it->text();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (; it != itEnd; ++it) {
|
||||
ret += (*it)->text();
|
||||
ret += it->text();
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@ -985,9 +921,8 @@ static bool compareTinyTextEntityY(const WordWithCharacters &first, const WordWi
|
|||
/**
|
||||
* Sets a new word list, deleting the contents of the old one
|
||||
*/
|
||||
void TextPagePrivate::setWordList(const TextList &list)
|
||||
void TextPagePrivate::setWordList(const TextEntity::List &list)
|
||||
{
|
||||
qDeleteAll(m_words);
|
||||
m_words = list;
|
||||
}
|
||||
|
||||
|
@ -995,13 +930,13 @@ void TextPagePrivate::setWordList(const TextList &list)
|
|||
* Remove all the spaces in between texts. It will make all the generators
|
||||
* same, whether they save spaces(like pdf) or not(like djvu).
|
||||
*/
|
||||
static void removeSpace(TextList *words)
|
||||
static void removeSpace(TextEntity::List *words)
|
||||
{
|
||||
TextList::Iterator it = words->begin();
|
||||
TextEntity::List::Iterator it = words->begin();
|
||||
const QString str(QLatin1Char(' '));
|
||||
|
||||
while (it != words->end()) {
|
||||
if ((*it)->text() == str) {
|
||||
if (it->text() == str) {
|
||||
it = words->erase(it);
|
||||
} else {
|
||||
++it;
|
||||
|
@ -1016,7 +951,7 @@ static void removeSpace(TextList *words)
|
|||
* WordsWithCharacters memory has to be managed by the caller, both the
|
||||
* WordWithCharacters::word and WordWithCharacters::characters contents
|
||||
*/
|
||||
static WordsWithCharacters makeWordFromCharacters(const TextList &characters, int pageWidth, int pageHeight)
|
||||
static WordsWithCharacters makeWordFromCharacters(const TextEntity::List &characters, int pageWidth, int pageHeight)
|
||||
{
|
||||
/**
|
||||
* We will traverse characters and try to create words from the TinyTextEntities in it.
|
||||
|
@ -1031,14 +966,15 @@ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, in
|
|||
*/
|
||||
WordsWithCharacters wordsWithCharacters;
|
||||
|
||||
TextList::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
|
||||
TextEntity::List::ConstIterator it = characters.begin(), itEnd = characters.end(), tmpIt;
|
||||
int newLeft, newRight, newTop, newBottom;
|
||||
|
||||
for (; it != itEnd; it++) {
|
||||
QString textString = (*it)->text();
|
||||
QString textString = it->text();
|
||||
QString newString;
|
||||
QRect lineArea = (*it)->area.roundedGeometry(pageWidth, pageHeight), elementArea;
|
||||
TextList wordCharacters;
|
||||
QRect lineArea = it->area().roundedGeometry(pageWidth, pageHeight);
|
||||
QRect elementArea;
|
||||
TextEntity::List wordCharacters;
|
||||
tmpIt = it;
|
||||
int space = 0;
|
||||
|
||||
|
@ -1049,10 +985,10 @@ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, in
|
|||
// when textString is the start of the word
|
||||
if (tmpIt == it) {
|
||||
NormalizedRect newRect(lineArea, pageWidth, pageHeight);
|
||||
wordCharacters.append(new TinyTextEntity(textString.normalized(QString::NormalizationForm_KC), newRect));
|
||||
wordCharacters.append(TextEntity(textString.normalized(QString::NormalizationForm_KC), newRect));
|
||||
} else {
|
||||
NormalizedRect newRect(elementArea, pageWidth, pageHeight);
|
||||
wordCharacters.append(new TinyTextEntity(textString.normalized(QString::NormalizationForm_KC), newRect));
|
||||
wordCharacters.append(TextEntity(textString.normalized(QString::NormalizationForm_KC), newRect));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1065,7 +1001,7 @@ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, in
|
|||
if (it == itEnd) {
|
||||
break;
|
||||
}
|
||||
elementArea = (*it)->area.roundedGeometry(pageWidth, pageHeight);
|
||||
elementArea = it->area().roundedGeometry(pageWidth, pageHeight);
|
||||
if (!doesConsumeY(elementArea, lineArea, 60)) {
|
||||
--it;
|
||||
break;
|
||||
|
@ -1091,13 +1027,13 @@ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, in
|
|||
lineArea.setWidth(newRight - newLeft);
|
||||
lineArea.setHeight(newBottom - newTop);
|
||||
|
||||
textString = (*it)->text();
|
||||
textString = it->text();
|
||||
}
|
||||
|
||||
// if newString is not empty, save it
|
||||
if (!newString.isEmpty()) {
|
||||
const NormalizedRect newRect(lineArea, pageWidth, pageHeight);
|
||||
TinyTextEntity *word = new TinyTextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
|
||||
TextEntity word = TextEntity(newString.normalized(QString::NormalizationForm_KC), newRect);
|
||||
wordsWithCharacters.append(WordWithCharacters(word, wordCharacters));
|
||||
}
|
||||
|
||||
|
@ -1106,6 +1042,7 @@ static WordsWithCharacters makeWordFromCharacters(const TextList &characters, in
|
|||
}
|
||||
}
|
||||
|
||||
wordsWithCharacters.shrink_to_fit();
|
||||
return wordsWithCharacters;
|
||||
}
|
||||
|
||||
|
@ -1179,9 +1116,7 @@ QList<QPair<WordsWithCharacters, QRect>> makeAndSortLines(const WordsWithCharact
|
|||
only one element and append it to the lines
|
||||
*/
|
||||
if (!found) {
|
||||
WordsWithCharacters tmp;
|
||||
tmp.append((*it));
|
||||
lines.append(QPair<WordsWithCharacters, QRect>(tmp, elementArea));
|
||||
lines.append(QPair<WordsWithCharacters, QRect>({*it}, elementArea));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1190,6 +1125,7 @@ QList<QPair<WordsWithCharacters, QRect>> makeAndSortLines(const WordsWithCharact
|
|||
WordsWithCharacters &list = line.first;
|
||||
std::sort(list.begin(), list.end(), compareTinyTextEntityX);
|
||||
}
|
||||
lines.shrink_to_fit();
|
||||
|
||||
return lines;
|
||||
}
|
||||
|
@ -1425,8 +1361,7 @@ static RegionTextList XYCutForBoundingBoxes(const QList<WordWithCharacters> &wor
|
|||
|
||||
// for every text in the region
|
||||
for (const WordWithCharacters &wwc : list) {
|
||||
TinyTextEntity *ent = wwc.word;
|
||||
const QRect entRect = ent->area.geometry(pageWidth, pageHeight);
|
||||
const QRect entRect = wwc.area().geometry(pageWidth, pageHeight);
|
||||
|
||||
// calculate vertical projection profile proj_on_xaxis1
|
||||
for (int k = entRect.left(); k <= entRect.left() + entRect.width(); ++k) {
|
||||
|
@ -1622,13 +1557,14 @@ static RegionTextList XYCutForBoundingBoxes(const QList<WordWithCharacters> &wor
|
|||
}
|
||||
}
|
||||
|
||||
tree.shrink_to_fit();
|
||||
return tree;
|
||||
}
|
||||
|
||||
/**
|
||||
* Add spaces in between words in a line. It reuses the pointers passed in tree and might add new ones. You will need to take care of deleting them if needed
|
||||
*/
|
||||
WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
|
||||
TextEntity::List addNecessarySpace(RegionTextList tree, int pageWidth, int pageHeight)
|
||||
{
|
||||
/**
|
||||
* 1. Call makeAndSortLines before adding spaces in between words in a line
|
||||
|
@ -1636,10 +1572,12 @@ WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pa
|
|||
* 3. Finally, extract all the space separated texts from each region and return it
|
||||
*/
|
||||
|
||||
TextEntity::List res;
|
||||
// Only change the texts under RegionTexts, not the area
|
||||
for (RegionText &tmpRegion : tree) {
|
||||
for (const RegionText &tmpRegion : std::as_const(tree)) {
|
||||
// Step 01
|
||||
QList<QPair<WordsWithCharacters, QRect>> sortedLines = makeAndSortLines(tmpRegion.text(), pageWidth, pageHeight);
|
||||
int counter = 0;
|
||||
|
||||
// Step 02
|
||||
for (QPair<WordsWithCharacters, QRect> &sortedLine : sortedLines) {
|
||||
|
@ -1663,9 +1601,8 @@ WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pa
|
|||
const QString spaceStr(QStringLiteral(" "));
|
||||
const QRect rect(QPoint(left, top), QPoint(right, bottom));
|
||||
const NormalizedRect entRect(rect, pageWidth, pageHeight);
|
||||
TinyTextEntity *ent1 = new TinyTextEntity(spaceStr, entRect);
|
||||
TinyTextEntity *ent2 = new TinyTextEntity(spaceStr, entRect);
|
||||
WordWithCharacters word(ent1, QList<TinyTextEntity *>() << ent2);
|
||||
TextEntity ent1 = TextEntity(spaceStr, entRect);
|
||||
WordWithCharacters word(ent1, QList<TextEntity>() << ent1);
|
||||
|
||||
list.insert(k + 1, word);
|
||||
|
||||
|
@ -1673,21 +1610,20 @@ WordsWithCharacters addNecessarySpace(RegionTextList tree, int pageWidth, int pa
|
|||
k++;
|
||||
}
|
||||
}
|
||||
counter += list.length();
|
||||
}
|
||||
res.reserve(res.length() + counter);
|
||||
|
||||
WordsWithCharacters tmpList;
|
||||
for (const QPair<WordsWithCharacters, QRect> &sortedLine : std::as_const(sortedLines)) {
|
||||
tmpList += sortedLine.first;
|
||||
for (const WordWithCharacters &word : sortedLine.first) {
|
||||
res += word.characters;
|
||||
}
|
||||
}
|
||||
tmpRegion.setText(tmpList);
|
||||
}
|
||||
|
||||
// Step 03
|
||||
WordsWithCharacters tmp;
|
||||
for (const RegionText &tmpRegion : std::as_const(tree)) {
|
||||
tmp += tmpRegion.text();
|
||||
}
|
||||
return tmp;
|
||||
res.shrink_to_fit();
|
||||
return res;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1703,7 +1639,7 @@ void TextPagePrivate::correctTextOrder()
|
|||
const int pageWidth = (int)(scalingFactor * m_page->width());
|
||||
const int pageHeight = (int)(scalingFactor * m_page->height());
|
||||
|
||||
TextList characters = m_words;
|
||||
TextEntity::List characters = m_words;
|
||||
|
||||
/**
|
||||
* Remove spaces from the text
|
||||
|
@ -1718,21 +1654,13 @@ void TextPagePrivate::correctTextOrder()
|
|||
/**
|
||||
* Make a XY Cut tree for segmentation of the texts
|
||||
*/
|
||||
const RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, pageWidth, pageHeight);
|
||||
RegionTextList tree = XYCutForBoundingBoxes(wordsWithCharacters, pageWidth, pageHeight);
|
||||
|
||||
/**
|
||||
* Add spaces to the word
|
||||
*/
|
||||
const WordsWithCharacters listWithWordsAndSpaces = addNecessarySpace(tree, pageWidth, pageHeight);
|
||||
const auto listOfCharacters = addNecessarySpace(std::move(tree), pageWidth, pageHeight);
|
||||
|
||||
/**
|
||||
* Break the words into characters
|
||||
*/
|
||||
TextList listOfCharacters;
|
||||
for (const WordWithCharacters &word : listWithWordsAndSpaces) {
|
||||
delete word.word;
|
||||
listOfCharacters.append(word.characters);
|
||||
}
|
||||
setWordList(listOfCharacters);
|
||||
}
|
||||
|
||||
|
@ -1744,21 +1672,21 @@ TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionB
|
|||
|
||||
TextEntity::List ret;
|
||||
if (area) {
|
||||
for (const TinyTextEntity *te : std::as_const(d->m_words)) {
|
||||
for (const TextEntity &te : std::as_const(d->m_words)) {
|
||||
if (b == AnyPixelTextAreaInclusionBehaviour) {
|
||||
if (area->intersects(te->area)) {
|
||||
ret.append(new TextEntity(te->text(), new Okular::NormalizedRect(te->area)));
|
||||
if (area->intersects(te.area())) {
|
||||
ret.append(te);
|
||||
}
|
||||
} else {
|
||||
const NormalizedPoint center = te->area.center();
|
||||
const NormalizedPoint center = te.area().center();
|
||||
if (area->contains(center.x, center.y)) {
|
||||
ret.append(new TextEntity(te->text(), new Okular::NormalizedRect(te->area)));
|
||||
ret.append(te);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (const TinyTextEntity *te : std::as_const(d->m_words)) {
|
||||
ret.append(new TextEntity(te->text(), new Okular::NormalizedRect(te->area)));
|
||||
for (const TextEntity &te : std::as_const(d->m_words)) {
|
||||
ret.append(te);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
|
@ -1766,23 +1694,23 @@ TextEntity::List TextPage::words(const RegularAreaRect *area, TextAreaInclusionB
|
|||
|
||||
RegularAreaRect *TextPage::wordAt(const NormalizedPoint &p, QString *word) const
|
||||
{
|
||||
TextList::ConstIterator itBegin = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
TextList::ConstIterator it = itBegin;
|
||||
TextList::ConstIterator posIt = itEnd;
|
||||
TextEntity::List::ConstIterator itBegin = d->m_words.constBegin(), itEnd = d->m_words.constEnd();
|
||||
TextEntity::List::ConstIterator it = itBegin;
|
||||
TextEntity::List::ConstIterator posIt = itEnd;
|
||||
for (; it != itEnd; ++it) {
|
||||
if ((*it)->area.contains(p.x, p.y)) {
|
||||
if (it->area().contains(p.x, p.y)) {
|
||||
posIt = it;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (posIt != itEnd) {
|
||||
if ((*posIt)->text().simplified().isEmpty()) {
|
||||
if (posIt->text().simplified().isEmpty()) {
|
||||
return nullptr;
|
||||
}
|
||||
// Find the first TextEntity of the word
|
||||
while (posIt != itBegin) {
|
||||
--posIt;
|
||||
const QString itText = (*posIt)->text();
|
||||
const QString itText = posIt->text();
|
||||
if (itText.right(1).at(0).isSpace()) {
|
||||
if (itText.endsWith(QLatin1String("-\n"))) {
|
||||
// Is a hyphenated word
|
||||
|
@ -1792,7 +1720,7 @@ RegularAreaRect *TextPage::wordAt(const NormalizedPoint &p, QString *word) const
|
|||
|
||||
if (itText == QLatin1String("\n") && posIt != itBegin) {
|
||||
--posIt;
|
||||
if ((*posIt)->text().endsWith(QLatin1String("-"))) {
|
||||
if (posIt->text().endsWith(QLatin1String("-"))) {
|
||||
// Is a hyphenated word
|
||||
// continue searching the start of the word back
|
||||
continue;
|
||||
|
@ -1807,13 +1735,13 @@ RegularAreaRect *TextPage::wordAt(const NormalizedPoint &p, QString *word) const
|
|||
RegularAreaRect *ret = new RegularAreaRect();
|
||||
QString foundWord;
|
||||
for (; posIt != itEnd; ++posIt) {
|
||||
const QString itText = (*posIt)->text();
|
||||
const QString itText = posIt->text();
|
||||
if (itText.simplified().isEmpty()) {
|
||||
break;
|
||||
}
|
||||
|
||||
ret->appendShape((*posIt)->area);
|
||||
foundWord += (*posIt)->text();
|
||||
ret->appendShape(posIt->area());
|
||||
foundWord += posIt->text();
|
||||
if (itText.right(1).at(0).isSpace()) {
|
||||
if (!foundWord.endsWith(QLatin1String("-\n"))) {
|
||||
break;
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include <QList>
|
||||
#include <QString>
|
||||
|
||||
#include "area.h"
|
||||
#include "global.h"
|
||||
#include "okularcore_export.h"
|
||||
|
||||
|
@ -51,13 +52,13 @@ class RegularAreaRect;
|
|||
class OKULARCORE_EXPORT TextEntity
|
||||
{
|
||||
public:
|
||||
typedef QList<TextEntity *> List;
|
||||
typedef QList<TextEntity> List;
|
||||
|
||||
/**
|
||||
* Creates a new text entity with the given @p text and the
|
||||
* given @p area.
|
||||
*/
|
||||
TextEntity(const QString &text, NormalizedRect *area);
|
||||
TextEntity(const QString &text, const NormalizedRect &area);
|
||||
|
||||
/**
|
||||
* Destroys the text entity.
|
||||
|
@ -72,7 +73,7 @@ public:
|
|||
/**
|
||||
* Returns the bounding area of the text entity.
|
||||
*/
|
||||
NormalizedRect *area() const;
|
||||
NormalizedRect area() const;
|
||||
|
||||
/**
|
||||
* Returns the transformed area of the text entity.
|
||||
|
@ -81,12 +82,7 @@ public:
|
|||
|
||||
private:
|
||||
QString m_text;
|
||||
NormalizedRect *m_area;
|
||||
|
||||
class Private;
|
||||
const Private *d;
|
||||
|
||||
Q_DISABLE_COPY(TextEntity)
|
||||
NormalizedRect m_area;
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -138,7 +134,7 @@ public:
|
|||
* Appends the given @p text with the given @p area as new
|
||||
* @ref TextEntity to the page.
|
||||
*/
|
||||
void append(const QString &text, NormalizedRect *area);
|
||||
void append(const QString &text, const NormalizedRect &area);
|
||||
|
||||
/**
|
||||
* Returns the bounding rect of the text which matches the following criteria
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
#ifndef _OKULAR_TEXTPAGE_P_H_
|
||||
#define _OKULAR_TEXTPAGE_P_H_
|
||||
|
||||
#include "textpage.h"
|
||||
#include <QList>
|
||||
#include <QMap>
|
||||
#include <QPair>
|
||||
|
@ -15,15 +16,6 @@
|
|||
|
||||
class SearchPoint;
|
||||
|
||||
/**
|
||||
* Memory-optimized storage of a TextEntity. Stores a string and its bounding box.
|
||||
*
|
||||
* When a generator adds a TextEntity to a TextPage, it is internally stored as TinyTextEntity.
|
||||
* TinyTextEntity is also internally used to get the geometry of text selections and highlight areas.
|
||||
*
|
||||
* @see TextEntity
|
||||
*/
|
||||
class TinyTextEntity;
|
||||
class RegionText;
|
||||
|
||||
namespace Okular
|
||||
|
@ -31,7 +23,6 @@ namespace Okular
|
|||
class PagePrivate;
|
||||
class RegularAreaRect;
|
||||
class Page;
|
||||
typedef QList<TinyTextEntity *> TextList;
|
||||
|
||||
/**
|
||||
* Returns whether the two strings match.
|
||||
|
@ -50,13 +41,13 @@ public:
|
|||
TextPagePrivate();
|
||||
~TextPagePrivate();
|
||||
|
||||
RegularAreaRect *findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator start, int start_offset, const TextList::ConstIterator end);
|
||||
RegularAreaRect *findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextList::ConstIterator start, int start_offset, const TextList::ConstIterator end);
|
||||
RegularAreaRect *findTextInternalForward(int searchID, const QString &query, TextComparisonFunction comparer, const TextEntity::List::ConstIterator start, int start_offset, const TextEntity::List::ConstIterator end);
|
||||
RegularAreaRect *findTextInternalBackward(int searchID, const QString &query, TextComparisonFunction comparer, const TextEntity::List::ConstIterator start, int start_offset, const TextEntity::List::ConstIterator end);
|
||||
|
||||
/**
|
||||
* Copy a TextEntity::List to m_words
|
||||
*/
|
||||
void setWordList(const TextList &list);
|
||||
void setWordList(const TextEntity::List &list);
|
||||
|
||||
/**
|
||||
* Make necessary modifications in the TextEntity::List to make the text order correct, so
|
||||
|
@ -65,7 +56,7 @@ public:
|
|||
void correctTextOrder();
|
||||
|
||||
// variables that can be accessed directly from TextPage
|
||||
TextList m_words;
|
||||
TextEntity::List m_words;
|
||||
QMap<int, SearchPoint *> m_searchPoints;
|
||||
Page *m_page;
|
||||
|
||||
|
|
|
@ -206,11 +206,11 @@ Okular::TextPage *DjVuGenerator::textPage(Okular::TextRequest *request)
|
|||
userMutex()->unlock();
|
||||
QList<KDjVu::TextEntity>::ConstIterator it = te.constBegin();
|
||||
QList<KDjVu::TextEntity>::ConstIterator itEnd = te.constEnd();
|
||||
QList<Okular::TextEntity *> words;
|
||||
QList<Okular::TextEntity> words;
|
||||
const KDjVu::Page *djvupage = m_djvu->pages().at(page->number());
|
||||
for (; it != itEnd; ++it) {
|
||||
const KDjVu::TextEntity &cur = *it;
|
||||
words.append(new Okular::TextEntity(cur.text(), new Okular::NormalizedRect(cur.rect(), djvupage->width(), djvupage->height())));
|
||||
words.append(Okular::TextEntity(cur.text(), Okular::NormalizedRect(cur.rect(), djvupage->width(), djvupage->height())));
|
||||
}
|
||||
Okular::TextPage *textpage = new Okular::TextPage(words);
|
||||
return textpage;
|
||||
|
|
|
@ -228,44 +228,39 @@ Okular::TextPage *DviGenerator::textPage(Okular::TextRequest *request)
|
|||
const Okular::Page *page = request->page();
|
||||
|
||||
qCDebug(OkularDviDebug);
|
||||
dviPageInfo *pageInfo = new dviPageInfo();
|
||||
dviPageInfo pageInfo;
|
||||
|
||||
pageInfo->width = page->width();
|
||||
pageInfo->height = page->height();
|
||||
pageInfo.width = page->width();
|
||||
pageInfo.height = page->height();
|
||||
|
||||
pageInfo->pageNumber = page->number() + 1;
|
||||
pageInfo.pageNumber = page->number() + 1;
|
||||
|
||||
pageInfo->resolution = m_resolution;
|
||||
pageInfo.resolution = m_resolution;
|
||||
|
||||
QMutexLocker lock(userMutex());
|
||||
|
||||
// get page text from m_dviRenderer
|
||||
Okular::TextPage *ktp = nullptr;
|
||||
if (m_dviRenderer) {
|
||||
SimplePageSize s = m_dviRenderer->sizeOfPage(pageInfo->pageNumber);
|
||||
pageInfo->resolution = (double)(pageInfo->width) / s.width().getLength_in_inch();
|
||||
SimplePageSize s = m_dviRenderer->sizeOfPage(pageInfo.pageNumber);
|
||||
pageInfo.resolution = (double)(pageInfo.width) / s.width().getLength_in_inch();
|
||||
|
||||
m_dviRenderer->getText(pageInfo);
|
||||
m_dviRenderer->getText(&pageInfo);
|
||||
lock.unlock();
|
||||
|
||||
ktp = extractTextFromPage(pageInfo);
|
||||
}
|
||||
delete pageInfo;
|
||||
return ktp;
|
||||
}
|
||||
|
||||
Okular::TextPage *DviGenerator::extractTextFromPage(dviPageInfo *pageInfo)
|
||||
Okular::TextPage *DviGenerator::extractTextFromPage(const dviPageInfo &pageInfo)
|
||||
{
|
||||
QList<Okular::TextEntity *> textOfThePage;
|
||||
QList<Okular::TextEntity> textOfThePage;
|
||||
|
||||
QVector<TextBox>::ConstIterator it = pageInfo->textBoxList.constBegin();
|
||||
QVector<TextBox>::ConstIterator itEnd = pageInfo->textBoxList.constEnd();
|
||||
int pageWidth = pageInfo.width, pageHeight = pageInfo.height;
|
||||
|
||||
int pageWidth = pageInfo->width, pageHeight = pageInfo->height;
|
||||
|
||||
for (; it != itEnd; ++it) {
|
||||
TextBox curTB = *it;
|
||||
textOfThePage.push_back(new Okular::TextEntity(curTB.text, new Okular::NormalizedRect(curTB.box, pageWidth, pageHeight)));
|
||||
for (const TextBox &curTB : std::as_const(pageInfo.textBoxList)) {
|
||||
textOfThePage.push_back(Okular::TextEntity(curTB.text, Okular::NormalizedRect(curTB.box, pageWidth, pageHeight)));
|
||||
}
|
||||
|
||||
Okular::TextPage *ktp = new Okular::TextPage(textOfThePage);
|
||||
|
|
|
@ -57,7 +57,7 @@ private:
|
|||
QBitArray m_linkGenerated;
|
||||
|
||||
void loadPages(QVector<Okular::Page *> &pagesVector);
|
||||
Okular::TextPage *extractTextFromPage(dviPageInfo *pageInfo);
|
||||
Okular::TextPage *extractTextFromPage(const dviPageInfo &pageInfo);
|
||||
void fillViewportFromAnchor(Okular::DocumentViewport &vp, const Anchor anch, int pW, int pH) const;
|
||||
void fillViewportFromAnchor(Okular::DocumentViewport &vp, const Anchor anch, const Okular::Page *page) const;
|
||||
QList<Okular::ObjectRect *> generateDviLinks(const dviPageInfo *pageInfo);
|
||||
|
|
|
@ -1706,7 +1706,7 @@ bool PDFGenerator::exportTo(const QString &fileName, const Okular::ExportFormat
|
|||
inline void append(Okular::TextPage *ktp, const QString &s, double l, double b, double r, double t)
|
||||
{
|
||||
// qWarning(PDFDebug).nospace() << "text: " << s << " at (" << l << "," << t << ")x(" << r <<","<<b<<")";
|
||||
ktp->append(s, new Okular::NormalizedRect(l, t, r, b));
|
||||
ktp->append(s, Okular::NormalizedRect(l, t, r, b));
|
||||
}
|
||||
|
||||
Okular::TextPage *PDFGenerator::abstractTextPage(const std::vector<std::unique_ptr<Poppler::TextBox>> &text, double height, double width, int rot)
|
||||
|
|
|
@ -1577,9 +1577,9 @@ Okular::TextPage *XpsPage::textPage()
|
|||
for (int i = 0; i < text.length(); i++) {
|
||||
const int width = metrics.horizontalAdvance(text, i + 1);
|
||||
|
||||
Okular::NormalizedRect *rect =
|
||||
new Okular::NormalizedRect((origin.x() + lastWidth) / m_pageSize.width(), (origin.y() - metrics.height()) / m_pageSize.height(), (origin.x() + width) / m_pageSize.width(), origin.y() / m_pageSize.height());
|
||||
rect->transform(matrix);
|
||||
Okular::NormalizedRect rect =
|
||||
Okular::NormalizedRect((origin.x() + lastWidth) / m_pageSize.width(), (origin.y() - metrics.height()) / m_pageSize.height(), (origin.x() + width) / m_pageSize.width(), origin.y() / m_pageSize.height());
|
||||
rect.transform(matrix);
|
||||
textPage->append(text.mid(i, 1), rect);
|
||||
|
||||
lastWidth = width;
|
||||
|
|
|
@ -3137,13 +3137,12 @@ void PageView::guessTableDividers()
|
|||
rects.append(tsp.rectInItem);
|
||||
const Okular::TextEntity::List words = tsp.item->page()->words(&rects, Okular::TextPage::CentralPixelTextAreaInclusionBehaviour);
|
||||
|
||||
for (const Okular::TextEntity *te : words) {
|
||||
if (te->text().isEmpty()) {
|
||||
delete te;
|
||||
for (const Okular::TextEntity &te : words) {
|
||||
if (te.text().isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Okular::NormalizedRect wordArea = *te->area();
|
||||
Okular::NormalizedRect wordArea = te.area();
|
||||
|
||||
// convert it from item coordinates to part coordinates
|
||||
wordArea.left -= tsp.rectInItem.left;
|
||||
|
@ -3170,8 +3169,6 @@ void PageView::guessTableDividers()
|
|||
colTicks.append(qMakePair(wordArea.right, -1));
|
||||
rowTicks.append(qMakePair(wordArea.top, +1));
|
||||
rowTicks.append(qMakePair(wordArea.bottom, -1));
|
||||
|
||||
delete te;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue