Made event.change evaluate to whole incoming text.

Currently event.change evaluates from first point of difference to end of the string. This MR modifies it to evaluate to whole incoming text. Now we evaluate whatever change is coming in, even if that change contains some common substring with original string (not considering only from the first point of difference).

The current definition makes it difficult to evaluate the final value from event.value and event.change within Javascript. This change is also better from compatibility POV as other readers such as Adobe and PDF.js make similar calculations.
This commit is contained in:
Pratham Gandhi 2024-06-11 22:41:59 +00:00 committed by Albert Astals Cid
parent 51980dd07e
commit 1b78471a41
3 changed files with 81 additions and 57 deletions

View file

@ -29,8 +29,8 @@ class DocumentTest : public QObject
private Q_SLOTS:
void testCloseDuringRotationJob();
void testDocdataMigration();
void testDiff_data();
void testDiff();
void testEvaluateKeystrokeEventChange_data();
void testEvaluateKeystrokeEventChange();
};
// Test that we don't crash if the document is closed while a RotationJob
@ -128,57 +128,53 @@ void DocumentTest::testDocdataMigration()
delete m_document;
}
void DocumentTest::testDiff_data()
void DocumentTest::testEvaluateKeystrokeEventChange_data()
{
QTest::addColumn<QString>("oldVal");
QTest::addColumn<QString>("newVal");
QTest::addColumn<int>("selStart");
QTest::addColumn<int>("selEnd");
QTest::addColumn<QString>("expectedDiff");
QTest::addRow("empty") << ""
<< ""
<< "";
<< "" << 0 << 0 << "";
QTest::addRow("a") << ""
<< "a"
<< "a";
<< "a" << 0 << 0 << "a";
QTest::addRow("ab") << "a"
<< "b"
<< "b";
<< "b" << 0 << 1 << "b";
QTest::addRow("ab2") << "a"
<< "ab"
<< "b";
<< "ab" << 1 << 1 << "b";
QTest::addRow("kaesekuchen") << "Käse"
<< "Käsekuchen"
<< "kuchen";
<< "Käsekuchen" << 4 << 4 << "kuchen";
QTest::addRow("replace") << "kuchen"
<< "wurst"
<< "wurst";
<< "wurst" << 0 << 6 << "wurst";
QTest::addRow("okular") << "Oku"
<< "Okular"
<< "lar";
<< "Okular" << 3 << 3 << "lar";
QTest::addRow("okular2") << "Oku"
<< "Okular" << 0 << 3 << "Okular";
QTest::addRow("removal1") << "a"
<< ""
<< "";
<< "" << 0 << 1 << "";
QTest::addRow("removal2") << "ab"
<< "a"
<< "";
<< "a" << 1 << 2 << "";
QTest::addRow("overlapping chang") << "abcd"
<< "abclmnopd" << 1 << 3 << "bclmnop";
QTest::addRow("unicode") << "☮🤌"
<< "☮🤌❤️"
<< "❤️";
<< "☮🤌❤️" << 2 << 2 << "❤️";
QTest::addRow("unicode2") << ""
<< "☮🤌❤️"
<< "🤌❤️";
<< "☮🤌❤️" << 1 << 1 << "🤌❤️";
QTest::addRow("unicode3") << "🤍"
<< "🤌"
<< "🤌";
<< "🤌" << 0 << 1 << "🤌";
}
void DocumentTest::testDiff()
void DocumentTest::testEvaluateKeystrokeEventChange()
{
QFETCH(QString, oldVal);
QFETCH(QString, newVal);
QFETCH(int, selStart);
QFETCH(int, selEnd);
QFETCH(QString, expectedDiff);
QCOMPARE(Okular::DocumentPrivate::diff(oldVal, newVal), expectedDiff);
QCOMPARE(Okular::DocumentPrivate::evaluateKeystrokeEventChange(oldVal, newVal, selStart, selEnd), expectedDiff);
}
QTEST_MAIN(DocumentTest)

View file

@ -4358,27 +4358,22 @@ void Document::processFormatAction(const Action *action, Okular::FormFieldText *
}
}
QString DocumentPrivate::diff(const QString &oldVal, const QString &newVal)
QString DocumentPrivate::evaluateKeystrokeEventChange(const QString &oldVal, const QString &newVal, int selStart, int selEnd)
{
// We need to consider unicode surrogate pairs and others so working
// with QString directly, even with the private QStringIterator is
// not that simple to get right
// so let's just convert to ucs4
// also, given that toUcs4 is either a QList or a QVector depending on
// qt version, let's try keep it very auto-typed to ease Qt6 porting
/*
The change needs to be evaluated here in accordance with code points.
selStart and selEnd parameters passed to this method should be been adjusted accordingly.
auto oldUcs4 = oldVal.toStdU32String();
auto newUcs4 = newVal.toStdU32String();
for (size_t i = 0; i < std::min(oldUcs4.size(), newUcs4.size()); i++) {
if (oldUcs4.at(i) != newUcs4.at(i)) {
return QString::fromUcs4(std::u32string_view {newUcs4}.substr(i).data(), newUcs4.size() - i);
}
Since QString methods work in terms of code units, we convert the strings to UTF-32.
*/
std::u32string oldUcs4 = oldVal.toStdU32String();
std::u32string newUcs4 = newVal.toStdU32String();
if (selStart < 0 || selEnd < 0 || (selEnd - selStart) + (static_cast<int>(newUcs4.size()) - static_cast<int>(oldUcs4.size())) < 0) {
// Prevent Okular from crashing if incorrect parameters are passed or some bug causes incorrect calculation
return {};
}
if (oldUcs4.size() < newUcs4.size()) {
return QString::fromUcs4(std::u32string_view {newUcs4}.substr(oldUcs4.size()).data(), newUcs4.size() - oldUcs4.size());
}
return {};
const size_t changeLength = (selEnd - selStart) + (newUcs4.size() - oldUcs4.size());
return QString::fromUcs4(std::u32string_view {newUcs4}.substr(selStart, changeLength).data(), changeLength);
}
void Document::processKeystrokeAction(const Action *action, Okular::FormFieldText *fft, const QVariant &newValue, int prevCursorPos, int prevAnchorPos)
@ -4396,18 +4391,51 @@ void Document::processKeystrokeAction(const Action *action, Okular::FormFieldTex
}
std::shared_ptr<Event> event = Event::createKeystrokeEvent(fft, d->m_pagesVector[foundPage]);
event->setChange(DocumentPrivate::diff(fft->text(), newValue.toString()));
int selStart;
if (fft->text().size() - newValue.toString().size() == 1 && prevCursorPos == prevAnchorPos) {
// consider a one character removal as selection of that character and then its removal.
selStart = prevCursorPos - 1;
} else {
selStart = std::min(prevCursorPos, prevAnchorPos);
}
/* Set the selStart and selEnd event properties
QString using UTF-16 counts a code point as made up of 1 or 2 16-bit code units.
When encoded using 2 code units, the units are referred to as surrogate pairs.
selectionStart() and selectionEnd() methods evaluate prevCursorPos and prevAnchorPos based on code units during selection.
While this unit-based evaulation is suitable for detecting changes, for providing consistency with Adobe Reader for values of selStart and selEnd,
it would be best to evaluate in terms of code points rather than the code units.
To correct the values of selStart and selEnd accordingly, we iterate over the code units. If a surrogate pair is encountered, then selStart and
selEnd are accordingly decremented.
*/
int selStart = std::min(prevCursorPos, prevAnchorPos);
int selEnd = std::max(prevCursorPos, prevAnchorPos);
int codeUnit;
int initialSelStart = selStart;
int initialSelEnd = selEnd;
for (codeUnit = 0; codeUnit < initialSelStart && codeUnit < fft->text().size(); codeUnit++) {
if (fft->text().at(codeUnit).isHighSurrogate()) {
// skip the low surrogate and decrement selStart and selEnd
codeUnit++;
selStart--;
selEnd--;
}
}
for (; codeUnit < initialSelEnd && codeUnit < fft->text().size(); codeUnit++) {
if (fft->text().at(codeUnit).isHighSurrogate()) {
// skip the low surrogate and decrement selEnd
codeUnit++;
selEnd--;
}
}
std::u32string oldUcs4 = fft->text().toStdU32String();
std::u32string newUcs4 = newValue.toString().toStdU32String();
// It is necessary to count size in terms of code points rather than code units for deletion.
if (oldUcs4.size() - newUcs4.size() == 1 && selStart == selEnd) {
// consider a one character removal as selection of that character and then its removal.
selStart--;
}
event->setSelStart(selStart);
event->setSelEnd(selEnd);
// Use the corrected selStart and selEnd for evaluating the change.
event->setChange(DocumentPrivate::evaluateKeystrokeEventChange(fft->text(), newValue.toString(), selStart, selEnd));
const ScriptAction *linkscript = static_cast<const ScriptAction *>(action);
d->executeScriptEvent(event, linkscript);

View file

@ -232,7 +232,7 @@ public:
void clearAndWaitForRequests();
OKULARCORE_EXPORT static QString diff(const QString &oldVal, const QString &newVal);
OKULARCORE_EXPORT static QString evaluateKeystrokeEventChange(const QString &oldVal, const QString &newVal, int selStart, int selEnd);
/*
* Executes a ScriptAction with the event passed as parameter.