LibPDF: Implement PostScriptCalculatorFunction

Includes a tokenizer and interpreter for the subset of PostScript
supported in PDF type 4 functions.
This commit is contained in:
Nico Weber 2023-11-06 22:07:28 +01:00 committed by Andreas Kling
parent bda162fc0d
commit 80eec1e16b
2 changed files with 638 additions and 3 deletions

View file

@ -7,7 +7,9 @@
#include <AK/DeprecatedString.h>
#include <AK/Forward.h>
#include <LibCore/MappedFile.h>
#include <LibPDF/CommonNames.h>
#include <LibPDF/Document.h>
#include <LibPDF/Function.h>
#include <LibTest/Macros.h>
#include <LibTest/TestCase.h>
@ -86,3 +88,108 @@ TEST_CASE(malformed_pdf_document)
EXPECT(document_or_error.is_error());
}
}
// Wraps each float in a PDF::Value and returns the resulting PDF array object as a PDF::Value.
static PDF::Value make_array(Vector<float> floats)
{
    Vector<PDF::Value> elements;
    for (auto number : floats) {
        PDF::Value element { number };
        elements.append(element);
    }

    auto array_object = adopt_ref(*new PDF::ArrayObject(move(elements)));
    return PDF::Value { move(array_object) };
}
// Builds a type 4 (PostScript calculator) function stream whose body is `program` and whose
// dictionary carries the given /Domain and /Range, then asks PDF::Function::create() to parse it.
// Returns whatever PDF::Function::create() returns, i.e. either the function or a PDF error.
static PDF::PDFErrorOr<NonnullRefPtr<PDF::Function>> make_postscript_function(StringView program, Vector<float> domain, Vector<float> range)
{
    HashMap<DeprecatedFlyString, PDF::Value> entries;
    entries.set(PDF::CommonNames::FunctionType, PDF::Value { 4 });
    entries.set(PDF::CommonNames::Domain, make_array(move(domain)));
    entries.set(PDF::CommonNames::Range, make_array(move(range)));
    auto dictionary = adopt_ref(*new PDF::DictObject(move(entries)));

    auto program_bytes = MUST(ByteBuffer::copy(program.bytes()));
    auto function_stream = adopt_ref(*new PDF::StreamObject(dictionary, move(program_bytes)));

    // The document is not needed for anything here, but without it UBSan reports a
    // (harmless) method call on a null object.
    auto mapped_file = MUST(Core::MappedFile::map("linearized.pdf"sv));
    auto document = MUST(PDF::Document::create(mapped_file->bytes()));

    return PDF::Function::create(document, function_stream);
}
// Creates a PostScript calculator function from `program` and reports a test failure
// (with the error's message) if creation failed. Returns the created function.
static NonnullRefPtr<PDF::Function> check_postscript_function(StringView program, Vector<float> domain, Vector<float> range)
{
    auto function_or_error = make_postscript_function(program, move(domain), move(range));
    if (function_or_error.is_error()) {
        FAIL(function_or_error.error().message());
    }
    return function_or_error.value();
}
// Runs `program` on `inputs` and expects the evaluation result to equal `outputs`.
// Every input and every output is given the bound [-100, 100], so the domain/range
// clamping done by the function does not affect values inside that interval.
static void check_evaluate(StringView program, Vector<float> inputs, Vector<float> outputs)
{
    // Produces `count` (lower, upper) pairs flattened into a single vector.
    auto make_bounds = [](size_t count) {
        Vector<float> bounds;
        for (size_t pair = 0; pair < count; ++pair) {
            bounds.append(-100.0f);
            bounds.append(100.0f);
        }
        return bounds;
    };

    Vector<float> domain = make_bounds(inputs.size());
    Vector<float> range = make_bounds(outputs.size());

    auto function = check_postscript_function(program, domain, range);
    auto evaluation = function->evaluate(inputs);
    if (evaluation.is_error()) {
        FAIL(evaluation.error().message());
    }
    EXPECT_EQ(evaluation.value(), outputs);
}
// Exercises every operator of the PostScript calculator subset once (some twice) by
// evaluating a tiny program on fixed inputs and comparing against the expected stack.
TEST_CASE(postscript)
{
    struct Sample {
        StringView program;
        Vector<float> inputs;
        Vector<float> outputs;
    };

    Sample const samples[] = {
        // Arithmetic operators
        { "{ abs }"sv, { 0.5f }, { 0.5f } },
        { "{ add }"sv, { 0.25f, 0.5f }, { 0.75f } },
        { "{ atan }"sv, { 1.0f, 0.01f }, { AK::to_degrees(atan2f(0.01f, 1.0f)) } },
        { "{ ceiling }"sv, { 0.5f }, { 1.0f } },
        { "{ cos }"sv, { 1.0f }, { cosf(AK::to_radians(1.0f)) } },
        { "{ cvi }"sv, { 0.5f }, { 0.0f } },
        { "{ cvr }"sv, { 0.5f }, { 0.5f } },
        { "{ div }"sv, { 0.5f, 1.0f }, { 0.5f } },
        { "{ exp }"sv, { 0.0f }, { 1.0f } },
        { "{ floor }"sv, { 0.5f }, { 0.0f } },
        { "{ idiv }"sv, { 0.5f, 1.0f }, { 0.0f } },
        { "{ ln }"sv, { 10.0f }, { logf(10.0f) } },
        { "{ log }"sv, { 10.0f }, { log10f(10.0f) } },
        { "{ mod }"sv, { 0.5f, 0.25f }, { 0.0f } },
        { "{ mul }"sv, { 0.5f, 0.25f }, { 0.125f } },
        { "{ neg }"sv, { 0.5f }, { -0.5f } },
        { "{ round }"sv, { 0.5f }, { 1.0f } },
        { "{ sin }"sv, { 1.0f }, { sinf(AK::to_radians(1.0f)) } },
        { "{ sqrt }"sv, { 0.5f }, { sqrtf(0.5f) } },
        { "{ sub }"sv, { 0.5f, 0.25f }, { 0.25f } },
        { "{ truncate }"sv, { 0.5f }, { 0.0f } },

        // Relational, boolean, and bitwise operators
        { "{ and }"sv, { 0.0f, 1.0f }, { 0.0f } },
        { "{ bitshift }"sv, { 1.0f, 3.0f }, { 8.0f } },
        { "{ bitshift }"sv, { 8.0f, -2.0f }, { 2.0f } },
        { "{ eq }"sv, { 0.5f, 0.5f }, { 1.0f } },
        { "{ ge }"sv, { 0.5f, 0.5f }, { 1.0f } },
        { "{ gt }"sv, { 0.5f, 0.5f }, { 0.0f } },
        { "{ le }"sv, { 0.5f, 0.5f }, { 1.0f } },
        { "{ lt }"sv, { 0.5f, 0.5f }, { 0.0f } },
        { "{ ne }"sv, { 0.5f, 0.5f }, { 0.0f } },
        { "{ not }"sv, { 0.5f }, { 0.0f } },
        { "{ or }"sv, { 0.0f, 1.0f }, { 1.0f } },
        { "{ xor }"sv, { 0.0f, 1.0f }, { 1.0f } },

        // Conditional operators
        { "{ { 4 } if }"sv, { 1.0f }, { 4.0f } },
        { "{ { 4 } if }"sv, { 0.0f }, {} },
        { "{ { 4 } { 5 } ifelse }"sv, { 1.0f }, { 4.0f } },
        { "{ { 4 } { 5 } ifelse }"sv, { 0.0f }, { 5.0f } },

        // Stack operators
        { "{ 2 copy }"sv, { 8.0f, 0.5f, 1.0f }, { 8.0f, 0.5f, 1.0f, 0.5f, 1.0f } },
        { "{ dup }"sv, { 1.0f, 0.5f }, { 1.0f, 0.5f, 0.5f } },
        { "{ exch }"sv, { 8.0f, 1.0f, 0.5f }, { 8.0f, 0.5f, 1.0f } },
        { "{ 1 index }"sv, { 8.0f, 1.0f, 0.5f }, { 8.0f, 1.0f, 0.5f, 1.0f } },
        { "{ pop }"sv, { 8.0f, 1.0f, 0.5f }, { 8.0f, 1.0f } },
        { "{ 3 1 roll }"sv, { 0.5f, 1.0f, 2.0f }, { 2.0f, 0.5f, 1.0f } },
        { "{ 3 -1 roll }"sv, { 0.5f, 1.0f, 2.0f }, { 1.0f, 2.0f, 0.5f } },
    };

    for (auto const& sample : samples)
        check_evaluate(sample.program, sample.inputs, sample.outputs);
}

View file

@ -4,6 +4,7 @@
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/NonnullOwnPtr.h>
#include <LibPDF/CommonNames.h>
#include <LibPDF/Document.h>
#include <LibPDF/Function.h>
@ -131,12 +132,537 @@ PDFErrorOr<ReadonlySpan<float>> StitchingFunction::evaluate(ReadonlySpan<float>)
// A PDF function whose body is a small PostScript program. The program is tokenized once
// in create() and interpreted on a fixed-size float stack on every evaluate() call.
class PostScriptCalculatorFunction final : public Function {
public:
    static PDFErrorOr<NonnullRefPtr<PostScriptCalculatorFunction>> create(Vector<Bound> domain, Optional<Vector<Bound>> range, NonnullRefPtr<StreamObject>);
    virtual PDFErrorOr<ReadonlySpan<float>> evaluate(ReadonlySpan<float>) const override;

private:
    // TABLE 3.39 Operators in type 4 functions
    enum class OperatorType {
        // A numeric literal; not an operator from the table.
        Operand,

        // Arithmetic operators
        Abs,
        Add,
        Atan,
        Ceiling,
        Cos,
        Cvi,
        Cvr,
        Div,
        Exp,
        Floor,
        Idiv,
        Ln,
        Log,
        Mod,
        Mul,
        Neg,
        Round,
        Sin,
        Sqrt,
        Sub,
        Truncate,

        // Relational, boolean, and bitwise operators
        And,
        Bitshift,
        Eq,
        False,
        Ge,
        Gt,
        Le,
        Lt,
        Ne,
        Not,
        Or,
        True,
        Xor,

        // Conditional operators
        If,
        IfElse,

        // Stack operators
        Copy,
        Dup,
        Exch,
        Index,
        Pop,
        Roll,
    };

    // Recognizes one operator keyword at the reader's position (If/IfElse excluded).
    static Optional<OperatorType> parse_operator(Reader&);

    struct IfElse;

    // One element of a tokenized program.
    struct Token {
        // FIXME: Could nan-box this.
        OperatorType type;
        // float for Operand, int (index into the if/else list) for If and IfElse, Empty otherwise.
        Variant<Empty, float, int> value {};
    };

    // The branch bodies of an `if` (if_false stays empty) or `ifelse`.
    struct IfElse {
        Vector<Token> if_true;
        Vector<Token> if_false;
    };

    // Tokenizes one {}-delimited block; nested conditional bodies are appended to the given list.
    static PDFErrorOr<Vector<Token>> parse_postscript_calculator_function(Reader&, Vector<NonnullOwnPtr<IfElse>>&);

    // Fixed-capacity operand stack; `top` is the number of live elements.
    struct Stack {
        Array<float, 100> stack;
        size_t top { 0 };

        // Appends `value`, or fails if all slots are in use.
        PDFErrorOr<void> push(float value)
        {
            if (top != stack.size()) {
                stack[top] = value;
                ++top;
                return {};
            }
            return Error { Error::Type::RenderingUnsupported, "PostScript stack overflow"_string };
        }

        // Removes and returns the topmost value, or fails if the stack is empty.
        PDFErrorOr<float> pop()
        {
            if (top != 0) {
                --top;
                return stack[top];
            }
            return Error { Error::Type::RenderingUnsupported, "PostScript stack underflow"_string };
        }
    };

    // Interprets a token sequence against the given stack.
    PDFErrorOr<void> execute(Vector<Token> const&, Stack&) const;

    Vector<Bound> m_domain;
    Vector<Bound> m_range;
    Vector<Token> m_tokens;
    Vector<NonnullOwnPtr<IfElse>> m_if_elses;

    // Backing storage for the span returned by evaluate().
    Vector<float> mutable m_result;
};
PDFErrorOr<ReadonlySpan<float>> PostScriptCalculatorFunction::evaluate(ReadonlySpan<float>) const
// Tries to recognize one operator keyword at the reader's current position.
//
// On a match the keyword is consumed and its OperatorType is returned; otherwise the
// reader is left untouched and an empty Optional is returned. `if` and `ifelse` are
// deliberately absent: the block parser handles them together with their {}-operands.
Optional<PostScriptCalculatorFunction::OperatorType> PostScriptCalculatorFunction::parse_operator(Reader& reader)
{
    struct KeywordEntry {
        char const* keyword;
        OperatorType type;
    };

    // Entries are tried top to bottom; keep the order stable in case the reader's
    // matching is prefix-based (NOTE(review): Reader::matches() is not visible here).
    static constexpr KeywordEntry keyword_table[] = {
        // Arithmetic operators
        { "abs", OperatorType::Abs },
        { "add", OperatorType::Add },
        { "atan", OperatorType::Atan },
        { "ceiling", OperatorType::Ceiling },
        { "cos", OperatorType::Cos },
        { "cvi", OperatorType::Cvi },
        { "cvr", OperatorType::Cvr },
        { "div", OperatorType::Div },
        { "exp", OperatorType::Exp },
        { "floor", OperatorType::Floor },
        { "idiv", OperatorType::Idiv },
        { "ln", OperatorType::Ln },
        { "log", OperatorType::Log },
        { "mod", OperatorType::Mod },
        { "mul", OperatorType::Mul },
        { "neg", OperatorType::Neg },
        { "round", OperatorType::Round },
        { "sin", OperatorType::Sin },
        { "sqrt", OperatorType::Sqrt },
        { "sub", OperatorType::Sub },
        { "truncate", OperatorType::Truncate },

        // Relational, boolean, and bitwise operators
        { "and", OperatorType::And },
        { "bitshift", OperatorType::Bitshift },
        { "eq", OperatorType::Eq },
        { "false", OperatorType::False },
        { "ge", OperatorType::Ge },
        { "gt", OperatorType::Gt },
        { "le", OperatorType::Le },
        { "lt", OperatorType::Lt },
        { "ne", OperatorType::Ne },
        { "not", OperatorType::Not },
        { "or", OperatorType::Or },
        { "true", OperatorType::True },
        { "xor", OperatorType::Xor },

        // If and Ifelse handled elsewhere.

        // Stack operators
        { "copy", OperatorType::Copy },
        { "dup", OperatorType::Dup },
        { "exch", OperatorType::Exch },
        { "index", OperatorType::Index },
        { "pop", OperatorType::Pop },
        { "roll", OperatorType::Roll },
    };

    for (auto const& entry : keyword_table) {
        if (reader.matches(entry.keyword)) {
            reader.consume((int)strlen(entry.keyword));
            return entry.type;
        }
    }

    return {};
}
// Tokenizes one {}-delimited PostScript block starting at the reader's position.
//
// Nested blocks are parsed recursively and must be followed by `if` (one block) or
// `ifelse` (two blocks); their token lists are stored in `if_elses` and the emitted
// If/IfElse token carries the index of that entry. Number literals become Operand
// tokens, everything else must be an operator known to parse_operator().
//
// Returns the block's tokens, or a MalformedPDF error for input that cannot be tokenized.
PDFErrorOr<Vector<PostScriptCalculatorFunction::Token>>
PostScriptCalculatorFunction::parse_postscript_calculator_function(Reader& reader, Vector<NonnullOwnPtr<IfElse>>& if_elses)
{
    reader.consume_whitespace();
    if (!reader.consume('{'))
        return Error { Error::Type::MalformedPDF, "PostScript expected '{'" };

    Vector<PostScriptCalculatorFunction::Token> tokens;
    while (!reader.matches('}')) {
        if (reader.consume_whitespace())
            continue;

        if (reader.matches('{')) {
            auto if_true = TRY(parse_postscript_calculator_function(reader, if_elses));
            reader.consume_whitespace();
            if (reader.matches("if")) {
                reader.consume(2);
                tokens.append({ OperatorType::If, (int)if_elses.size() });
                if_elses.append(adopt_own(*new IfElse { move(if_true), {} }));
                continue;
            }

            // The stream comes from the PDF file, so report bad syntax instead of asserting.
            if (!reader.matches('{'))
                return Error { Error::Type::MalformedPDF, "PostScript expected 'if' or '{' after {}-delimited expression"_string };

            auto if_false = TRY(parse_postscript_calculator_function(reader, if_elses));
            reader.consume_whitespace();
            if (reader.matches("ifelse")) {
                reader.consume(6);
                tokens.append({ OperatorType::IfElse, (int)if_elses.size() });
                if_elses.append(adopt_own(*new IfElse { move(if_true), move(if_false) }));
                continue;
            }
            return Error { Error::Type::MalformedPDF, "PostScript confused parsing {}-delimited expressions"_string };
        }

        if (reader.matches_number()) {
            // FIXME: Nicer float conversion.
            // NOTE(review): strtof() expects NUL-terminated input; presumably the stream
            // buffer is not guaranteed to be terminated — confirm.
            char const* start = reinterpret_cast<char const*>(reader.bytes().slice(reader.offset()).data());
            char* endptr;
            float value = strtof(start, &endptr);

            // strtof() leaves endptr == start when nothing could be converted. Without
            // this check the reader would not advance and this loop would never end.
            if (endptr == start)
                return Error { Error::Type::MalformedPDF, "PostScript malformed number"_string };

            reader.move_by(endptr - start);
            tokens.append({ OperatorType::Operand, value });
            continue;
        }

        if (Optional<OperatorType> op = parse_operator(reader); op.has_value()) {
            tokens.append({ op.value() });
            continue;
        }

        return Error { Error::Type::MalformedPDF, "PostScript unknown operator"_string };
    }

    // The loop only exits when the reader is positioned at '}'.
    VERIFY(reader.consume('}'));

    return tokens;
}
// Builds a calculator function from a stream: the stream's bytes are tokenized once and
// stored together with the domain and range bounds.
//
// Fails with MalformedPDF if no range was supplied, and propagates any tokenizer error.
PDFErrorOr<NonnullRefPtr<PostScriptCalculatorFunction>>
PostScriptCalculatorFunction::create(Vector<Bound> domain, Optional<Vector<Bound>> range, NonnullRefPtr<StreamObject> stream)
{
    if (!range.has_value()) {
        return Error { Error::Type::MalformedPDF, "Function type 4 requires /Range" };
    }

    Reader program_reader { stream->bytes() };
    Vector<NonnullOwnPtr<IfElse>> conditional_bodies;
    auto program_tokens = TRY(parse_postscript_calculator_function(program_reader, conditional_bodies));

    auto calculator = adopt_ref(*new PostScriptCalculatorFunction());
    calculator->m_domain = move(domain);
    calculator->m_range = move(range.value());
    calculator->m_tokens = move(program_tokens);
    calculator->m_if_elses = move(conditional_bodies);
    return calculator;
}
// Interprets `tokens` in order against `stack`.
//
// Operand tokens push their float value; every other token pops its arguments, computes,
// and pushes its result. Booleans are represented as 1.0f / 0.0f. If and IfElse tokens
// recurse into the token lists stored in m_if_elses.
//
// Returns an error on stack overflow/underflow, on out-of-range arguments to the stack
// operators, and on integer division by zero; otherwise returns success with the results
// left on `stack`.
PDFErrorOr<void> PostScriptCalculatorFunction::execute(Vector<Token> const& tokens, Stack& stack) const
{
    for (auto const& token : tokens) {
        switch (token.type) {
        case OperatorType::Operand:
            TRY(stack.push(token.value.get<float>()));
            break;

        // Arithmetic operators
        case OperatorType::Abs:
            TRY(stack.push(fabsf(TRY(stack.pop()))));
            break;
        case OperatorType::Add: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a + b));
            break;
        }
        case OperatorType::Atan: {
            // NOTE(review): the PostScript reference describes `num den atan` as the angle
            // whose tangent is num/den, in the range 0..360. Here the topmost operand is
            // passed as atan2f()'s first argument and the result is not normalized —
            // confirm operand order and range before changing (a unit test pins this).
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(AK::to_degrees(atan2f(b, a))));
            break;
        }
        case OperatorType::Ceiling:
            TRY(stack.push(ceilf(TRY(stack.pop()))));
            break;
        case OperatorType::Cos:
            // Angles are in degrees.
            TRY(stack.push(cosf(AK::to_radians(TRY(stack.pop())))));
            break;
        case OperatorType::Cvi:
            TRY(stack.push((int)TRY(stack.pop())));
            break;
        case OperatorType::Cvr:
            // Everything on the stack is already a float, so this is a no-op apart from
            // requiring one operand.
            TRY(stack.push(TRY(stack.pop())));
            break;
        case OperatorType::Div: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a / b));
            break;
        }
        case OperatorType::Exp:
            // NOTE(review): the PostScript reference describes `base exponent exp`
            // (two operands); this computes e^x from a single operand — confirm
            // (a unit test pins this).
            TRY(stack.push(expf(TRY(stack.pop()))));
            break;
        case OperatorType::Floor:
            TRY(stack.push(floorf(TRY(stack.pop()))));
            break;
        case OperatorType::Idiv: {
            int b = (int)TRY(stack.pop());
            int a = (int)TRY(stack.pop());
            // Integer division by zero is undefined behavior; the program comes from the
            // PDF file, so report it instead.
            if (b == 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript idiv by zero"_string };
            // Divide in a wider type so that INT_MIN / -1 cannot overflow.
            TRY(stack.push((float)((long long)a / (long long)b)));
            break;
        }
        case OperatorType::Ln:
            TRY(stack.push(logf(TRY(stack.pop()))));
            break;
        case OperatorType::Log:
            TRY(stack.push(log10f(TRY(stack.pop()))));
            break;
        case OperatorType::Mod: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(fmodf(a, b)));
            break;
        }
        case OperatorType::Mul: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a * b));
            break;
        }
        case OperatorType::Neg:
            TRY(stack.push(-TRY(stack.pop())));
            break;
        case OperatorType::Round:
            TRY(stack.push(roundf(TRY(stack.pop()))));
            break;
        case OperatorType::Sin:
            // Angles are in degrees.
            TRY(stack.push(sinf(AK::to_radians(TRY(stack.pop())))));
            break;
        case OperatorType::Sqrt:
            TRY(stack.push(sqrtf(TRY(stack.pop()))));
            break;
        case OperatorType::Sub: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a - b));
            break;
        }
        case OperatorType::Truncate:
            TRY(stack.push(truncf(TRY(stack.pop()))));
            break;

        // Relational, boolean, and bitwise operators
        case OperatorType::And: {
            int b = (int)TRY(stack.pop());
            int a = (int)TRY(stack.pop());
            TRY(stack.push(a & b));
            break;
        }
        case OperatorType::Bitshift: {
            // Positive shift counts shift left, negative ones shift right.
            int b = (int)TRY(stack.pop());
            int a = (int)TRY(stack.pop());
            if (b >= 0)
                TRY(stack.push(a << b));
            else
                TRY(stack.push(a >> -b));
            break;
        }
        case OperatorType::Eq: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a == b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::False:
            TRY(stack.push(0.0f));
            break;
        case OperatorType::Ge: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a >= b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Gt: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a > b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Le: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a <= b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Lt: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a < b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Ne: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(a != b ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Not: {
            TRY(stack.push(TRY(stack.pop()) == 0.0f ? 1.0f : 0.0f));
            break;
        }
        case OperatorType::Or: {
            int b = (int)TRY(stack.pop());
            int a = (int)TRY(stack.pop());
            TRY(stack.push(a | b));
            break;
        }
        case OperatorType::True:
            TRY(stack.push(1.0f));
            break;
        case OperatorType::Xor: {
            int b = (int)TRY(stack.pop());
            int a = (int)TRY(stack.pop());
            TRY(stack.push(a ^ b));
            break;
        }

        // Conditional operators
        case OperatorType::If: {
            auto const& if_else = m_if_elses[token.value.get<int>()];
            VERIFY(if_else->if_false.is_empty());
            if (TRY(stack.pop()) != 0.0f)
                TRY(execute(if_else->if_true, stack));
            break;
        }
        case OperatorType::IfElse: {
            auto const& if_else = m_if_elses[token.value.get<int>()];
            if (TRY(stack.pop()) != 0.0f)
                TRY(execute(if_else->if_true, stack));
            else
                TRY(execute(if_else->if_false, stack));
            break;
        }

        // Stack operators
        case OperatorType::Copy: {
            int n = (int)TRY(stack.pop());
            if (n < 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript copy with negative argument"_string };
            if ((size_t)n > stack.top)
                return Error { Error::Type::RenderingUnsupported, "PostScript copy with argument larger than stack"_string };
            // `top` grows with every push, so `top - n` walks over the n source elements.
            for (int i = 0; i < n; ++i)
                TRY(stack.push(stack.stack[stack.top - n]));
            break;
        }
        case OperatorType::Dup:
            // Reading stack[top - 1] with an empty stack would wrap around and index out
            // of bounds, so report underflow like pop() does.
            if (stack.top == 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript stack underflow"_string };
            TRY(stack.push(stack.stack[stack.top - 1]));
            break;
        case OperatorType::Exch: {
            float b = TRY(stack.pop());
            float a = TRY(stack.pop());
            TRY(stack.push(b));
            TRY(stack.push(a));
            break;
        }
        case OperatorType::Index: {
            int i = (int)TRY(stack.pop());
            if (i < 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript index with negative argument"_string };
            if ((size_t)i >= stack.top)
                return Error { Error::Type::RenderingUnsupported, "PostScript index with argument larger than stack"_string };
            TRY(stack.push(stack.stack[stack.top - 1 - i]));
            break;
        }
        case OperatorType::Pop:
            TRY(stack.pop());
            break;
        case OperatorType::Roll: {
            // The roll amount is negated so that the rotation below, which moves the
            // first j elements of the range to its end, matches the operator's direction.
            int j = -(int)TRY(stack.pop());
            int n = (int)TRY(stack.pop());
            if (n < 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript roll with negative argument"_string };
            if ((size_t)n > stack.top)
                return Error { Error::Type::RenderingUnsupported, "PostScript roll with argument larger than stack"_string };
            if (j < 0)
                j += n;
            if (j < 0)
                return Error { Error::Type::RenderingUnsupported, "PostScript roll with negative argument"_string };
            if (j > n)
                return Error { Error::Type::RenderingUnsupported, "PostScript roll with argument larger than stack"_string };
            // Rotation by three reversals:
            // http://pointer-overloading.blogspot.com/2013/09/algorithms-rotating-one-dimensional.html
            auto elements = stack.stack.span().slice(stack.top - n, n);
            elements.slice(0, j).reverse();
            elements.slice(j).reverse();
            elements.reverse();
            break;
        }
        }
    }
    return {};
}
// Evaluates the function for the inputs `xs`.
//
// Each input is clamped to its domain bound and pushed onto a fresh stack, the tokenized
// program is executed, and every value left on the stack is clamped to its range bound.
// The returned span refers to m_result, which is overwritten by the next call.
//
// Fails if the number of inputs differs from the number of domain bounds, if execution
// fails, or if the number of results differs from the number of range bounds.
PDFErrorOr<ReadonlySpan<float>> PostScriptCalculatorFunction::evaluate(ReadonlySpan<float> xs) const
{
    auto const input_count = xs.size();
    if (input_count != m_domain.size()) {
        return Error { Error::Type::MalformedPDF, "Function argument size does not match domain size" };
    }

    Stack operands;
    size_t input_index = 0;
    while (input_index < input_count) {
        auto const& bound = m_domain[input_index];
        TRY(operands.push(clamp(xs[input_index], bound.lower, bound.upper)));
        ++input_index;
    }

    TRY(execute(m_tokens, operands));

    auto const output_count = operands.top;
    if (output_count != m_range.size()) {
        return Error { Error::Type::MalformedPDF, "Postscript result size does not match range size"_string };
    }

    // FIXME: Does this need reversing?
    m_result.resize(output_count);
    for (size_t output_index = 0; output_index < output_count; ++output_index) {
        auto const& bound = m_range[output_index];
        m_result[output_index] = clamp(operands.stack[output_index], bound.lower, bound.upper);
    }
    return m_result;
}
PDFErrorOr<NonnullRefPtr<Function>> Function::create(Document* document, NonnullRefPtr<Object> object)
@ -191,7 +717,9 @@ PDFErrorOr<NonnullRefPtr<Function>> Function::create(Document* document, Nonnull
case 3:
return adopt_ref(*new StitchingFunction());
case 4:
return adopt_ref(*new PostScriptCalculatorFunction());
if (!object->is<StreamObject>())
return Error { Error::Type::MalformedPDF, "Function type 4 requires stream object" };
return PostScriptCalculatorFunction::create(move(domain), move(optional_range), object->cast<StreamObject>());
default:
dbgln("invalid function type {}", function_type);
return Error(Error::Type::MalformedPDF, "Function has unkonwn type"_string);