Refactor line building (#4497)

This commit is contained in:
Laurenz 2024-07-04 12:57:40 +02:00 committed by GitHub
parent 75246f930b
commit 0ef672c347
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
22 changed files with 651 additions and 517 deletions

5
Cargo.lock generated
View file

@ -2604,8 +2604,7 @@ dependencies = [
[[package]]
name = "typst-assets"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f13f85360328da54847dd7fefaf272dfa5b6d1fdeb53f32938924c39bf5b2c6c"
source = "git+https://github.com/typst/typst-assets?rev=4ee794c#4ee794cf8fb98eb67194e757c9820ab8562d853b"
[[package]]
name = "typst-cli"
@ -2656,7 +2655,7 @@ dependencies = [
[[package]]
name = "typst-dev-assets"
version = "0.11.0"
source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d9de82b631bc775124a69384c8d860db04#48a924d9de82b631bc775124a69384c8d860db04"
source = "git+https://github.com/typst/typst-dev-assets?rev=48a924d#48a924d9de82b631bc775124a69384c8d860db04"
[[package]]
name = "typst-docs"

View file

@ -26,8 +26,8 @@ typst-svg = { path = "crates/typst-svg", version = "0.11.0" }
typst-syntax = { path = "crates/typst-syntax", version = "0.11.0" }
typst-timing = { path = "crates/typst-timing", version = "0.11.0" }
typst-utils = { path = "crates/typst-utils", version = "0.11.0" }
typst-assets = "0.11.0"
typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d9de82b631bc775124a69384c8d860db04" }
typst-assets = { git = "https://github.com/typst/typst-assets", rev = "4ee794c" }
typst-dev-assets = { git = "https://github.com/typst/typst-dev-assets", rev = "48a924d" }
az = "1.2"
base64 = "0.22"
bitflags = { version = "2", features = ["serde"] }

View file

@ -116,6 +116,6 @@ impl Tag {
impl Debug for Tag {
fn fmt(&self, f: &mut Formatter) -> fmt::Result {
write!(f, "Tag({:?})", self.elem)
write!(f, "Tag({:?})", self.elem.elem().name())
}
}

View file

@ -79,7 +79,7 @@ impl<'a> Item<'a> {
}
/// The natural layouted width of the item.
pub fn width(&self) -> Abs {
pub fn natural_width(&self) -> Abs {
match self {
Self::Text(shaped) => shaped.width,
Self::Absolute(v, _) => *v,

View file

@ -1,11 +1,18 @@
use unicode_bidi::BidiInfo;
use std::fmt::{self, Debug, Formatter};
use std::ops::{Deref, DerefMut};
use super::*;
use crate::engine::Engine;
use crate::layout::{Abs, Em, Fr, Frame, FrameItem, Point};
use crate::layout::{Abs, Dir, Em, Fr, Frame, FrameItem, Point};
use crate::text::{Lang, TextElem};
use crate::utils::Numeric;
/// U+00AD SOFT HYPHEN.
const SHY: char = '\u{ad}';
/// U+002D HYPHEN-MINUS, the regular hyphen.
const HYPHEN: char = '-';
/// U+2013 EN DASH. Spelled as an escape so the character cannot get lost in
/// transit; an empty `''` literal is not valid Rust.
const EN_DASH: char = '\u{2013}';
/// U+2014 EM DASH.
const EM_DASH: char = '—';
/// U+2028 LINE SEPARATOR. We use LS to distinguish justified breaks.
const LINE_SEPARATOR: char = '\u{2028}';
/// A layouted line, consisting of a sequence of layouted paragraph items that
/// are mostly borrowed from the preparation phase. This type enables you to
/// measure the size of a line in a range before committing to building the
@ -16,20 +23,9 @@ use crate::utils::Numeric;
/// line, respectively. But even those can partially reuse previous results when
/// the break index is safe-to-break per rustybuzz.
pub struct Line<'a> {
/// Bidi information about the paragraph.
pub bidi: &'a BidiInfo<'a>,
/// The trimmed range the line spans in the paragraph.
pub trimmed: Range,
/// The untrimmed end where the line ends.
pub end: usize,
/// A reshaped text item if the line sliced up a text item at the start.
pub first: Option<Item<'a>>,
/// Inner items which don't need to be reprocessed.
pub inner: &'a [Item<'a>],
/// A reshaped text item if the line sliced up a text item at the end. If
/// there is only one text item, this takes precedence over `first`.
pub last: Option<Item<'a>>,
/// The width of the line.
/// The items the line is made of.
pub items: Items<'a>,
/// The exact natural width of the line.
pub width: Abs,
/// Whether the line should be justified.
pub justify: bool,
@ -39,45 +35,27 @@ pub struct Line<'a> {
}
impl<'a> Line<'a> {
/// Iterate over the line's items.
pub fn items(&self) -> impl Iterator<Item = &Item<'a>> {
self.first.iter().chain(self.inner).chain(&self.last)
}
/// Return items that intersect the given `text_range`.
pub fn slice(&self, text_range: Range) -> impl Iterator<Item = &Item<'a>> {
let mut cursor = self.trimmed.start;
let mut start = 0;
let mut end = 0;
for (i, item) in self.items().enumerate() {
if cursor <= text_range.start {
start = i;
}
let len = item.textual_len();
if cursor < text_range.end || cursor + len <= text_range.end {
end = i + 1;
} else {
break;
}
cursor += len;
/// Create an empty line.
pub fn empty() -> Self {
Self {
items: Items::new(),
width: Abs::zero(),
justify: false,
dash: None,
}
self.items().skip(start).take(end - start)
}
/// How many glyphs are in the text where we can insert additional
/// space when encountering underfull lines.
pub fn justifiables(&self) -> usize {
let mut count = 0;
for shaped in self.items().filter_map(Item::text) {
for shaped in self.items.iter().filter_map(Item::text) {
count += shaped.justifiables();
}
// CJK character at line end should not be adjusted.
if self
.items()
.items
.last()
.and_then(Item::text)
.map(|s| s.cjk_justifiable_at_last())
@ -89,19 +67,27 @@ impl<'a> Line<'a> {
count
}
/// How much can the line stretch
/// How much the line can stretch.
pub fn stretchability(&self) -> Abs {
self.items().filter_map(Item::text).map(|s| s.stretchability()).sum()
self.items
.iter()
.filter_map(Item::text)
.map(|s| s.stretchability())
.sum()
}
/// How much can the line shrink
/// How much the line can shrink.
pub fn shrinkability(&self) -> Abs {
self.items().filter_map(Item::text).map(|s| s.shrinkability()).sum()
self.items
.iter()
.filter_map(Item::text)
.map(|s| s.shrinkability())
.sum()
}
/// Whether the line has items with negative width.
pub fn has_negative_width_items(&self) -> bool {
self.items().any(|item| match item {
self.items.iter().any(|item| match item {
Item::Absolute(amount, _) => *amount < Abs::zero(),
Item::Frame(frame, _) => frame.width() < Abs::zero(),
_ => false,
@ -110,7 +96,8 @@ impl<'a> Line<'a> {
/// The sum of fractions in the line.
pub fn fr(&self) -> Fr {
self.items()
self.items
.iter()
.filter_map(|item| match item {
Item::Fractional(fr, _) => Some(*fr),
_ => None,
@ -122,234 +109,299 @@ impl<'a> Line<'a> {
/// A dash at the end of a line.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum Dash {
/// A hyphen added to break a word.
SoftHyphen,
/// Regular hyphen, present in a compound word, e.g. beija-flor.
HardHyphen,
/// An em dash.
Long,
/// An en dash.
Short,
/// A soft hyphen added to break a word.
Soft,
/// A regular hyphen, present in a compound word, e.g. beija-flor.
Hard,
/// Another kind of dash. Only relevant for cost computation.
Other,
}
/// Create a line which spans the given range.
pub fn line<'a>(
engine: &Engine,
p: &'a Preparation,
mut range: Range,
range: Range,
breakpoint: Breakpoint,
pred: Option<&Line>,
) -> Line<'a> {
let end = range.end;
let mut justify =
p.justify && end < p.bidi.text.len() && breakpoint != Breakpoint::Mandatory;
// The line's full text.
let full = &p.text[range.clone()];
// Whether the line is justified.
let justify = full.ends_with(LINE_SEPARATOR)
|| (p.justify && breakpoint != Breakpoint::Mandatory);
// Process dashes.
let dash = if breakpoint == Breakpoint::Hyphen || full.ends_with(SHY) {
Some(Dash::Soft)
} else if full.ends_with(HYPHEN) {
Some(Dash::Hard)
} else if full.ends_with([EN_DASH, EM_DASH]) {
Some(Dash::Other)
} else {
None
};
// Trim the line at the end, if necessary for this breakpoint.
let trim = range.start + breakpoint.trim(full).len();
// Collect the items for the line.
let mut items = collect_items(engine, p, range, trim);
// Add a hyphen at the line start, if a previous dash should be repeated.
if pred.map_or(false, |pred| should_repeat_hyphen(pred, full)) {
if let Some(shaped) = items.first_text_mut() {
shaped.prepend_hyphen(engine, p.fallback);
}
}
// Add a hyphen at the line end, if we ended on a soft hyphen.
if dash == Some(Dash::Soft) {
if let Some(shaped) = items.last_text_mut() {
shaped.push_hyphen(engine, p.fallback);
}
}
// Deal with CJ characters at line boundaries.
adjust_cj_at_line_boundaries(p, full, &mut items);
// Compute the line's width.
let width = items.iter().map(Item::natural_width).sum();
Line { items, width, justify, dash }
}
/// Collects / reshapes all items for the line with the given `range`.
///
/// The `trim` defines an end position to which text items are trimmed. For
/// example, the `range` may span "hello\n", but the `trim` specifies that the
/// linebreak is trimmed.
///
/// We do not factor the `trim` directly into the `range` because we still want
/// to keep non-text items after the trim (e.g. tags).
///
/// After collection, weak spacing (`Item::Absolute(_, true)`) is removed from
/// both ends of the line. If the line then contains no text item at all, the
/// fallback item recorded by `collect_range` (if any) is appended.
fn collect_items<'a>(
    engine: &Engine,
    p: &'a Preparation,
    range: Range,
    trim: usize,
) -> Items<'a> {
    let mut items = Items::new();
    let mut fallback = None;

    // Collect the items for each consecutively ordered run. The `range` is
    // not used again afterwards, so it is moved instead of cloned.
    reorder(p, range, |subrange, rtl| {
        let from = items.len();
        collect_range(engine, p, subrange, trim, &mut items, &mut fallback);
        if rtl {
            // Items of an RTL run were collected in logical order; flip just
            // the part that this run contributed.
            items.reorder(from);
        }
    });

    // Trim weak spacing at the start of the line.
    let prefix = items
        .iter()
        .take_while(|item| matches!(item, Item::Absolute(_, true)))
        .count();
    if prefix > 0 {
        items.drain(..prefix);
    }

    // Trim weak spacing at the end of the line.
    while matches!(items.last(), Some(Item::Absolute(_, true))) {
        items.pop();
    }

    // Add fallback text to expand the line height, if necessary.
    if !items.iter().any(|item| matches!(item, Item::Text(_))) {
        if let Some(fallback) = fallback {
            items.push(fallback);
        }
    }

    items
}
/// Calls `f` for the BiDi-reordered ranges of a line.
fn reorder<F>(p: &Preparation, range: Range, mut f: F)
where
F: FnMut(Range, bool),
{
// If there is nothing bidirectional going on, skip reordering.
let Some(bidi) = &p.bidi else {
f(range, p.dir == Dir::RTL);
return;
};
// The bidi crate panics for empty lines.
if range.is_empty() {
return Line {
bidi: &p.bidi,
end,
trimmed: range,
first: None,
inner: &[],
last: None,
width: Abs::zero(),
justify,
dash: None,
f(range, p.dir == Dir::RTL);
return;
}
// Find the paragraph that contains the line.
let para = bidi
.paragraphs
.iter()
.find(|para| para.range.contains(&range.start))
.unwrap();
// Compute the reordered ranges in visual order (left to right).
let (levels, runs) = bidi.visual_runs(para, range.clone());
// Call `f` for each run.
for run in runs {
let rtl = levels[run.start].is_rtl();
f(run, rtl)
}
}
/// Collects / reshapes all items for the given `subrange` with continuous
/// direction.
fn collect_range<'a>(
engine: &Engine,
p: &'a Preparation,
range: Range,
trim: usize,
items: &mut Items<'a>,
fallback: &mut Option<ItemEntry<'a>>,
) {
for (subrange, item) in p.slice(range.clone()) {
// All non-text items are just kept, they can't be split.
let Item::Text(shaped) = item else {
items.push(item);
continue;
};
}
let prepend_hyphen = pred.map_or(false, should_insert_hyphen);
// The intersection range of the item, the subrange, and the line's
// trimming.
let sliced =
range.start.max(subrange.start)..range.end.min(subrange.end).min(trim);
// Slice out the relevant items.
let (mut expanded, mut inner) = p.slice(range.clone());
let mut width = Abs::zero();
// Whether the item is split by the line.
let split = subrange.start < sliced.start || sliced.end < subrange.end;
// Weak space (`Absolute(_, true)`) is removed at the end of the line
while let Some((Item::Absolute(_, true), before)) = inner.split_last() {
inner = before;
range.end -= 1;
expanded.end -= 1;
}
// Weak space (`Absolute(_, true)`) is removed at the beginning of the line
while let Some((Item::Absolute(_, true), after)) = inner.split_first() {
inner = after;
range.start += 1;
expanded.end += 1;
}
// Reshape the last item if it's split in half or hyphenated.
let mut last = None;
let mut dash = None;
if let Some((Item::Text(shaped), before)) = inner.split_last() {
// Compute the range we want to shape, trimming whitespace at the
// end of the line.
let base = expanded.end - shaped.text.len();
let start = range.start.max(base);
let text = &p.bidi.text[start..range.end];
// U+200B ZERO WIDTH SPACE is used to provide a line break opportunity,
// we want to trim it too.
let trimmed = text.trim_end().trim_end_matches('\u{200B}');
range.end = start + trimmed.len();
// Deal with hyphens, dashes and justification.
let shy = trimmed.ends_with('\u{ad}');
let hyphen = breakpoint == Breakpoint::Hyphen;
dash = if hyphen || shy {
Some(Dash::SoftHyphen)
} else if trimmed.ends_with('-') {
Some(Dash::HardHyphen)
} else if trimmed.ends_with('') {
Some(Dash::Short)
} else if trimmed.ends_with('—') {
Some(Dash::Long)
if sliced.is_empty() {
// When there is no text, still keep this as a fallback item, which
// we can use to force a non-zero line-height when the line doesn't
// contain any other text.
*fallback = Some(ItemEntry::from(Item::Text(shaped.empty())));
} else if split {
// When the item is split in half, reshape it.
let reshaped = shaped.reshape(engine, sliced);
items.push(Item::Text(reshaped));
} else {
None
};
justify |= text.ends_with('\u{2028}');
// When the item is fully contained, just keep it.
items.push(item);
}
}
}
// Deal with CJK punctuation at line ends.
let gb_style = cjk_punct_style(shaped.lang, shaped.region);
let maybe_adjust_last_glyph = trimmed.ends_with(END_PUNCT_PAT)
|| (p.cjk_latin_spacing && trimmed.ends_with(is_of_cj_script));
/// Adjusts CJ glyphs that sit directly at the start or end of a line.
///
/// The line's `text` decides which side needs attention: a side is adjusted
/// when the text starts (respectively ends) with a character matched by
/// `BEGIN_PUNCT_PAT` (respectively `END_PUNCT_PAT`), or, if
/// `p.cjk_latin_spacing` is enabled, with a character for which
/// `is_of_cj_script` holds. The actual glyph changes are made by
/// `adjust_cj_at_line_start` and `adjust_cj_at_line_end`.
///
/// See Requirements for Chinese Text Layout, Section 3.1.6.3 Compression of
/// punctuation marks at line start or line end.
fn adjust_cj_at_line_boundaries(p: &Preparation, text: &str, items: &mut Items) {
    let affects_start = text.starts_with(BEGIN_PUNCT_PAT)
        || (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
    if affects_start {
        adjust_cj_at_line_start(p, items);
    }

    let affects_end = text.ends_with(END_PUNCT_PAT)
        || (p.cjk_latin_spacing && text.ends_with(is_of_cj_script));
    if affects_end {
        adjust_cj_at_line_end(p, items);
    }
}
/// Narrows the first glyph of the line if it is a CJ glyph that carries extra
/// space on its left side.
///
/// Does nothing unless the line's first item is text with at least one glyph.
/// Whatever amount is taken off the glyph is also subtracted from the width
/// of the shaped text.
fn adjust_cj_at_line_start(p: &Preparation, items: &mut Items) {
    let Some(shaped) = items.first_text_mut() else { return };
    let Some(first) = shaped.glyphs.first() else { return };

    let removed = if first.is_cjk_right_aligned_punctuation() {
        // The line starts with CJK punctuation: shrink it on the left by its
        // left shrinkability.
        let amount = first.shrinkability().0;
        shaped.glyphs.to_mut().first_mut().unwrap().shrink_left(amount);
        amount
    } else if p.cjk_latin_spacing && first.is_cj_script() && first.x_offset > Em::zero() {
        // The line starts with a CJK character that was adjusted by
        // [`add_cjk_latin_spacing`]: restore its original width.
        let amount = first.x_offset;
        let glyph = shaped.glyphs.to_mut().first_mut().unwrap();
        glyph.x_advance -= amount;
        glyph.x_offset = Em::zero();
        glyph.adjustability.shrinkability.0 = Em::zero();
        amount
    } else {
        return;
    };

    shaped.width -= removed.at(shaped.size);
}
/// Narrows the last glyph of the line if it is a CJ glyph that carries extra
/// space on its right side.
///
/// Does nothing unless the line's last item is text with at least one glyph.
/// Whatever amount is taken off the glyph is also subtracted from the width
/// of the shaped text.
fn adjust_cj_at_line_end(p: &Preparation, items: &mut Items) {
    let Some(shaped) = items.last_text_mut() else { return };
    let Some(last) = shaped.glyphs.last() else { return };

    // Which punctuation counts as left-aligned depends on the punctuation
    // style derived from the text's language and region.
    let style = cjk_punct_style(shaped.lang, shaped.region);

    let removed = if last.is_cjk_left_aligned_punctuation(style) {
        // The line ends with CJK punctuation: shrink it on the right by its
        // right shrinkability.
        let amount = last.shrinkability().1;
        shaped.glyphs.to_mut().last_mut().unwrap().shrink_right(amount);
        amount
    } else if p.cjk_latin_spacing
        && last.is_cj_script()
        && (last.x_advance - last.x_offset) > Em::one()
    {
        // The line ends with a CJK character that was adjusted by
        // [`add_cjk_latin_spacing`]: restore its original width.
        let amount = last.x_advance - last.x_offset - Em::one();
        let glyph = shaped.glyphs.to_mut().last_mut().unwrap();
        glyph.x_advance -= amount;
        glyph.adjustability.shrinkability.1 = Em::zero();
        amount
    } else {
        return;
    };

    shaped.width -= removed.at(shaped.size);
}
/// Whether a hyphen should be inserted at the start of the next line.
fn should_repeat_hyphen(pred_line: &Line, text: &str) -> bool {
// If the predecessor line does not end with a `Dash::Hard`, we shall
// not place a hyphen at the start of the next line.
if pred_line.dash != Some(Dash::Hard) {
return false;
}
// The hyphen should repeat only in the languages that require that feature.
// For more information see the discussion at https://github.com/typst/typst/issues/3235
let Some(Item::Text(shaped)) = pred_line.items.last() else { return false };
match shaped.lang {
// - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
// - Czech: see https://prirucka.ujc.cas.cz/?id=164
// - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
// - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
// - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
// - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
Lang::LOWER_SORBIAN
| Lang::CZECH
| Lang::CROATIAN
| Lang::POLISH
| Lang::PORTUGUESE
| Lang::SLOVAK => true,
// In Spanish the hyphen is required only if the word next to hyphen is
// not capitalized. Otherwise, the hyphen must not be repeated.
//
// There is one exception though. When the whole line is empty, we need
// the shaped empty string to make the line the appropriate height. That
// is the case exactly if the string is empty and there are no other
// items in the line.
if hyphen
|| start + shaped.text.len() > range.end
|| maybe_adjust_last_glyph
|| prepend_hyphen
{
if hyphen || start < range.end || before.is_empty() {
let mut reshaped = shaped.reshape(engine, &p.spans, start..range.end);
if hyphen || shy {
reshaped.push_hyphen(engine, p.fallback);
}
// See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
// https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
Lang::SPANISH => text.chars().next().map_or(false, |c| !c.is_uppercase()),
if let Some(last_glyph) = reshaped.glyphs.last() {
if last_glyph.is_cjk_left_aligned_punctuation(gb_style) {
// If the last glyph is a CJK punctuation, we want to
// shrink it. See Requirements for Chinese Text Layout,
// Section 3.1.6.3 Compression of punctuation marks at
// line start or line end
let shrink_amount = last_glyph.shrinkability().1;
let punct = reshaped.glyphs.to_mut().last_mut().unwrap();
punct.shrink_right(shrink_amount);
reshaped.width -= shrink_amount.at(reshaped.size);
} else if p.cjk_latin_spacing
&& last_glyph.is_cj_script()
&& (last_glyph.x_advance - last_glyph.x_offset) > Em::one()
{
// If the last glyph is a CJK character adjusted by
// [`add_cjk_latin_spacing`], restore the original
// width.
let shrink_amount =
last_glyph.x_advance - last_glyph.x_offset - Em::one();
let glyph = reshaped.glyphs.to_mut().last_mut().unwrap();
glyph.x_advance -= shrink_amount;
glyph.adjustability.shrinkability.1 = Em::zero();
reshaped.width -= shrink_amount.at(reshaped.size);
}
}
width += reshaped.width;
last = Some(Item::Text(reshaped));
}
inner = before;
}
}
// Deal with CJ characters at line starts.
let text = &p.bidi.text[range.start..end];
let maybe_adjust_first_glyph = text.starts_with(BEGIN_PUNCT_PAT)
|| (p.cjk_latin_spacing && text.starts_with(is_of_cj_script));
// Reshape the start item if it's split in half.
let mut first = None;
if let Some((Item::Text(shaped), after)) = inner.split_first() {
// Compute the range we want to shape.
let base = expanded.start;
let end = range.end.min(base + shaped.text.len());
// Reshape if necessary.
if range.start + shaped.text.len() > end
|| maybe_adjust_first_glyph
|| prepend_hyphen
{
// If the range is empty, we don't want to push an empty text item.
if range.start < end {
let reshaped = shaped.reshape(engine, &p.spans, range.start..end);
width += reshaped.width;
first = Some(Item::Text(reshaped));
}
inner = after;
}
}
if prepend_hyphen {
let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
if let Some(reshaped) = reshaped {
let width_before = reshaped.width;
reshaped.prepend_hyphen(engine, p.fallback);
width += reshaped.width - width_before;
}
}
if maybe_adjust_first_glyph {
let reshaped = first.as_mut().or(last.as_mut()).and_then(Item::text_mut);
if let Some(reshaped) = reshaped {
if let Some(first_glyph) = reshaped.glyphs.first() {
if first_glyph.is_cjk_right_aligned_punctuation() {
// If the first glyph is a CJK punctuation, we want to
// shrink it.
let shrink_amount = first_glyph.shrinkability().0;
let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
glyph.shrink_left(shrink_amount);
let amount_abs = shrink_amount.at(reshaped.size);
reshaped.width -= amount_abs;
width -= amount_abs;
} else if p.cjk_latin_spacing
&& first_glyph.is_cj_script()
&& first_glyph.x_offset > Em::zero()
{
// If the first glyph is a CJK character adjusted by
// [`add_cjk_latin_spacing`], restore the original width.
let shrink_amount = first_glyph.x_offset;
let glyph = reshaped.glyphs.to_mut().first_mut().unwrap();
glyph.x_advance -= shrink_amount;
glyph.x_offset = Em::zero();
glyph.adjustability.shrinkability.0 = Em::zero();
let amount_abs = shrink_amount.at(reshaped.size);
reshaped.width -= amount_abs;
width -= amount_abs;
}
}
}
}
// Measure the inner items.
for item in inner {
width += item.width();
}
Line {
bidi: &p.bidi,
trimmed: range,
end,
first,
inner,
last,
width,
justify,
dash,
_ => false,
}
}
@ -365,18 +417,19 @@ pub fn commit(
let mut remaining = width - line.width - p.hang;
let mut offset = Abs::zero();
// Reorder the line from logical to visual order.
let (reordered, starts_rtl) = reorder(line);
if !starts_rtl {
// We always build the line from left to right. In an LTR paragraph, we must
// thus add the hanging indent to the offset. When the paragraph is RTL, the
// hanging indent arises naturally due to the line width.
if p.dir == Dir::LTR {
offset += p.hang;
}
// Handle hanging punctuation to the left.
if let Some(Item::Text(text)) = reordered.first() {
if let Some(Item::Text(text)) = line.items.first() {
if let Some(glyph) = text.glyphs.first() {
if !text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
&& (reordered.len() > 1 || text.glyphs.len() > 1)
&& (line.items.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
offset -= amount;
@ -386,11 +439,11 @@ pub fn commit(
}
// Handle hanging punctuation to the right.
if let Some(Item::Text(text)) = reordered.last() {
if let Some(Item::Text(text)) = line.items.last() {
if let Some(glyph) = text.glyphs.last() {
if text.dir.is_positive()
&& TextElem::overhang_in(text.styles)
&& (reordered.len() > 1 || text.glyphs.len() > 1)
&& (line.items.len() > 1 || text.glyphs.len() > 1)
{
let amount = overhang(glyph.c) * glyph.x_advance.at(text.size);
remaining += amount;
@ -408,16 +461,16 @@ pub fn commit(
let mut extra_justification = Abs::zero();
let shrinkability = line.shrinkability();
let stretch = line.stretchability();
let stretchability = line.stretchability();
if remaining < Abs::zero() && shrinkability > Abs::zero() && shrink {
// Attempt to reduce the length of the line, using shrinkability.
justification_ratio = (remaining / shrinkability).max(-1.0);
remaining = (remaining + shrinkability).min(Abs::zero());
} else if line.justify && fr.is_zero() {
// Attempt to increase the length of the line, using stretchability.
if stretch > Abs::zero() {
justification_ratio = (remaining / stretch).min(1.0);
remaining = (remaining - stretch).max(Abs::zero());
if stretchability > Abs::zero() {
justification_ratio = (remaining / stretchability).min(1.0);
remaining = (remaining - stretchability).max(Abs::zero());
}
let justifiables = line.justifiables();
@ -433,7 +486,7 @@ pub fn commit(
// Build the frames and determine the height and baseline.
let mut frames = vec![];
for item in reordered {
for item in line.items.iter() {
let mut push = |offset: &mut Abs, frame: Frame| {
let width = frame.width();
top.set_max(frame.baseline());
@ -460,8 +513,12 @@ pub fn commit(
}
}
Item::Text(shaped) => {
let mut frame =
shaped.build(engine, justification_ratio, extra_justification);
let mut frame = shaped.build(
engine,
&p.spans,
justification_ratio,
extra_justification,
);
frame.post_process(shaped.styles);
push(&mut offset, frame);
}
@ -499,94 +556,6 @@ pub fn commit(
Ok(output)
}
/// Return a line's items in visual order.
fn reorder<'a>(line: &'a Line<'a>) -> (Vec<&Item<'a>>, bool) {
let mut reordered = vec![];
// The bidi crate doesn't like empty lines.
if line.trimmed.is_empty() {
return (line.slice(line.trimmed.clone()).collect(), false);
}
// Find the paragraph that contains the line.
let para = line
.bidi
.paragraphs
.iter()
.find(|para| para.range.contains(&line.trimmed.start))
.unwrap();
// Compute the reordered ranges in visual order (left to right).
let (levels, runs) = line.bidi.visual_runs(para, line.trimmed.clone());
let starts_rtl = levels.first().is_some_and(|level| level.is_rtl());
// Collect the reordered items.
for run in runs {
// Skip reset L1 runs because handling them would require reshaping
// again in some cases.
if line.bidi.levels[run.start] != levels[run.start] {
continue;
}
let prev = reordered.len();
reordered.extend(line.slice(run.clone()));
if levels[run.start].is_rtl() {
reordered[prev..].reverse();
}
}
(reordered, starts_rtl)
}
/// Whether a hyphen should be inserted at the start of the next line.
fn should_insert_hyphen(pred_line: &Line) -> bool {
// If the predecessor line does not end with a Dash::HardHyphen, we shall
// not place a hyphen at the start of the next line.
if pred_line.dash != Some(Dash::HardHyphen) {
return false;
}
// If there's a trimmed out space, we needn't repeat the hyphen. That's the
// case of a text like "...kebab é a -melhor- comida que existe", where the
// hyphens are a kind of emphasis marker.
if pred_line.trimmed.end != pred_line.end {
return false;
}
// The hyphen should repeat only in the languages that require that feature.
// For more information see the discussion at https://github.com/typst/typst/issues/3235
let Some(Item::Text(shape)) = pred_line.last.as_ref() else { return false };
match shape.lang {
// - Lower Sorbian: see https://dolnoserbski.de/ortografija/psawidla/K3
// - Czech: see https://prirucka.ujc.cas.cz/?id=164
// - Croatian: see http://pravopis.hr/pravilo/spojnica/68/
// - Polish: see https://www.ortograf.pl/zasady-pisowni/lacznik-zasady-pisowni
// - Portuguese: see https://www2.senado.leg.br/bdsf/bitstream/handle/id/508145/000997415.pdf (Base XX)
// - Slovak: see https://www.zones.sk/studentske-prace/gramatika/10620-pravopis-rozdelovanie-slov/
Lang::LOWER_SORBIAN
| Lang::CZECH
| Lang::CROATIAN
| Lang::POLISH
| Lang::PORTUGUESE
| Lang::SLOVAK => true,
// In Spanish the hyphen is required only if the word next to hyphen is
// not capitalized. Otherwise, the hyphen must not be repeated.
//
// See § 4.1.1.1.2.e on the "Ortografía de la lengua española"
// https://www.rae.es/ortografía/como-signo-de-división-de-palabras-a-final-de-línea
Lang::SPANISH => pred_line.bidi.text[pred_line.end..]
.chars()
.next()
.map(|c| !c.is_uppercase())
.unwrap_or(false),
_ => false,
}
}
/// How much a character should hang into the end margin.
///
/// For more discussion, see:
@ -607,3 +576,119 @@ fn overhang(c: char) -> f64 {
_ => 0.0,
}
}
/// A collection of owned or borrowed paragraph items.
///
/// Dereferences to the underlying `Vec<ItemEntry>`, so vector methods such as
/// `len`, `drain` and `pop` are available directly. The inherent `iter`,
/// `first` and `last` take precedence over the vector's methods of the same
/// name and yield plain `&Item`s instead of entries.
pub struct Items<'a>(Vec<ItemEntry<'a>>);

impl<'a> Items<'a> {
    /// Create empty items.
    pub fn new() -> Self {
        Self(Vec::new())
    }

    /// Push a new item.
    ///
    /// Accepts anything that converts into an [`ItemEntry`].
    pub fn push(&mut self, entry: impl Into<ItemEntry<'a>>) {
        self.0.push(entry.into());
    }

    /// Iterate over the items.
    pub fn iter(&self) -> impl Iterator<Item = &Item<'a>> {
        self.0.iter().map(|entry| &**entry)
    }

    /// Access the first item.
    pub fn first(&self) -> Option<&Item<'a>> {
        self.0.first().map(|entry| &**entry)
    }

    /// Access the last item.
    pub fn last(&self) -> Option<&Item<'a>> {
        self.0.last().map(|entry| &**entry)
    }

    /// Access the first item mutably, if it is text.
    ///
    /// A borrowed text item is replaced by an owned clone first, see
    /// `ItemEntry::text_mut`.
    pub fn first_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        self.0.first_mut()?.text_mut()
    }

    /// Access the last item mutably, if it is text.
    ///
    /// A borrowed text item is replaced by an owned clone first, see
    /// `ItemEntry::text_mut`.
    pub fn last_text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        self.0.last_mut()?.text_mut()
    }

    /// Reorder the items starting at the given index to RTL.
    ///
    /// This reverses the order of all items from `from` to the end.
    ///
    /// # Panics
    /// Panics if `from` is greater than the number of items.
    pub fn reorder(&mut self, from: usize) {
        self.0[from..].reverse();
    }
}

impl Default for Items<'_> {
    /// Equivalent to [`Items::new`].
    fn default() -> Self {
        Self::new()
    }
}

impl<'a> FromIterator<ItemEntry<'a>> for Items<'a> {
    fn from_iter<I: IntoIterator<Item = ItemEntry<'a>>>(iter: I) -> Self {
        Self(iter.into_iter().collect())
    }
}

impl<'a> Deref for Items<'a> {
    type Target = Vec<ItemEntry<'a>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl<'a> DerefMut for Items<'a> {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
/// A reference to or a boxed item.
///
/// Dereferences to the contained [`Item`] in either case.
pub enum ItemEntry<'a> {
    /// An item that is borrowed.
    Ref(&'a Item<'a>),
    /// An item that is owned by the entry.
    Box(Box<Item<'a>>),
}

impl<'a> ItemEntry<'a> {
    /// Mutable access to the entry's shaped text.
    ///
    /// A borrowed text item cannot be mutated in place, so it is first
    /// replaced by a boxed clone; the borrowed original stays untouched.
    /// Returns `None` for a borrowed item for which `Item::text` yields
    /// nothing, and otherwise whatever `Item::text_mut` yields for the boxed
    /// item.
    fn text_mut(&mut self) -> Option<&mut ShapedText<'a>> {
        // Upgrade a borrowed text item to an owned one.
        if let Self::Ref(item) = self {
            let text = item.text()?;
            *self = Self::Box(Box::new(Item::Text(text.clone())));
        }

        match self {
            Self::Box(item) => item.text_mut(),
            // Any `Ref` was either upgraded above or made us return early.
            Self::Ref(_) => unreachable!(),
        }
    }
}

impl<'a> Deref for ItemEntry<'a> {
    type Target = Item<'a>;

    fn deref(&self) -> &Self::Target {
        match self {
            Self::Ref(borrowed) => borrowed,
            Self::Box(owned) => owned,
        }
    }
}

impl Debug for ItemEntry<'_> {
    /// Formats the entry exactly like the item it contains.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        Debug::fmt(&**self, f)
    }
}

impl<'a> From<&'a Item<'a>> for ItemEntry<'a> {
    /// Wraps a borrowed item without copying it.
    fn from(item: &'a Item<'a>) -> Self {
        Self::Ref(item)
    }
}

impl<'a> From<Item<'a>> for ItemEntry<'a> {
    /// Takes ownership of the item by boxing it.
    fn from(item: Item<'a>) -> Self {
        Self::Box(Box::new(item))
    }
}

View file

@ -1,6 +1,7 @@
use std::ops::{Add, Sub};
use icu_properties::maps::CodePointMapData;
use icu_properties::sets::CodePointSetData;
use icu_properties::LineBreak;
use icu_provider::AsDeserializingBufferProvider;
use icu_provider_adapters::fork::ForkByKeyProvider;
@ -27,30 +28,33 @@ const MIN_RATIO: f64 = -1.0;
const MIN_APPROX_RATIO: f64 = -0.5;
const BOUND_EPS: f64 = 1e-3;
/// The ICU blob data.
///
/// Creates a new provider over the static `typst_assets::icu::ICU` data on
/// each call. Panics if a provider cannot be created from that data.
fn blob() -> BlobDataProvider {
    let data = typst_assets::icu::ICU;
    BlobDataProvider::try_new_from_static_blob(data).unwrap()
}
/// The general line break segmenter.
static SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
let provider =
BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
LineSegmenter::try_new_lstm_with_buffer_provider(&provider).unwrap()
});
static SEGMENTER: Lazy<LineSegmenter> =
Lazy::new(|| LineSegmenter::try_new_lstm_with_buffer_provider(&blob()).unwrap());
/// The line break segmenter for Chinese/Japanese text.
static CJ_SEGMENTER: Lazy<LineSegmenter> = Lazy::new(|| {
let provider =
BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
let cj_blob =
BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU_CJ_SEGMENT)
.unwrap();
let cj_provider = ForkByKeyProvider::new(cj_blob, provider);
let cj_provider = ForkByKeyProvider::new(cj_blob, blob());
LineSegmenter::try_new_lstm_with_buffer_provider(&cj_provider).unwrap()
});
/// The Unicode line break properties for each code point.
static LINEBREAK_DATA: Lazy<CodePointMapData<LineBreak>> = Lazy::new(|| {
let provider =
BlobDataProvider::try_new_from_static_blob(typst_assets::icu::ICU).unwrap();
let deser_provider = provider.as_deserializing();
icu_properties::maps::load_line_break(&deser_provider).unwrap()
icu_properties::maps::load_line_break(&blob().as_deserializing()).unwrap()
});
/// The set of Unicode default ignorables.
///
/// Initialized through `Lazy` from the provider returned by `blob()`; the
/// initializer panics if loading the set fails. `Breakpoint::trim` uses this
/// set to strip trailing default-ignorable code points from a line.
static DEFAULT_IGNORABLE_DATA: Lazy<CodePointSetData> = Lazy::new(|| {
icu_properties::sets::load_default_ignorable_code_point(&blob().as_deserializing())
.unwrap()
});
/// A line break opportunity.
@ -64,6 +68,37 @@ pub enum Breakpoint {
Hyphen,
}
impl Breakpoint {
    /// Trim the end of a line that is terminated by this breakpoint.
    ///
    /// Trailing default-ignorable code points are always removed first. What
    /// is stripped after that depends on the kind of breakpoint:
    /// - `Normal`: trailing whitespace,
    /// - `Mandatory`: trailing characters whose line break class is a
    ///   mandatory break, carriage return, line feed or next line,
    /// - `Hyphen`: nothing further.
    pub fn trim(self, line: &str) -> &str {
        let ignorables = DEFAULT_IGNORABLE_DATA.as_borrowed();
        let stripped = line.trim_end_matches(|c: char| ignorables.contains(c));

        match self {
            // A hyphenation break keeps everything else intact.
            Self::Hyphen => stripped,
            // An ordinary break drops trailing whitespace.
            Self::Normal => stripped.trim_end_matches(|c: char| c.is_whitespace()),
            // A mandatory break drops the trailing line-terminating characters.
            Self::Mandatory => {
                let classes = LINEBREAK_DATA.as_borrowed();
                let is_line_terminator = |c: char| {
                    matches!(
                        classes.get(c),
                        LineBreak::MandatoryBreak
                            | LineBreak::CarriageReturn
                            | LineBreak::LineFeed
                            | LineBreak::NextLine
                    )
                };
                stripped.trim_end_matches(is_line_terminator)
            }
        }
    }
}
/// Breaks the paragraph into lines.
pub fn linebreak<'a>(
engine: &Engine,
@ -180,14 +215,11 @@ fn linebreak_optimized_bounded<'a>(
pred: usize,
total: Cost,
line: Line<'a>,
end: usize,
}
// Dynamic programming table.
let mut table = vec![Entry {
pred: 0,
total: 0.0,
line: line(engine, p, 0..0, Breakpoint::Mandatory, None),
}];
let mut table = vec![Entry { pred: 0, total: 0.0, line: Line::empty(), end: 0 }];
let mut active = 0;
let mut prev_end = 0;
@ -200,7 +232,7 @@ fn linebreak_optimized_bounded<'a>(
let mut line_lower_bound = None;
for (pred_index, pred) in table.iter().enumerate().skip(active) {
let start = pred.line.end;
let start = pred.end;
let unbreakable = prev_end == start;
// If the minimum cost we've established for the line is already
@ -221,6 +253,7 @@ fn linebreak_optimized_bounded<'a>(
width,
&pred.line,
&attempt,
end,
breakpoint,
unbreakable,
);
@ -263,7 +296,7 @@ fn linebreak_optimized_bounded<'a>(
// If this attempt is better than what we had before, take it!
if best.as_ref().map_or(true, |best| best.total >= total) {
best = Some(Entry { pred: pred_index, total, line: attempt });
best = Some(Entry { pred: pred_index, total, line: attempt, end });
}
}
@ -282,7 +315,7 @@ fn linebreak_optimized_bounded<'a>(
let mut idx = table.len() - 1;
// This should only happen if our bound was faulty. Which shouldn't happen!
if table[idx].line.end != p.bidi.text.len() {
if table[idx].end != p.text.len() {
#[cfg(debug_assertions)]
panic!("bounded paragraph layout is incomplete");
@ -340,7 +373,7 @@ fn linebreak_optimized_approximate(
let mut prev_end = 0;
breakpoints(p, |end, breakpoint| {
let at_end = end == p.bidi.text.len();
let at_end = end == p.text.len();
// Find the optimal predecessor.
let mut best: Option<Entry> = None;
@ -362,7 +395,7 @@ fn linebreak_optimized_approximate(
// make it the desired width. We trim at the end to not take into
// account trailing spaces. This is, again, only an approximation of
// the real behaviour of `line`.
let trimmed_end = start + p.bidi.text[start..end].trim_end().len();
let trimmed_end = start + p.text[start..end].trim_end().len();
let line_ratio = raw_ratio(
p,
width,
@ -428,8 +461,9 @@ fn linebreak_optimized_approximate(
idx = table[idx].pred;
}
let mut pred = Line::empty();
let mut start = 0;
let mut exact = 0.0;
let mut pred = line(engine, p, 0..0, Breakpoint::Mandatory, None);
// The cost that we optimized was only an approximate cost, so the layout we
// got here is only likely to be good, not guaranteed to be the best. We now
@ -438,26 +472,36 @@ fn linebreak_optimized_approximate(
for idx in indices.into_iter().rev() {
let Entry { end, breakpoint, unbreakable, .. } = table[idx];
let start = pred.end;
let attempt = line(engine, p, start..end, breakpoint, Some(&pred));
let (_, line_cost) =
ratio_and_cost(p, metrics, width, &pred, &attempt, breakpoint, unbreakable);
let (_, line_cost) = ratio_and_cost(
p,
metrics,
width,
&pred,
&attempt,
end,
breakpoint,
unbreakable,
);
exact += line_cost;
pred = attempt;
start = end;
exact += line_cost;
}
exact
}
/// Compute the stretch ratio and cost of a line.
#[allow(clippy::too_many_arguments)]
fn ratio_and_cost(
p: &Preparation,
metrics: &CostMetrics,
available_width: Abs,
pred: &Line,
attempt: &Line,
end: usize,
breakpoint: Breakpoint,
unbreakable: bool,
) -> (f64, Cost) {
@ -474,7 +518,7 @@ fn ratio_and_cost(
metrics,
breakpoint,
ratio,
attempt.end == p.bidi.text.len(),
end == p.text.len(),
attempt.justify,
unbreakable,
pred.dash.is_some() && attempt.dash.is_some(),
@ -587,7 +631,14 @@ fn raw_cost(
/// code much simpler and the consumers of this function don't need the
/// composability and flexibility of external iteration anyway.
fn breakpoints<'a>(p: &'a Preparation<'a>, mut f: impl FnMut(usize, Breakpoint)) {
let text = p.bidi.text;
let text = p.text;
// Single breakpoint at the end for empty text.
if text.is_empty() {
f(0, Breakpoint::Mandatory);
return;
}
let hyphenate = p.hyphenate != Some(false);
let lb = LINEBREAK_DATA.as_borrowed();
let segmenter = match p.lang {
@ -747,8 +798,9 @@ fn linebreak_link(link: &str, mut f: impl FnMut(usize)) {
fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
p.hyphenate
.or_else(|| {
let shaped = p.find(offset)?.text()?;
Some(TextElem::hyphenate_in(shaped.styles))
let (_, item) = p.get(offset);
let styles = item.text()?.styles;
Some(TextElem::hyphenate_in(styles))
})
.unwrap_or(false)
}
@ -756,8 +808,9 @@ fn hyphenate_at(p: &Preparation, offset: usize) -> bool {
/// The text language at the given offset.
fn lang_at(p: &Preparation, offset: usize) -> Option<hypher::Lang> {
let lang = p.lang.or_else(|| {
let shaped = p.find(offset)?.text()?;
Some(TextElem::lang_in(shaped.styles))
let (_, item) = p.get(offset);
let styles = item.text()?.styles;
Some(TextElem::lang_in(styles))
})?;
let bytes = lang.as_str().as_bytes().try_into().ok()?;
@ -813,17 +866,14 @@ struct Estimates {
impl Estimates {
/// Compute estimations for approximate Knuth-Plass layout.
fn compute(p: &Preparation) -> Self {
let cap = p.bidi.text.len();
let cap = p.text.len();
let mut widths = CummulativeVec::with_capacity(cap);
let mut stretchability = CummulativeVec::with_capacity(cap);
let mut shrinkability = CummulativeVec::with_capacity(cap);
let mut justifiables = CummulativeVec::with_capacity(cap);
for item in &p.items {
let textual_len = item.textual_len();
let after = widths.len() + textual_len;
for (range, item) in p.items.iter() {
if let Item::Text(shaped) = item {
for g in shaped.glyphs.iter() {
let byte_len = g.range.len();
@ -835,13 +885,13 @@ impl Estimates {
justifiables.push(byte_len, g.is_justifiable() as usize);
}
} else {
widths.push(textual_len, item.width());
widths.push(range.len(), item.natural_width());
}
widths.adjust(after);
stretchability.adjust(after);
shrinkability.adjust(after);
justifiables.adjust(after);
widths.adjust(range.end);
stretchability.adjust(range.end);
shrinkability.adjust(range.end);
justifiables.adjust(range.end);
}
Self {
@ -871,11 +921,6 @@ where
Self { total, summed }
}
/// Get the covered byte length.
fn len(&self) -> usize {
self.summed.len()
}
/// Adjust to cover the given byte length.
fn adjust(&mut self, len: usize) {
self.summed.resize(len, self.total);

View file

@ -13,16 +13,24 @@ use crate::text::{Costs, Lang, TextElem};
/// Only when a line break falls onto a text index that is not safe-to-break per
/// rustybuzz, we have to reshape that portion.
pub struct Preparation<'a> {
/// The paragraph's full text.
pub text: &'a str,
/// Bidirectional text embedding levels for the paragraph.
pub bidi: BidiInfo<'a>,
///
/// This is `None` if the paragraph is BiDi-uniform (all the base direction).
pub bidi: Option<BidiInfo<'a>>,
/// Text runs, spacing and layouted elements.
pub items: Vec<Item<'a>>,
pub items: Vec<(Range, Item<'a>)>,
/// Maps from byte indices to item indices.
pub indices: Vec<usize>,
/// The span mapper.
pub spans: SpanMapper,
/// Whether to hyphenate if it's the same for all children.
pub hyphenate: Option<bool>,
/// Costs for various layout decisions.
pub costs: Costs,
/// The dominant direction.
pub dir: Dir,
/// The text language if it's the same for all children.
pub lang: Option<Lang>,
/// The paragraph's resolved horizontal alignment.
@ -44,46 +52,18 @@ pub struct Preparation<'a> {
}
impl<'a> Preparation<'a> {
/// Find the item that contains the given `text_offset`.
pub fn find(&self, text_offset: usize) -> Option<&Item<'a>> {
let mut cursor = 0;
for item in &self.items {
let end = cursor + item.textual_len();
if (cursor..end).contains(&text_offset) {
return Some(item);
}
cursor = end;
}
None
/// Get the item that contains the given `text_offset`.
pub fn get(&self, offset: usize) -> &(Range, Item<'a>) {
let idx = self.indices.get(offset).copied().unwrap_or(0);
&self.items[idx]
}
/// Return the items that intersect the given `text_range`.
///
/// Returns the expanded range around the items and the items.
pub fn slice(&self, text_range: Range) -> (Range, &[Item<'a>]) {
let mut cursor = 0;
let mut start = 0;
let mut end = 0;
let mut expanded = text_range.clone();
for (i, item) in self.items.iter().enumerate() {
if cursor <= text_range.start {
start = i;
expanded.start = cursor;
}
let len = item.textual_len();
if cursor < text_range.end || cursor + len <= text_range.end {
end = i + 1;
expanded.end = cursor + len;
} else {
break;
}
cursor += len;
}
(expanded, &self.items[start..end])
/// Iterate over the items that intersect the given `sliced` range.
pub fn slice(&self, sliced: Range) -> impl Iterator<Item = &(Range, Item<'a>)> {
let start = self.indices.get(sliced.start).copied().unwrap_or(0);
self.items[start..].iter().take_while(move |(range, _)| {
range.start < sliced.end || range.end <= sliced.end
})
}
}
@ -99,42 +79,57 @@ pub fn prepare<'a>(
spans: SpanMapper,
styles: StyleChain<'a>,
) -> SourceResult<Preparation<'a>> {
let bidi = BidiInfo::new(
text,
match TextElem::dir_in(styles) {
Dir::LTR => Some(BidiLevel::ltr()),
Dir::RTL => Some(BidiLevel::rtl()),
_ => None,
},
);
let dir = TextElem::dir_in(styles);
let default_level = match dir {
Dir::RTL => BidiLevel::rtl(),
_ => BidiLevel::ltr(),
};
let bidi = BidiInfo::new(text, Some(default_level));
let is_bidi = bidi
.levels
.iter()
.any(|level| level.is_ltr() != default_level.is_ltr());
let mut cursor = 0;
let mut items = Vec::with_capacity(segments.len());
// Shape the text to finalize the items.
for segment in segments {
let end = cursor + segment.textual_len();
let len = segment.textual_len();
let end = cursor + len;
let range = cursor..end;
match segment {
Segment::Text(_, styles) => {
shape_range(&mut items, engine, &bidi, cursor..end, &spans, styles);
shape_range(&mut items, engine, text, &bidi, range, styles);
}
Segment::Item(item) => items.push(item),
Segment::Item(item) => items.push((range, item)),
}
cursor = end;
}
// Build the mapping from byte to item indices.
let mut indices = Vec::with_capacity(text.len());
for (i, (range, _)) in items.iter().enumerate() {
indices.extend(range.clone().map(|_| i));
}
let cjk_latin_spacing = TextElem::cjk_latin_spacing_in(styles).is_auto();
if cjk_latin_spacing {
add_cjk_latin_spacing(&mut items);
}
Ok(Preparation {
bidi,
text,
bidi: is_bidi.then_some(bidi),
items,
indices,
spans,
hyphenate: children.shared_get(styles, TextElem::hyphenate_in),
costs: TextElem::costs_in(styles),
dir,
lang: children.shared_get(styles, TextElem::lang_in),
align: AlignElem::alignment_in(styles).resolve(styles).x,
justify: ParElem::justify_in(styles),
@ -150,10 +145,14 @@ pub fn prepare<'a>(
/// Add some spacing between Han characters and western characters. See
/// Requirements for Chinese Text Layout, Section 3.2.2 Mixed Text Composition
/// in Horizontal Written Mode
fn add_cjk_latin_spacing(items: &mut [Item]) {
let mut items = items.iter_mut().filter(|x| !matches!(x, Item::Tag(_))).peekable();
fn add_cjk_latin_spacing(items: &mut [(Range, Item)]) {
let mut items = items
.iter_mut()
.filter(|(_, x)| !matches!(x, Item::Tag(_)))
.peekable();
let mut prev: Option<&ShapedGlyph> = None;
while let Some(item) = items.next() {
while let Some((_, item)) = items.next() {
let Some(text) = item.text_mut() else {
prev = None;
continue;
@ -168,7 +167,7 @@ fn add_cjk_latin_spacing(items: &mut [Item]) {
let next = glyphs.peek().map(|n| n as _).or_else(|| {
items
.peek()
.and_then(|i| i.text())
.and_then(|(_, i)| i.text())
.and_then(|shaped| shaped.glyphs.first())
});

View file

@ -14,7 +14,6 @@ use super::{Item, Range, SpanMapper};
use crate::engine::Engine;
use crate::foundations::{Smart, StyleChain};
use crate::layout::{Abs, Dir, Em, Frame, FrameItem, Point, Size};
use crate::syntax::Span;
use crate::text::{
decorate, families, features, variant, Font, FontVariant, Glyph, Lang, Region,
TextElem, TextItem,
@ -27,6 +26,7 @@ use crate::World;
/// This type contains owned or borrowed shaped text runs, which can be
/// measured, used to reshape substrings more quickly and converted into a
/// frame.
#[derive(Clone)]
pub struct ShapedText<'a> {
/// The start of the text in the full paragraph.
pub base: usize,
@ -80,8 +80,6 @@ pub struct ShapedGlyph {
pub safe_to_break: bool,
/// The first char in this glyph's cluster.
pub c: char,
/// The source code location of the glyph and its byte offset within it.
pub span: (Span, u16),
/// Whether this glyph is justifiable for CJK scripts.
pub is_justifiable: bool,
/// The script of the glyph.
@ -214,6 +212,7 @@ impl<'a> ShapedText<'a> {
pub fn build(
&self,
engine: &Engine,
spans: &SpanMapper,
justification_ratio: f64,
extra_justification: Abs,
) -> Frame {
@ -268,7 +267,7 @@ impl<'a> ShapedText<'a> {
// We may not be able to reach the offset completely if
// it exceeds u16, but better to have a roughly correct
// span offset than nothing.
let mut span = shaped.span;
let mut span = spans.span_at(shaped.range.start);
span.1 = span.1.saturating_add(span_offset.saturating_as());
// |<---- a Glyph ---->|
@ -331,7 +330,7 @@ impl<'a> ShapedText<'a> {
}
/// Measure the top and bottom extent of this text.
fn measure(&self, engine: &Engine) -> (Abs, Abs) {
pub fn measure(&self, engine: &Engine) -> (Abs, Abs) {
let mut top = Abs::zero();
let mut bottom = Abs::zero();
@ -409,12 +408,7 @@ impl<'a> ShapedText<'a> {
/// shaping process if possible.
///
/// The text `range` is relative to the whole paragraph.
pub fn reshape(
&'a self,
engine: &Engine,
spans: &SpanMapper,
text_range: Range,
) -> ShapedText<'a> {
pub fn reshape(&'a self, engine: &Engine, text_range: Range) -> ShapedText<'a> {
let text = &self.text[text_range.start - self.base..text_range.end - self.base];
if let Some(glyphs) = self.slice_safe_to_break(text_range.clone()) {
#[cfg(debug_assertions)]
@ -436,7 +430,6 @@ impl<'a> ShapedText<'a> {
engine,
text_range.start,
text,
spans,
self.styles,
self.dir,
self.lang,
@ -445,6 +438,16 @@ impl<'a> ShapedText<'a> {
}
}
/// Derive an empty text run with the same properties as this one.
///
/// The text, width and glyphs are reset to empty/zero values; every other
/// field is taken over from `self`.
pub fn empty(&self) -> Self {
Self {
text: "",
width: Abs::zero(),
glyphs: Cow::Borrowed(&[]),
// Carry over all remaining fields of this run unchanged.
..*self
}
}
/// Push a hyphen to end of the text.
pub fn push_hyphen(&mut self, engine: &Engine, fallback: bool) {
self.insert_hyphen(engine, fallback, Side::Right)
@ -493,7 +496,6 @@ impl<'a> ShapedText<'a> {
range,
safe_to_break: true,
c: '-',
span: (Span::detached(), 0),
is_justifiable: false,
script: Script::Common,
};
@ -592,11 +594,11 @@ impl Debug for ShapedText<'_> {
/// Group a range of text by BiDi level and script, shape the runs and generate
/// items for them.
pub fn shape_range<'a>(
items: &mut Vec<Item<'a>>,
items: &mut Vec<(Range, Item<'a>)>,
engine: &Engine,
text: &'a str,
bidi: &BidiInfo<'a>,
range: Range,
spans: &SpanMapper,
styles: StyleChain<'a>,
) {
let script = TextElem::script_in(styles);
@ -604,17 +606,9 @@ pub fn shape_range<'a>(
let region = TextElem::region_in(styles);
let mut process = |range: Range, level: BidiLevel| {
let dir = if level.is_ltr() { Dir::LTR } else { Dir::RTL };
let shaped = shape(
engine,
range.start,
&bidi.text[range],
spans,
styles,
dir,
lang,
region,
);
items.push(Item::Text(shaped));
let shaped =
shape(engine, range.start, &text[range.clone()], styles, dir, lang, region);
items.push((range, Item::Text(shaped)));
};
let mut prev_level = BidiLevel::ltr();
@ -625,14 +619,14 @@ pub fn shape_range<'a>(
// set (rather than inferred from the glyphs), we keep the script at an
// unchanging `Script::Unknown` so that only level changes cause breaks.
for i in range.clone() {
if !bidi.text.is_char_boundary(i) {
if !text.is_char_boundary(i) {
continue;
}
let level = bidi.levels[i];
let curr_script = match script {
Smart::Auto => {
bidi.text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
text[i..].chars().next().map_or(Script::Unknown, |c| c.script())
}
Smart::Custom(_) => Script::Unknown,
};
@ -668,7 +662,6 @@ fn shape<'a>(
engine: &Engine,
base: usize,
text: &'a str,
spans: &SpanMapper,
styles: StyleChain<'a>,
dir: Dir,
lang: Lang,
@ -677,7 +670,6 @@ fn shape<'a>(
let size = TextElem::size_in(styles);
let mut ctx = ShapingContext {
engine,
spans,
size,
glyphs: vec![],
used: vec![],
@ -717,7 +709,6 @@ fn shape<'a>(
/// Holds shaping results and metadata common to all shaped segments.
struct ShapingContext<'a, 'v> {
engine: &'a Engine<'v>,
spans: &'a SpanMapper,
glyphs: Vec<ShapedGlyph>,
used: Vec<Font>,
styles: StyleChain<'a>,
@ -830,7 +821,6 @@ fn shape_segment<'a>(
range: start..end,
safe_to_break: !info.unsafe_to_break(),
c,
span: ctx.spans.span_at(start),
is_justifiable: is_justifiable(
c,
script,
@ -921,7 +911,6 @@ fn shape_tofus(ctx: &mut ShapingContext, base: usize, text: &str, font: Font) {
range: start..end,
safe_to_break: true,
c,
span: ctx.spans.span_at(start),
is_justifiable: is_justifiable(
c,
script,

View file

@ -18,9 +18,9 @@ use crate::realize::StyleVec;
///
/// # Example
/// ```example
/// #show par: set block(spacing: 0.65em)
/// #set par(
/// first-line-indent: 1em,
/// spacing: 0.65em,
/// justify: true,
/// )
///
@ -115,8 +115,7 @@ pub struct ParElem {
/// By typographic convention, paragraph breaks are indicated either by some
/// space between paragraphs or by indented first lines. Consider reducing
/// the [paragraph spacing]($block.spacing) to the [`leading`]($par.leading)
/// when using this property (e.g. using
/// `[#show par: set block(spacing: 0.65em)]`).
/// when using this property (e.g. using `[#set par(spacing: 0.65em)]`).
#[ghost]
pub first_line_indent: Length,

View file

@ -593,10 +593,9 @@ The example below
```typ
#set page(margin: 1.75in)
#set par(leading: 0.55em, first-line-indent: 1.8em, justify: true)
#set par(leading: 0.55em, spacing: 0.55em, first-line-indent: 1.8em, justify: true)
#set text(font: "New Computer Modern")
#show raw: set text(font: "New Computer Modern Mono")
#show par: set block(spacing: 0.55em)
#show heading: set block(above: 1.4em, below: 1em)
```

View file

@ -120,7 +120,7 @@ a table listing all syntax that is available in code mode:
| Named function | `{let f(x) = 2 * x}` | [Function]($function) |
| Set rule | `{set text(14pt)}` | [Styling]($styling/#set-rules) |
| Set-if rule | `{set text(..) if .. }` | [Styling]($styling/#set-rules) |
| Show-set rule | `{show par: set block(..)}` | [Styling]($styling/#show-rules) |
| Show-set rule | `{show heading: set block(..)}` | [Styling]($styling/#show-rules) |
| Show rule with function | `{show raw: it => {..}}` | [Styling]($styling/#show-rules) |
| Show-everything rule | `{show: columns.with(2)}` | [Styling]($styling/#show-rules) |
| Context expression | `{context text.lang}` | [Context]($context) |

Binary file not shown.

Before

Width:  |  Height:  |  Size: 378 B

After

Width:  |  Height:  |  Size: 361 B

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.5 KiB

After

Width:  |  Height:  |  Size: 1.5 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 850 B

After

Width:  |  Height:  |  Size: 881 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 74 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 91 B

View file

@ -4,7 +4,7 @@
// Test version constructor.
// Empty.
#version()
#test(array(version()), ())
// Plain.
#test(version(1, 2).major, 1)

View file

@ -47,14 +47,14 @@ Totally #h() ignored
Hello #h(2cm, weak: true)
--- issue-4087 ---
// weak space at the end of the line would be removed.
// Weak space at the end of the line is removed.
This is the first line #h(2cm, weak: true) A new line
// non-weak space would be consume a specified width and push next line.
// Non-weak space consumes a specified width and pushes to next line.
This is the first line #h(2cm, weak: false) A new line
// similarly weak space at the beginning of the line would be removed.
This is the first line\ #h(2cm, weak: true) A new line
// Similarly, weak space at the beginning of the line is removed.
This is the first line \ #h(2cm, weak: true) A new line
// non-spacing, on the other hand, is not removed.
This is the first line\ #h(2cm, weak: false) A new line
// Non-weak-spacing, on the other hand, is not removed.
This is the first line \ #h(2cm, weak: false) A new line

View file

@ -78,3 +78,22 @@ Welcome \ here. Does this work well?
#set text(dir: rtl)
لآن وقد أظلم الليل وبدأت النجوم
تنضخ وجه الطبيعة التي أعْيَتْ من طول ما انبعثت في النهار
--- par-trailing-whitespace ---
// Ensure that trailing whitespace is laid out as intended.
#box(fill: aqua, " ")
--- par-empty-metadata ---
// Check that metadata still works in a zero length paragraph.
#block(height: 0pt)[#""#metadata(false)<hi>]
#context test(query(<hi>).first().value, false)
--- par-metadata-after-trimmed-space ---
// Ensure that metadata doesn't prevent trailing spaces from being trimmed.
#set par(justify: true, linebreaks: "simple")
#set text(hyphenate: false)
Lorem ipsum dolor #metadata(none) nonumy eirmod tempor.
--- issue-4278-par-trim-before-equation ---
#set par(justify: true)
#lorem(6) aa $a = c + b$