Hyphenation

This commit is contained in:
Laurenz 2022-03-08 19:49:26 +01:00
parent d24c7030d8
commit b71113d37a
9 changed files with 128 additions and 14 deletions

7
Cargo.lock generated
View file

@ -240,6 +240,12 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
[[package]]
name = "hypher"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29349e08e99b98d0e16a0ca738d181d5c73431a9a46b78918318c4bc9b10106"
[[package]]
name = "iai"
version = "0.1.1"
@ -806,6 +812,7 @@ dependencies = [
"either",
"flate2",
"fxhash",
"hypher",
"iai",
"image",
"kurbo",

View file

@ -22,6 +22,7 @@ serde = { version = "1", features = ["derive"] }
typed-arena = "2"
# Text and font handling
hypher = "0.1"
kurbo = "0.8"
ttf-parser = "0.12"
rustybuzz = "0.4"

View file

@ -1,3 +1,5 @@
//! Text shaping and paragraph layout.
mod deco;
mod link;
mod par;

View file

@ -27,12 +27,17 @@ pub enum ParChild {
#[class]
impl ParNode {
/// An ISO 639-1 language code.
pub const LANG: Option<EcoString> = None;
/// The direction for text and inline objects.
pub const DIR: Dir = Dir::LTR;
/// How to align text and inline objects in their line.
pub const ALIGN: Align = Align::Left;
/// Whether to justify text in its line.
pub const JUSTIFY: bool = false;
/// Whether to hyphenate text to improve line breaking. When `auto`, words
/// will be hyphenated if and only if justification is enabled.
pub const HYPHENATE: Smart<bool> = Smart::Auto;
/// The spacing between lines (dependent on scaled font size).
pub const LEADING: Linear = Relative::new(0.65).into();
/// The extra spacing between paragraphs (dependent on scaled font size).
@ -49,13 +54,14 @@ impl ParNode {
}
fn set(args: &mut Args, styles: &mut StyleMap) -> TypResult<()> {
let lang = args.named::<Option<EcoString>>("lang")?;
let mut dir =
args.named("lang")?
.map(|iso: EcoString| match iso.to_lowercase().as_str() {
"ar" | "he" | "fa" | "ur" | "ps" | "yi" => Dir::RTL,
"en" | "fr" | "de" => Dir::LTR,
_ => Dir::LTR,
});
lang.clone().flatten().map(|iso| match iso.to_lowercase().as_str() {
"ar" | "dv" | "fa" | "he" | "ks" | "pa" | "ps" | "sd" | "ug" | "ur"
| "yi" => Dir::RTL,
_ => Dir::LTR,
});
if let Some(Spanned { v, span }) = args.named::<Spanned<Dir>>("dir")? {
if v.axis() != SpecAxis::Horizontal {
@ -74,9 +80,11 @@ impl ParNode {
dir.map(|dir| dir.start().into())
};
styles.set_opt(Self::LANG, lang);
styles.set_opt(Self::DIR, dir);
styles.set_opt(Self::ALIGN, align);
styles.set_opt(Self::JUSTIFY, args.named("justify")?);
styles.set_opt(Self::HYPHENATE, args.named("hyphenate")?);
styles.set_opt(Self::LEADING, args.named("leading")?);
styles.set_opt(Self::SPACING, args.named("spacing")?);
styles.set_opt(Self::INDENT, args.named("indent")?);
@ -137,7 +145,7 @@ impl Layout for ParNode {
let par = ParLayout::new(ctx, self, bidi, regions, &styles)?;
// Break the paragraph into lines.
let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x);
let lines = break_into_lines(&mut ctx.fonts, &par, regions.first.x, styles);
// Stack the lines into one frame per region.
Ok(stack_lines(&ctx.fonts, lines, regions, styles))
@ -278,6 +286,7 @@ impl<'a> ParLayout<'a> {
fonts: &mut FontStore,
mut range: Range,
mandatory: bool,
hyphen: bool,
) -> LineLayout<'a> {
// Find the items which bound the text range.
let last_idx = self.find(range.end.saturating_sub(1)).unwrap();
@ -308,7 +317,10 @@ impl<'a> ParLayout<'a> {
// empty string.
if !shifted.is_empty() || rest.is_empty() {
// Reshape that part.
let reshaped = shaped.reshape(fonts, shifted);
let mut reshaped = shaped.reshape(fonts, shifted);
if hyphen {
reshaped.push_hyphen(fonts);
}
last = Some(ParItem::Text(reshaped));
}
@ -524,6 +536,7 @@ fn break_into_lines<'a>(
fonts: &mut FontStore,
par: &'a ParLayout<'a>,
width: Length,
styles: StyleChain,
) -> Vec<LineLayout<'a>> {
// The already determined lines and the current line attempt.
let mut lines = vec![];
@ -531,9 +544,9 @@ fn break_into_lines<'a>(
let mut last = None;
// Find suitable line breaks.
for (end, mandatory) in LineBreakIterator::new(&par.bidi.text) {
for (end, mandatory, hyphen) in breakpoints(&par.bidi.text, styles) {
// Compute the line and its size.
let mut line = par.line(fonts, start .. end, mandatory);
let mut line = par.line(fonts, start .. end, mandatory, hyphen);
// If the line doesn't fit anymore, we push the last fitting attempt
// into the stack and rebuild the line from its end. The resulting
@ -542,7 +555,7 @@ fn break_into_lines<'a>(
if let Some((last_line, last_end)) = last.take() {
lines.push(last_line);
start = last_end;
line = par.line(fonts, start .. end, mandatory);
line = par.line(fonts, start .. end, mandatory, hyphen);
}
}
@ -565,6 +578,47 @@ fn break_into_lines<'a>(
lines
}
/// Determine all possible points in the text where lines can be broken.
///
/// Yields `(end, mandatory, hyphen)` triples where `end` is a byte offset
/// into `text` at which a line may end, `mandatory` is the flag reported by
/// the line break iterator for that position and `hyphen` is whether the
/// break falls inside a word, so that a hyphen has to be pushed onto the
/// line when breaking there.
///
/// Hyphenation opportunities are only produced when hyphenation is enabled
/// (explicitly, or through justification when it is set to `auto`) and the
/// paragraph's language code is one that `hypher` recognizes. Otherwise, the
/// plain line break opportunities are passed through with `hyphen = false`.
fn breakpoints<'a>(
    text: &'a str,
    styles: StyleChain,
) -> impl Iterator<Item = (usize, bool, bool)> + 'a {
    let mut lang = None;
    if styles.get(ParNode::HYPHENATE).unwrap_or(styles.get(ParNode::JUSTIFY)) {
        lang = styles
            .get_ref(ParNode::LANG)
            .as_ref()
            .and_then(|iso| iso.as_bytes().try_into().ok())
            // The language code is matched case-insensitively when the text
            // direction is derived from it, so normalize it here as well.
            // Otherwise, a code like "EN" would silently disable hyphenation.
            .map(|mut code: [u8; 2]| {
                code.make_ascii_lowercase();
                code
            })
            .and_then(hypher::Lang::from_iso);
    }

    let breaks = LineBreakIterator::new(text);

    // End of the previously processed segment, i.e. start of the current one.
    let mut last = 0;

    if let Some(lang) = lang {
        Either::Left(breaks.flat_map(move |(end, mandatory)| {
            // The segment between two break opportunities. Everything that
            // trails its last alphabetic character (spaces, punctuation) is
            // excluded from hyphenation.
            let word = &text[last .. end];
            let trimmed = word.trim_end_matches(|c: char| !c.is_alphabetic());

            // Offset in `text` at which the hyphenated part ends.
            let suffix = last + trimmed.len();

            // Running offset of the current syllable boundary. This also
            // advances `last` to the start of the next segment.
            let mut start = std::mem::replace(&mut last, end);

            if trimmed.is_empty() {
                // Nothing to hyphenate, just keep the original opportunity.
                Either::Left([(end, mandatory, false)].into_iter())
            } else {
                Either::Right(hypher::hyphenate(trimmed, lang).map(move |syllable| {
                    start += syllable.len();

                    // The final syllable takes the trailing non-alphabetic
                    // characters with it and thus ends at the original break
                    // opportunity.
                    if start == suffix {
                        start = end;
                    }

                    // Only the original opportunity keeps the `mandatory`
                    // flag; breaks inside of the word are never mandatory.
                    let hyphen = start < end;
                    (start, mandatory && !hyphen, hyphen)
                }))
            }
        }))
    } else {
        Either::Right(breaks.map(|(e, m)| (e, m, false)))
    }
}
/// Combine the lines into one frame per region.
fn stack_lines(
fonts: &FontStore,

View file

@ -135,6 +135,34 @@ impl<'a> ShapedText<'a> {
}
}
/// Append a hyphen glyph to the end of the text.
///
/// The font families from the text's styles are tried in order. The first
/// one for which a face can be selected that has a `-` glyph with a
/// horizontal advance provides the hyphen. If no family qualifies, the text
/// is left untouched.
pub fn push_hyphen(&mut self, fonts: &mut FontStore) {
    // Fraction of the hyphen's advance that is added to the text's width.
    // Counting only a part of it allows hyphens to overhang a bit.
    const INSET: f64 = 0.4;

    let size = self.styles.get(TextNode::SIZE).abs;
    let variant = variant(self.styles);

    for family in families(self.styles) {
        let face_id = match fonts.select(family, variant) {
            Some(id) => id,
            None => continue,
        };

        let face = fonts.get(face_id);
        let ttf = face.ttf();

        let glyph_id = match ttf.glyph_index('-') {
            Some(id) => id,
            None => continue,
        };

        let x_advance = match ttf.glyph_hor_advance(glyph_id) {
            Some(units) => face.to_em(units),
            None => continue,
        };

        self.size.x += INSET * x_advance.resolve(size);

        // The hyphen is not backed by any source text, so it is anchored at
        // the very end of the text.
        self.glyphs.to_mut().push(ShapedGlyph {
            face_id,
            glyph_id: glyph_id.0,
            x_advance,
            x_offset: Em::zero(),
            text_index: self.text.len(),
            safe_to_break: true,
            is_space: false,
        });

        // One hyphen is enough, don't try the remaining families.
        break;
    }
}
/// Find the subslice of glyphs that represent the given text range if both
/// sides are safe to break.
fn slice_safe_to_break(&self, text_range: Range<usize>) -> Option<&[ShapedGlyph]> {
@ -531,8 +559,9 @@ fn measure(
if glyphs.is_empty() {
// When there are no glyphs, we just use the vertical metrics of the
// first available font.
let variant = variant(styles);
for family in families(styles) {
if let Some(face_id) = fonts.select(family, variant(styles)) {
if let Some(face_id) = fonts.select(family, variant) {
expand(fonts.get(face_id));
break;
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.9 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 11 KiB

After

Width:  |  Height:  |  Size: 11 KiB

View file

@ -0,0 +1,14 @@
// Test hyphenation.
---
#set page(width: 70pt)
#set par(lang: "en", hyphenate: true)
Warm welcomes to Typst.
#h(6pt) networks, the rest.
---
#set page(width: 60pt)
#set par(lang: "el", hyphenate: true)
διαμερίσματα. \
λατρευτός

View file

@ -1,9 +1,16 @@
---
#set par(indent: 14pt, spacing: 0pt, leading: 5pt, justify: true)
#set page(width: 180pt)
#set par(
lang: "en",
justify: true,
indent: 14pt,
spacing: 0pt,
leading: 5pt,
)
This text is justified, meaning that spaces are stretched so that the text
forms as "block" with flush edges at both sides.
forms a "block" with flush edges at both sides.
First line indents and hyphenation play nicely with justified text.