Skip to content

Commit

Permalink
Merge pull request #151 from harfbuzz/10.1.0
Browse files Browse the repository at this point in the history
Sync with 10.1.0
  • Loading branch information
alerque authored Nov 29, 2024
2 parents 8c52723 + 5136d88 commit 9473e49
Show file tree
Hide file tree
Showing 12 changed files with 521 additions and 604 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
`rustybuzz` is a complete [harfbuzz](https://github.com/harfbuzz/harfbuzz)'s
shaping algorithm port to Rust.

Matches `harfbuzz` v10.0.1, commit [c7ef6a2e](https://github.com/harfbuzz/harfbuzz/commit/c7ef6a2ed58ae8ec108ee0962bef46f42c73a60c) (one commit after v10.0.1)
Matches `harfbuzz` v10.1.0.

## Why?

Expand Down
5 changes: 4 additions & 1 deletion scripts/gen-shaping-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,9 +204,12 @@ def convert_test_folder(root_dir, hb_shape_exe, tests_dir, tests_name, custom):
def convert_test_files(root_dir, hb_shape_exe, tests_dir, tests_name, files, custom):
fonts = set()

macos_snippet = "#[cfg(target_os = \"macos\")]\n" if tests_name == "macos" else ""

rust_code = (
"// WARNING: this file was generated by ../scripts/gen-shaping-tests.py\n"
"\n"
"\n" +
macos_snippet +
"use crate::shape;\n"
"\n"
)
Expand Down
51 changes: 28 additions & 23 deletions scripts/gen-tag-table.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,49 +335,54 @@ def __init__(self):
self.from_bcp_47_uninherited = None
# Whether the parser is in a <td> element
self._td = False
# Whether the parser is after a <br> element within the current <tr> element
self._br = False
# Whether the parser ignores the rest of the current <td> element
self._disengaged = False
# The text of the <td> elements of the current <tr> element.
self._current_tr = []

def handle_starttag(self, tag, attrs):
if tag == 'br':
self._br = True
def handle_starttag (self, tag, attrs):
if tag == 'a':
if self._current_tr and not self._disengaged:
self._current_tr[-1] = ''
self._disengaged = True
elif tag == 'br':
self._disengaged = True
elif tag == 'meta':
for attr, value in attrs:
if attr == 'name' and value == 'updated_at':
self.header = self.get_starttag_text()
self.header = self.get_starttag_text ()
break
elif tag == 'td':
self._td = True
self._current_tr.append('')
self._current_tr.append ('')
elif tag == 'tr':
self._br = False
self._disengaged = False
self._current_tr = []

def handle_endtag(self, tag):
def handle_endtag (self, tag):
if tag == 'td':
self._td = False
self._disengaged = False
elif tag == 'tr' and self._current_tr:
expect(2 <= len(self._current_tr) <= 3)
name = self._current_tr[0].strip()
tag = self._current_tr[1].strip("\t\n\v\f\r '")
expect (2 <= len (self._current_tr) <= 3)
name = self._current_tr[0].strip ()
tag = self._current_tr[1].strip ("\t\n\v\f\r '")
rank = 0
if len(tag) > 4:
expect(tag.endswith('(deprecated)'), 'ill-formed OpenType tag: %s' % tag)
name += '(deprecated)'
tag = tag.split(' ')[0]
if len (tag) > 4:
expect (tag.endswith (' (deprecated)'), 'ill-formed OpenType tag: %s' % tag)
name += ' (deprecated)'
tag = tag.split (' ')[0]
rank = 1
self.names[tag] = re.sub(' languages$', '', name)
self.names[tag] = re.sub (' languages$', '', name)
if not self._current_tr[2]:
return
iso_codes = self._current_tr[2].strip()
self.to_bcp_47[tag].update(ISO_639_3_TO_1.get(code, code) for code in iso_codes.replace(' ', '').split(','))
rank += 2 * len(self.to_bcp_47[tag])
iso_codes = self._current_tr[2].strip ()
self.to_bcp_47[tag].update (ISO_639_3_TO_1.get (code, code) for code in iso_codes.replace (' ', '').split (','))
rank += 2 * len (self.to_bcp_47[tag])
self.ranks[tag] = rank

def handle_data(self, data):
if self._td and not self._br:
def handle_data (self, data):
if self._td and not self._disengaged:
self._current_tr[-1] += data

def handle_charref(self, name):
Expand Down Expand Up @@ -980,7 +985,7 @@ def same_tag(bcp_47_tag, ot_tags):
print('fn strncmp(s1: &str, s2: &str, n: usize) -> bool {')
print(' let n1 = core::cmp::min(n, s1.len());')
print(' let n2 = core::cmp::min(n, s2.len());')
print(' &s1[..n1] == &s2[..n2]')
print(' s1[..n1] == s2[..n2]')
print('}')
print()
print('/// Converts a multi-subtag BCP 47 language tag to language tags.')
Expand Down
9 changes: 5 additions & 4 deletions scripts/ms-use/IndicPositionalCategory-Additional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ A9BE ; Right # Mc JAVANESE CONSONANT SIGN PENGKAL # Reduced from
AA35   ; Top # Mn       CHAM CONSONANT SIGN
1112A..1112B ; Top # Mn [2] CHAKMA VOWEL SIGN U..CHAKMA VOWEL SIGN UU # see USE issue #25
11131..11132 ; Top # Mn [2] CHAKMA O MARK..CHAKMA AU MARK # see USE issue #25
1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made Top for ccc
1E4EC..1E4EF ; Top # Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH # 1E4EE is below, but made Top for ccc

# ================================================

Expand Down Expand Up @@ -77,11 +77,12 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
10F83 ; Bottom # Mn OLD UYGHUR COMBINING DOT BELOW
10F84 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS ABOVE # Overridden, ccc controls order
10F85 ; Bottom # Mn OLD UYGHUR COMBINING TWO DOTS BELOW
113CF ; Bottom # Mc TULU-TIGALARI SIGN LOOPED VIRAMA # Issue #17
16F4F ; Bottom # Mn MIAO SIGN CONSONANT MODIFIER BAR
16F51..16F87 ; Bottom # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI
16F8F..16F92 ; Bottom # Mn [4] MIAO TONE RIGHT..MIAO TONE BELOW
1E5EE ; Bottom # Mn OL ONAL SIGN MU # Not really below, but need to override to fit into Universal model
1E5EF ; Bottom # Mn OL ONAL SIGN IKIR
1E5EE ; Bottom # Mn OL ONAL SIGN MU # Not really below, but need to override to fit into Universal model
1E5EF ; Bottom # Mn OL ONAL SIGN IKIR

# ================================================

Expand All @@ -94,7 +95,7 @@ AA35   ; Top # Mn       CHAM CONSONANT SIGN
07EB..07F3 ; Top # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE
07FD ; Top # Mn NKO DANTAYALAN # Not really top, but assigned here to allow ccc to control mark order
1885..1886 ; Top # Mn [2] MONGOLIAN LETTER ALI GALI BALUDA..MONGOLIAN LETTER ALI GALI THREE BALUDA
1B6C ; Top # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP
1B6C ; Top # Mn BALINESE MUSICAL SYMBOL COMBINING ENDEP
1CF8..1CF9 ; Top # Mn [2] VEDIC TONE RING ABOVE..VEDIC TONE DOUBLE RING ABOVE
10D24..10D27 ; Top # Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
10EAB..10EAC ; Top # Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
Expand Down
9 changes: 4 additions & 5 deletions scripts/ms-use/IndicSyllabicCategory-Additional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
# OVERRIDES TO ASSIGNED VALUES
# ================================================

# Indic_Syllabic_Category=Bindu
# Indic_Syllabic_Category=Bindu
193A ; Bindu # Mn LIMBU SIGN KEMPHRENG
AA29 ; Bindu # Mn CHAM VOWEL SIGN AA
10A0D ; Bindu # Mn KHAROSHTHI SIGN DOUBLE RING BELOW
Expand Down Expand Up @@ -47,14 +47,13 @@ AA29 ; Bindu # Mn CHAM VOWEL SIGN AA

# ================================================

# Indic_Syllabic_Category=Gemination_Mark
# Indic_Syllabic_Category=Gemination_Mark
11134 ; Gemination_Mark # Mc CHAKMA MAAYYAA

# ================================================

# Indic_Syllabic_Category=Nukta
# Indic_Syllabic_Category=Nukta
0F71 ; Nukta # Mn TIBETAN VOWEL SIGN AA # Reassigned to get this before an above vowel, but see #22
1BF2..1BF3 ; Nukta # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN # see USE issue #20
113CF ; Nukta # Mc TULU-TIGALARI SIGN LOOPED VIRAMA

# ================================================
Expand Down Expand Up @@ -115,7 +114,7 @@ AABD ; Vowel_Independent # Lo TAI VIET VOWEL AN
18B00..18CD5 ; Consonant # Lo [470] KHITAN SMALL SCRIPT CHARACTER-18B00..KHITAN SMALL SCRIPT CHARACTER-18CD5
18CFF ; Consonant # Lo KHITAN SMALL SCRIPT CHARACTER-18CFF
1BC00..1BC6A ; Consonant # Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC70..1BC7C ; Consonant # Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
1BC80..1BC88 ; Consonant # Lo [9] DUPLOYAN AFFIX HIGH ACUTE..DUPLOYAN AFFIX HIGH VERTICAL
1BC90..1BC99 ; Consonant # Lo [10] DUPLOYAN AFFIX LOW ACUTE..DUPLOYAN AFFIX LOW ARROW
1E100..1E12C ; Consonant # Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
Expand Down
3 changes: 1 addition & 2 deletions src/hb/aat_layout_kerx_table.rs
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,7 @@ fn apply_state_machine_kerning<T, E>(
// go differently if we start from state 0 here.
if state != START_OF_TEXT && buffer.backtrack_len() != 0 && buffer.idx < buffer.len {
// If there's no value and we're just epsilon-transitioning to state 0, safe to break.
if entry.is_actionable() || entry.new_state != START_OF_TEXT || entry.has_advance()
{
if entry.is_actionable() || entry.new_state != START_OF_TEXT || entry.has_advance() {
buffer.unsafe_to_break_from_outbuffer(
Some(buffer.backtrack_len() - 1),
Some(buffer.idx + 1),
Expand Down
1 change: 1 addition & 0 deletions src/hb/ot_shaper_use.rs
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ pub mod category {
pub const HVM: u8 = 53; // HALANT_OR_VOWEL_MODIFIER
pub const HM: u8 = 54; // HIEROGLYPH_MOD
pub const HR: u8 = 55; // HIEROGLYPH_MIRROR
pub const RK: u8 = 56; // REORDERING_KILLER
}

// These features are applied all at once, before reordering,
Expand Down
3 changes: 2 additions & 1 deletion src/hb/ot_shaper_use_machine.rl
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ SE = 52; # HIEROGLYPH_SEGMENT_END
HVM = 53; # HALANT_OR_VOWEL_MODIFIER
HM = 54; # HIEROGLYPH_MOD
HR = 55; # HIEROGLYPH_MIRROR
RK = 56; # REORDERING_KILLER

FAbv = 24; # CONS_FINAL_ABOVE
FBlw = 25; # CONS_FINAL_BELOW
Expand Down Expand Up @@ -107,7 +108,7 @@ symbol_cluster_tail = SMAbv+ SMBlw* | SMBlw+;

virama_terminated_cluster_tail =
consonant_modifiers
IS
(IS | RK)
;
virama_terminated_cluster =
complex_syllable_start
Expand Down
Loading

0 comments on commit 9473e49

Please sign in to comment.