1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252 | 1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
74
57
37
21
23
1
30
30
10
5
1
1
1
1
1
1
1
1
1
74
57
37
30
23
21
16
1
1
1
1
1
1
9
9
9
48
48
48
48
48
6
11
4
1
48
9
1
8
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
11
11
11
11
11
11
1
1
10
8
8
2
2
2
10
10
10
5
11
11
5
5
5
5
5
6
6
6
6
1
1
5
1
4
4
4
4
4
2
1
1
4
4
1
1
1
1
1
1
1
1
1
1
| DefaultShaper = require './DefaultShaper'
GlyphInfo = require '../GlyphInfo'
#
# This is a shaper for the Hangul script, used by the Korean language.
# It does the following:
#   - decompose if unsupported by the font:
#       <LV>   -> <L,V>
#       <LVT>  -> <L,V,T>
#       <LV,T> -> <L,V,T>
#
#   - compose if supported by the font:
#       <L,V>   -> <LV>
#       <L,V,T> -> <LVT>
#       <LV,T>  -> <LVT>
#
#   - reorder tone marks (S is any valid syllable):
#       <S, M> -> <M, S>
#
#   - apply ljmo, vjmo, and tjmo OpenType features to decomposed Jamo sequences.
#
# This logic is based on the following documents:
# - http://www.microsoft.com/typography/OpenTypeDev/hangul/intro.htm
# - http://ktug.org/~nomos/harfbuzz-hangul/hangulshaper.pdf
#
class HangulShaper extends DefaultShaper
  # Precomposed Hangul syllables: L_COUNT * V_COUNT * T_COUNT = 19 * 21 * 28 = 11172
  # code points, i.e. U+AC00..U+D7A3. HANGUL_END is the last syllable (inclusive).
  HANGUL_BASE  = 0xac00
  HANGUL_END   = 0xd7a3
  HANGUL_COUNT = HANGUL_END - HANGUL_BASE + 1

  # Bases and counts of the modern (combining) Jamo used by the
  # composition / decomposition arithmetic below.
  L_BASE  = 0x1100 # lead
  V_BASE  = 0x1161 # vowel
  T_BASE  = 0x11a7 # trail (T_BASE itself stands for "no trailing consonant")
  L_COUNT = 19
  V_COUNT = 21
  T_COUNT = 28
  L_END   = L_BASE + L_COUNT - 1
  V_END   = V_BASE + V_COUNT - 1
  T_END   = T_BASE + T_COUNT - 1

  DOTTED_CIRCLE = 0x25cc

  # Code point range predicates for each character class.
  isL    = (code) -> 0x1100 <= code <= 0x115f or 0xa960 <= code <= 0xa97c
  isV    = (code) -> 0x1160 <= code <= 0x11a7 or 0xd7b0 <= code <= 0xd7c6
  isT    = (code) -> 0x11a8 <= code <= 0x11ff or 0xd7cb <= code <= 0xd7fb
  isTone = (code) -> 0x302e <= code <= 0x302f
  isLVT  = (code) -> HANGUL_BASE <= code <= HANGUL_END

  # A precomposed syllable without a trailing consonant sits at a multiple of
  # T_COUNT from HANGUL_BASE. The lower bound check matters: without it a code
  # point below HANGUL_BASE yields a negative offset whose remainder can be -0,
  # which compares equal to 0.
  isLV = (code) ->
    offset = code - HANGUL_BASE
    return 0 <= offset < HANGUL_COUNT and offset % T_COUNT is 0

  # Jamo that can take part in arithmetic composition into a precomposed syllable.
  isCombiningL = (code) -> L_BASE <= code <= L_END
  isCombiningV = (code) -> V_BASE <= code <= V_END
  isCombiningT = (code) -> T_BASE + 1 <= code <= T_END

  # Character categories (column indices into STATE_TABLE rows)
  X   = 0 # Other character
  L   = 1 # Leading consonant
  V   = 2 # Medial vowel
  T   = 3 # Trailing consonant
  LV  = 4 # Composed <LV> syllable
  LVT = 5 # Composed <LVT> syllable
  M   = 6 # Tone mark

  # Classifies a code point using the above categories.
  # isLV must be tested before isLVT, since every LV syllable is also in the LVT range.
  getType = (code) ->
    return L   if isL code
    return V   if isV code
    return T   if isT code
    return LV  if isLV code
    return LVT if isLVT code
    return M   if isTone code
    return X

  # State machine actions
  NO_ACTION = 0
  DECOMPOSE = 1
  COMPOSE   = 2
  TONE_MARK = 4
  INVALID   = 5

  # State machine that accepts valid syllables and applies actions along the way.
  # Rows are states, columns are character categories, and each cell is
  # [ action, next state ].
  STATE_TABLE = [
    #       X                 L                 V                 T                 LV                LVT               M
    # State 0: start state
    [ [ NO_ACTION, 0 ], [ NO_ACTION, 1 ], [ NO_ACTION, 0 ], [ NO_ACTION, 0 ], [ DECOMPOSE, 2 ], [ DECOMPOSE, 3 ], [ INVALID,   0 ] ]
    # State 1: <L>
    [ [ NO_ACTION, 0 ], [ NO_ACTION, 1 ], [ COMPOSE,   2 ], [ NO_ACTION, 0 ], [ DECOMPOSE, 2 ], [ DECOMPOSE, 3 ], [ INVALID,   0 ] ]
    # State 2: <L,V> or <LV>
    [ [ NO_ACTION, 0 ], [ NO_ACTION, 1 ], [ NO_ACTION, 0 ], [ COMPOSE,   3 ], [ DECOMPOSE, 2 ], [ DECOMPOSE, 3 ], [ TONE_MARK, 0 ] ]
    # State 3: <L,V,T> or <LVT>
    [ [ NO_ACTION, 0 ], [ NO_ACTION, 1 ], [ NO_ACTION, 0 ], [ NO_ACTION, 0 ], [ DECOMPOSE, 2 ], [ DECOMPOSE, 3 ], [ TONE_MARK, 0 ] ]
  ]

  # Runs the state machine over `glyphs`, modifying the array in place
  # (composing, decomposing, reordering tone marks, inserting dotted circles).
  #
  # glyphs - array of glyph objects; each is read via `codePoints[0]` and `features`
  # script - unused here
  # font   - object providing `hasGlyphForCodePoint` and `glyphForCodePoint`
  #
  # Returns the names of the Jamo features set on decomposed sequences.
  @assignFeatures: (glyphs, script, font) ->
    state = 0
    i = 0
    while i < glyphs.length
      glyph = glyphs[i]
      code = glyph.codePoints[0]
      type = getType code

      [ action, state ] = STATE_TABLE[state][type]

      # Each helper returns the index of the last glyph it produced or consumed,
      # so the shared `i++` below always moves on to the next unprocessed glyph.
      switch action
        when DECOMPOSE
          # Decompose the composed syllable if it is not supported by the font.
          unless font.hasGlyphForCodePoint code
            i = decompose glyphs, i, font

        when COMPOSE
          # Found a decomposed syllable. Try to compose if supported by the font.
          i = compose glyphs, i, font

        when TONE_MARK
          # Got a valid syllable, followed by a tone mark.
          # Move the tone mark to the beginning of the syllable.
          reorderToneMark glyphs, i, font

        when INVALID
          # Tone mark has no valid syllable to attach to, so insert a dotted circle.
          i = insertDottedCircle glyphs, i, font

      i++

    return ['ljmo', 'vjmo', 'tjmo']

  # Builds a GlyphInfo for `code`, passing along the feature names of `features`.
  getGlyph = (font, code, features) ->
    return new GlyphInfo font.glyphForCodePoint(code).id, [code], Object.keys features

  # Replaces the precomposed syllable at index `i` with its L, V (and T) Jamo.
  # Returns the index of the last inserted glyph, or `i` unchanged when the
  # font lacks one of the components and nothing was replaced.
  decompose = (glyphs, i, font) ->
    glyph = glyphs[i]
    code = glyph.codePoints[0]

    # Arithmetic decomposition of the syllable index into Jamo code points.
    s = code - HANGUL_BASE
    t = T_BASE + s % T_COUNT
    s = s / T_COUNT | 0
    l = L_BASE + s / V_COUNT | 0
    v = V_BASE + s % V_COUNT

    # Only decompose when every component is available in the font.
    # t is T_BASE means the syllable has no trailing consonant.
    componentsAvailable =
      font.hasGlyphForCodePoint(l) and
      font.hasGlyphForCodePoint(v) and
      (t is T_BASE or font.hasGlyphForCodePoint(t))
    return i unless componentsAvailable

    # Replace the current glyph with decomposed L, V, and T glyphs,
    # and apply the proper OpenType features to each component.
    ljmo = getGlyph font, l, glyph.features
    ljmo.features.ljmo = true

    vjmo = getGlyph font, v, glyph.features
    vjmo.features.vjmo = true

    insert = [ ljmo, vjmo ]

    if t > T_BASE
      tjmo = getGlyph font, t, glyph.features
      tjmo.features.tjmo = true
      insert.push tjmo

    glyphs.splice i, 1, insert...
    return i + insert.length - 1

  # Tries to merge the Jamo sequence ending at index `i` into one precomposed
  # syllable. When that is not possible, tags the components with the
  # ljmo / vjmo / tjmo features instead.
  # Returns the index of the last glyph belonging to the syllable afterwards.
  compose = (glyphs, i, font) ->
    glyph = glyphs[i]
    code = glyph.codePoints[0]
    type = getType code

    prev = glyphs[i - 1].codePoints[0]
    prevType = getType prev

    # Figure out what type of syllable we're dealing with
    if prevType is LV and type is T
      # <LV,T>
      lv = prev
      tjmo = glyph
    else
      if type is V
        # <L,V>
        ljmo = glyphs[i - 1]
        vjmo = glyph
      else
        # <L,V,T>
        ljmo = glyphs[i - 2]
        vjmo = glyphs[i - 1]
        tjmo = glyph

      l = ljmo.codePoints[0]
      v = vjmo.codePoints[0]

      # Make sure L and V are combining characters
      if isCombiningL(l) and isCombiningV(v)
        lv = HANGUL_BASE + ((l - L_BASE) * V_COUNT + (v - V_BASE)) * T_COUNT

    t = tjmo?.codePoints[0] or T_BASE
    if lv? and (t is T_BASE or isCombiningT(t))
      s = lv + (t - T_BASE)

      # Replace with a composed glyph if supported by the font,
      # otherwise apply the proper OpenType features to each component.
      if font.hasGlyphForCodePoint s
        del = if prevType is V then 3 else 2
        glyphs.splice i - del + 1, del, getGlyph(font, s, glyph.features)
        return i - del + 1

    # Didn't compose (either a non-combining component or unsupported by font).
    ljmo?.features.ljmo = true
    vjmo?.features.vjmo = true
    tjmo?.features.tjmo = true

    if prevType is LV
      # Sequence was originally <L,V>, which got combined earlier.
      # Either the T was non-combining, or the LVT glyph wasn't supported.
      # Decompose the glyph again and apply OT features.
      # decompose returns the index of the last glyph standing in for the LV
      # syllable (unchanged if it could not decompose), so the T is right after it.
      return decompose(glyphs, i - 1, font) + 1

    return i

  # Moves the tone mark at index `i` in front of the syllable preceding it.
  # Zero-width tone marks are left where they are.
  reorderToneMark = (glyphs, i, font) ->
    glyph = glyphs[i]
    code = glyph.codePoints[0]

    # Move tone mark to the beginning of the previous syllable, unless it is zero width
    return if font.glyphForCodePoint(code).advanceWidth is 0

    # Number of glyphs making up the preceding syllable
    prev = glyphs[i - 1].codePoints[0]
    len = switch getType prev
      when LV, LVT then 1
      when V then 2
      when T
        # <LV,T> stays two glyphs when it could neither be composed nor decomposed
        if i >= 2 and getType(glyphs[i - 2].codePoints[0]) is LV then 2 else 3

    # Leave the mark in place rather than splice at a bogus index
    return unless len? and i - len >= 0

    glyphs.splice i, 1
    glyphs.splice i - len, 0, glyph

  # Inserts a dotted circle next to the stray tone mark at index `i`, provided
  # the font has a glyph for it.
  # Returns the index of the last glyph of the resulting pair (`i` if nothing was inserted).
  insertDottedCircle = (glyphs, i, font) ->
    glyph = glyphs[i]
    code = glyph.codePoints[0]

    if font.hasGlyphForCodePoint DOTTED_CIRCLE
      dottedCircle = getGlyph font, DOTTED_CIRCLE, glyph.features

      # If the tone mark is zero width, insert the dotted circle before, otherwise after
      idx = if font.glyphForCodePoint(code).advanceWidth is 0 then i else i + 1
      glyphs.splice idx, 0, dottedCircle
      i++

    return i
module.exports = HangulShaper
|