2022-08-02 06:19:38 +00:00
|
|
|
package uniseg
|
|
|
|
|
|
|
|
import "unicode/utf8"
|
|
|
|
|
|
|
|
// The states of the line break parser.
//
// The plain states are numbered consecutively starting at 0 (lbAny). The two
// trailing constants are not states but flag bits which
// transitionLineBreakState combines with a state value using bitwise OR. They
// are larger than any plain state so they can be masked out again.
const (
	lbAny = iota
	lbBK
	lbCR
	lbLF
	lbNL
	lbSP
	lbZW
	lbWJ
	lbGL
	lbBA
	lbHY
	lbCL
	lbCP
	lbEX
	lbIS
	lbSY
	lbOP
	lbQU
	lbQUSP
	lbNS
	lbCLCPSP
	lbB2
	lbB2SP
	lbCB
	lbBB
	lbLB21a
	lbHL
	lbAL
	lbNU
	lbPR
	lbEB
	lbIDEM
	lbNUNU
	lbNUSY
	lbNUIS
	lbNUCL
	lbNUCP
	lbPO
	lbJL
	lbJV
	lbJT
	lbH2
	lbH3
	lbOddRI
	lbEvenRI
	lbExtPicCn

	// Flag bits. lbZWJBit is set when the previous code point was a zero-width
	// joiner (it suppresses the next break, LB8a). lbCPeaFWHBit is set when the
	// parser entered lbCP/lbNUCP with a code point whose East Asian width is
	// not F, W, or H (LB30).
	lbZWJBit     = 64
	lbCPeaFWHBit = 128
)
|
|
|
|
|
|
|
|
// These constants define whether a given text may be broken into the next line.
// If the break is optional (LineCanBreak), you may choose to break or not based
// on your own criteria, for example, if the text has reached the available
// width.
const (
	LineDontBreak = iota // You may not break the line here.
	LineCanBreak         // You may or may not break the line here.
	LineMustBreak        // You must break the line here.
)
|
|
|
|
|
2024-01-30 16:47:01 +00:00
|
|
|
// lbTransitions implements the line break parser's state transitions. It's
|
|
|
|
// anologous to [grTransitions], see comments there for details.
|
|
|
|
//
|
|
|
|
// Unicode version 15.0.0.
|
|
|
|
func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
|
|
|
|
switch uint64(state) | uint64(prop)<<32 {
|
2022-08-02 06:19:38 +00:00
|
|
|
// LB4.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbBK | prAny<<32:
|
|
|
|
return lbAny, LineMustBreak, 40
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB5.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbCR | prLF<<32:
|
|
|
|
return lbLF, LineDontBreak, 50
|
|
|
|
case lbCR | prAny<<32:
|
|
|
|
return lbAny, LineMustBreak, 50
|
|
|
|
case lbLF | prAny<<32:
|
|
|
|
return lbAny, LineMustBreak, 50
|
|
|
|
case lbNL | prAny<<32:
|
|
|
|
return lbAny, LineMustBreak, 50
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB6.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prBK<<32:
|
|
|
|
return lbBK, LineDontBreak, 60
|
|
|
|
case lbAny | prCR<<32:
|
|
|
|
return lbCR, LineDontBreak, 60
|
|
|
|
case lbAny | prLF<<32:
|
|
|
|
return lbLF, LineDontBreak, 60
|
|
|
|
case lbAny | prNL<<32:
|
|
|
|
return lbNL, LineDontBreak, 60
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB7.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prSP<<32:
|
|
|
|
return lbSP, LineDontBreak, 70
|
|
|
|
case lbAny | prZW<<32:
|
|
|
|
return lbZW, LineDontBreak, 70
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB8.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbZW | prSP<<32:
|
|
|
|
return lbZW, LineDontBreak, 70
|
|
|
|
case lbZW | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 80
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB11.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prWJ<<32:
|
|
|
|
return lbWJ, LineDontBreak, 110
|
|
|
|
case lbWJ | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 110
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB12.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prGL<<32:
|
|
|
|
return lbGL, LineCanBreak, 310
|
|
|
|
case lbGL | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 120
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB13 (simple transitions).
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prCL<<32:
|
|
|
|
return lbCL, LineCanBreak, 310
|
|
|
|
case lbAny | prCP<<32:
|
|
|
|
return lbCP, LineCanBreak, 310
|
|
|
|
case lbAny | prEX<<32:
|
|
|
|
return lbEX, LineDontBreak, 130
|
|
|
|
case lbAny | prIS<<32:
|
|
|
|
return lbIS, LineCanBreak, 310
|
|
|
|
case lbAny | prSY<<32:
|
|
|
|
return lbSY, LineCanBreak, 310
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB14.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prOP<<32:
|
|
|
|
return lbOP, LineCanBreak, 310
|
|
|
|
case lbOP | prSP<<32:
|
|
|
|
return lbOP, LineDontBreak, 70
|
|
|
|
case lbOP | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 140
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB15.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbQU | prSP<<32:
|
|
|
|
return lbQUSP, LineDontBreak, 70
|
|
|
|
case lbQU | prOP<<32:
|
|
|
|
return lbOP, LineDontBreak, 150
|
|
|
|
case lbQUSP | prOP<<32:
|
|
|
|
return lbOP, LineDontBreak, 150
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB16.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbCL | prSP<<32:
|
|
|
|
return lbCLCPSP, LineDontBreak, 70
|
|
|
|
case lbNUCL | prSP<<32:
|
|
|
|
return lbCLCPSP, LineDontBreak, 70
|
|
|
|
case lbCP | prSP<<32:
|
|
|
|
return lbCLCPSP, LineDontBreak, 70
|
|
|
|
case lbNUCP | prSP<<32:
|
|
|
|
return lbCLCPSP, LineDontBreak, 70
|
|
|
|
case lbCL | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 160
|
|
|
|
case lbNUCL | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 160
|
|
|
|
case lbCP | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 160
|
|
|
|
case lbNUCP | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 160
|
|
|
|
case lbCLCPSP | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 160
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB17.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prB2<<32:
|
|
|
|
return lbB2, LineCanBreak, 310
|
|
|
|
case lbB2 | prSP<<32:
|
|
|
|
return lbB2SP, LineDontBreak, 70
|
|
|
|
case lbB2 | prB2<<32:
|
|
|
|
return lbB2, LineDontBreak, 170
|
|
|
|
case lbB2SP | prB2<<32:
|
|
|
|
return lbB2, LineDontBreak, 170
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB18.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbSP | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 180
|
|
|
|
case lbQUSP | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 180
|
|
|
|
case lbCLCPSP | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 180
|
|
|
|
case lbB2SP | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 180
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB19.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prQU<<32:
|
|
|
|
return lbQU, LineDontBreak, 190
|
|
|
|
case lbQU | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 190
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB20.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prCB<<32:
|
|
|
|
return lbCB, LineCanBreak, 200
|
|
|
|
case lbCB | prAny<<32:
|
|
|
|
return lbAny, LineCanBreak, 200
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB21.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prBA<<32:
|
|
|
|
return lbBA, LineDontBreak, 210
|
|
|
|
case lbAny | prHY<<32:
|
|
|
|
return lbHY, LineDontBreak, 210
|
|
|
|
case lbAny | prNS<<32:
|
|
|
|
return lbNS, LineDontBreak, 210
|
|
|
|
case lbAny | prBB<<32:
|
|
|
|
return lbBB, LineCanBreak, 310
|
|
|
|
case lbBB | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 210
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB21a.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prHL<<32:
|
|
|
|
return lbHL, LineCanBreak, 310
|
|
|
|
case lbHL | prHY<<32:
|
|
|
|
return lbLB21a, LineDontBreak, 210
|
|
|
|
case lbHL | prBA<<32:
|
|
|
|
return lbLB21a, LineDontBreak, 210
|
|
|
|
case lbLB21a | prAny<<32:
|
|
|
|
return lbAny, LineDontBreak, 211
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB21b.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbSY | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 212
|
|
|
|
case lbNUSY | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 212
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB22.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prIN<<32:
|
|
|
|
return lbAny, LineDontBreak, 220
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB23.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prAL<<32:
|
|
|
|
return lbAL, LineCanBreak, 310
|
|
|
|
case lbAny | prNU<<32:
|
|
|
|
return lbNU, LineCanBreak, 310
|
|
|
|
case lbAL | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 230
|
|
|
|
case lbHL | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 230
|
|
|
|
case lbNU | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 230
|
|
|
|
case lbNU | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 230
|
|
|
|
case lbNUNU | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 230
|
|
|
|
case lbNUNU | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 230
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB23a.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prPR<<32:
|
|
|
|
return lbPR, LineCanBreak, 310
|
|
|
|
case lbAny | prID<<32:
|
|
|
|
return lbIDEM, LineCanBreak, 310
|
|
|
|
case lbAny | prEB<<32:
|
|
|
|
return lbEB, LineCanBreak, 310
|
|
|
|
case lbAny | prEM<<32:
|
|
|
|
return lbIDEM, LineCanBreak, 310
|
|
|
|
case lbPR | prID<<32:
|
|
|
|
return lbIDEM, LineDontBreak, 231
|
|
|
|
case lbPR | prEB<<32:
|
|
|
|
return lbEB, LineDontBreak, 231
|
|
|
|
case lbPR | prEM<<32:
|
|
|
|
return lbIDEM, LineDontBreak, 231
|
|
|
|
case lbIDEM | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 231
|
|
|
|
case lbEB | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 231
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB24.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prPO<<32:
|
|
|
|
return lbPO, LineCanBreak, 310
|
|
|
|
case lbPR | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 240
|
|
|
|
case lbPR | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 240
|
|
|
|
case lbPO | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 240
|
|
|
|
case lbPO | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 240
|
|
|
|
case lbAL | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 240
|
|
|
|
case lbAL | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 240
|
|
|
|
case lbHL | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 240
|
|
|
|
case lbHL | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 240
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB25 (simple transitions).
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbPR | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 250
|
|
|
|
case lbPO | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 250
|
|
|
|
case lbOP | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 250
|
|
|
|
case lbHY | prNU<<32:
|
|
|
|
return lbNU, LineDontBreak, 250
|
|
|
|
case lbNU | prNU<<32:
|
|
|
|
return lbNUNU, LineDontBreak, 250
|
|
|
|
case lbNU | prSY<<32:
|
|
|
|
return lbNUSY, LineDontBreak, 250
|
|
|
|
case lbNU | prIS<<32:
|
|
|
|
return lbNUIS, LineDontBreak, 250
|
|
|
|
case lbNUNU | prNU<<32:
|
|
|
|
return lbNUNU, LineDontBreak, 250
|
|
|
|
case lbNUNU | prSY<<32:
|
|
|
|
return lbNUSY, LineDontBreak, 250
|
|
|
|
case lbNUNU | prIS<<32:
|
|
|
|
return lbNUIS, LineDontBreak, 250
|
|
|
|
case lbNUSY | prNU<<32:
|
|
|
|
return lbNUNU, LineDontBreak, 250
|
|
|
|
case lbNUSY | prSY<<32:
|
|
|
|
return lbNUSY, LineDontBreak, 250
|
|
|
|
case lbNUSY | prIS<<32:
|
|
|
|
return lbNUIS, LineDontBreak, 250
|
|
|
|
case lbNUIS | prNU<<32:
|
|
|
|
return lbNUNU, LineDontBreak, 250
|
|
|
|
case lbNUIS | prSY<<32:
|
|
|
|
return lbNUSY, LineDontBreak, 250
|
|
|
|
case lbNUIS | prIS<<32:
|
|
|
|
return lbNUIS, LineDontBreak, 250
|
|
|
|
case lbNU | prCL<<32:
|
|
|
|
return lbNUCL, LineDontBreak, 250
|
|
|
|
case lbNU | prCP<<32:
|
|
|
|
return lbNUCP, LineDontBreak, 250
|
|
|
|
case lbNUNU | prCL<<32:
|
|
|
|
return lbNUCL, LineDontBreak, 250
|
|
|
|
case lbNUNU | prCP<<32:
|
|
|
|
return lbNUCP, LineDontBreak, 250
|
|
|
|
case lbNUSY | prCL<<32:
|
|
|
|
return lbNUCL, LineDontBreak, 250
|
|
|
|
case lbNUSY | prCP<<32:
|
|
|
|
return lbNUCP, LineDontBreak, 250
|
|
|
|
case lbNUIS | prCL<<32:
|
|
|
|
return lbNUCL, LineDontBreak, 250
|
|
|
|
case lbNUIS | prCP<<32:
|
|
|
|
return lbNUCP, LineDontBreak, 250
|
|
|
|
case lbNU | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNUNU | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNUSY | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNUIS | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNUCL | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNUCP | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 250
|
|
|
|
case lbNU | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
|
|
|
case lbNUNU | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
|
|
|
case lbNUSY | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
|
|
|
case lbNUIS | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
|
|
|
case lbNUCL | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
|
|
|
case lbNUCP | prPR<<32:
|
|
|
|
return lbPR, LineDontBreak, 250
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB26.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAny | prJL<<32:
|
|
|
|
return lbJL, LineCanBreak, 310
|
|
|
|
case lbAny | prJV<<32:
|
|
|
|
return lbJV, LineCanBreak, 310
|
|
|
|
case lbAny | prJT<<32:
|
|
|
|
return lbJT, LineCanBreak, 310
|
|
|
|
case lbAny | prH2<<32:
|
|
|
|
return lbH2, LineCanBreak, 310
|
|
|
|
case lbAny | prH3<<32:
|
|
|
|
return lbH3, LineCanBreak, 310
|
|
|
|
case lbJL | prJL<<32:
|
|
|
|
return lbJL, LineDontBreak, 260
|
|
|
|
case lbJL | prJV<<32:
|
|
|
|
return lbJV, LineDontBreak, 260
|
|
|
|
case lbJL | prH2<<32:
|
|
|
|
return lbH2, LineDontBreak, 260
|
|
|
|
case lbJL | prH3<<32:
|
|
|
|
return lbH3, LineDontBreak, 260
|
|
|
|
case lbJV | prJV<<32:
|
|
|
|
return lbJV, LineDontBreak, 260
|
|
|
|
case lbJV | prJT<<32:
|
|
|
|
return lbJT, LineDontBreak, 260
|
|
|
|
case lbH2 | prJV<<32:
|
|
|
|
return lbJV, LineDontBreak, 260
|
|
|
|
case lbH2 | prJT<<32:
|
|
|
|
return lbJT, LineDontBreak, 260
|
|
|
|
case lbJT | prJT<<32:
|
|
|
|
return lbJT, LineDontBreak, 260
|
|
|
|
case lbH3 | prJT<<32:
|
|
|
|
return lbJT, LineDontBreak, 260
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB27.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbJL | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 270
|
|
|
|
case lbJV | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 270
|
|
|
|
case lbJT | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 270
|
|
|
|
case lbH2 | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 270
|
|
|
|
case lbH3 | prPO<<32:
|
|
|
|
return lbPO, LineDontBreak, 270
|
|
|
|
case lbPR | prJL<<32:
|
|
|
|
return lbJL, LineDontBreak, 270
|
|
|
|
case lbPR | prJV<<32:
|
|
|
|
return lbJV, LineDontBreak, 270
|
|
|
|
case lbPR | prJT<<32:
|
|
|
|
return lbJT, LineDontBreak, 270
|
|
|
|
case lbPR | prH2<<32:
|
|
|
|
return lbH2, LineDontBreak, 270
|
|
|
|
case lbPR | prH3<<32:
|
|
|
|
return lbH3, LineDontBreak, 270
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB28.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbAL | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 280
|
|
|
|
case lbAL | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 280
|
|
|
|
case lbHL | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 280
|
|
|
|
case lbHL | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 280
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// LB29.
|
2024-01-30 16:47:01 +00:00
|
|
|
case lbIS | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 290
|
|
|
|
case lbIS | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 290
|
|
|
|
case lbNUIS | prAL<<32:
|
|
|
|
return lbAL, LineDontBreak, 290
|
|
|
|
case lbNUIS | prHL<<32:
|
|
|
|
return lbHL, LineDontBreak, 290
|
|
|
|
|
|
|
|
default:
|
|
|
|
return -1, -1, -1
|
|
|
|
}
|
2022-08-02 06:19:38 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// transitionLineBreakState determines the new state of the line break parser
|
|
|
|
// given the current state and the next code point. It also returns the type of
|
|
|
|
// line break: LineDontBreak, LineCanBreak, or LineMustBreak. If more than one
|
|
|
|
// code point is needed to determine the new state, the byte slice or the string
|
|
|
|
// starting after rune "r" can be used (whichever is not nil or empty) for
|
|
|
|
// further lookups.
|
|
|
|
func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
|
|
|
|
// Determine the property of the next character.
|
2024-01-30 16:47:01 +00:00
|
|
|
nextProperty, generalCategory := propertyLineBreak(r)
|
2022-08-02 06:19:38 +00:00
|
|
|
|
|
|
|
// Prepare.
|
|
|
|
var forceNoBreak, isCPeaFWH bool
|
|
|
|
if state >= 0 && state&lbCPeaFWHBit != 0 {
|
|
|
|
isCPeaFWH = true // LB30: CP but ea is not F, W, or H.
|
|
|
|
state = state &^ lbCPeaFWHBit
|
|
|
|
}
|
|
|
|
if state >= 0 && state&lbZWJBit != 0 {
|
|
|
|
state = state &^ lbZWJBit // Extract zero-width joiner bit.
|
|
|
|
forceNoBreak = true // LB8a.
|
|
|
|
}
|
|
|
|
|
|
|
|
defer func() {
|
|
|
|
// Transition into LB30.
|
|
|
|
if newState == lbCP || newState == lbNUCP {
|
2024-01-30 16:47:01 +00:00
|
|
|
ea := propertyEastAsianWidth(r)
|
2022-08-02 06:19:38 +00:00
|
|
|
if ea != prF && ea != prW && ea != prH {
|
|
|
|
newState |= lbCPeaFWHBit
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Override break.
|
|
|
|
if forceNoBreak {
|
|
|
|
lineBreak = LineDontBreak
|
|
|
|
}
|
|
|
|
}()
|
|
|
|
|
|
|
|
// LB1.
|
|
|
|
if nextProperty == prAI || nextProperty == prSG || nextProperty == prXX {
|
|
|
|
nextProperty = prAL
|
|
|
|
} else if nextProperty == prSA {
|
|
|
|
if generalCategory == gcMn || generalCategory == gcMc {
|
|
|
|
nextProperty = prCM
|
|
|
|
} else {
|
|
|
|
nextProperty = prAL
|
|
|
|
}
|
|
|
|
} else if nextProperty == prCJ {
|
|
|
|
nextProperty = prNS
|
|
|
|
}
|
|
|
|
|
|
|
|
// Combining marks.
|
|
|
|
if nextProperty == prZWJ || nextProperty == prCM {
|
|
|
|
var bit int
|
|
|
|
if nextProperty == prZWJ {
|
|
|
|
bit = lbZWJBit
|
|
|
|
}
|
|
|
|
mustBreakState := state < 0 || state == lbBK || state == lbCR || state == lbLF || state == lbNL
|
|
|
|
if !mustBreakState && state != lbSP && state != lbZW && state != lbQUSP && state != lbCLCPSP && state != lbB2SP {
|
|
|
|
// LB9.
|
|
|
|
return state | bit, LineDontBreak
|
|
|
|
} else {
|
|
|
|
// LB10.
|
|
|
|
if mustBreakState {
|
|
|
|
return lbAL | bit, LineMustBreak
|
|
|
|
}
|
|
|
|
return lbAL | bit, LineCanBreak
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Find the applicable transition in the table.
|
|
|
|
var rule int
|
2024-01-30 16:47:01 +00:00
|
|
|
newState, lineBreak, rule = lbTransitions(state, nextProperty)
|
|
|
|
if newState < 0 {
|
2022-08-02 06:19:38 +00:00
|
|
|
// No specific transition found. Try the less specific ones.
|
2024-01-30 16:47:01 +00:00
|
|
|
anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
|
|
|
|
anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
|
|
|
|
if anyPropProp >= 0 && anyStateProp >= 0 {
|
2022-08-02 06:19:38 +00:00
|
|
|
// Both apply. We'll use a mix (see comments for grTransitions).
|
2024-01-30 16:47:01 +00:00
|
|
|
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
|
|
|
|
if anyPropRule < anyStateRule {
|
|
|
|
lineBreak, rule = anyPropLineBreak, anyPropRule
|
2022-08-02 06:19:38 +00:00
|
|
|
}
|
2024-01-30 16:47:01 +00:00
|
|
|
} else if anyPropProp >= 0 {
|
2022-08-02 06:19:38 +00:00
|
|
|
// We only have a specific state.
|
2024-01-30 16:47:01 +00:00
|
|
|
newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
|
2022-08-02 06:19:38 +00:00
|
|
|
// This branch will probably never be reached because okAnyState will
|
|
|
|
// always be true given the current transition map. But we keep it here
|
|
|
|
// for future modifications to the transition map where this may not be
|
|
|
|
// true anymore.
|
2024-01-30 16:47:01 +00:00
|
|
|
} else if anyStateProp >= 0 {
|
2022-08-02 06:19:38 +00:00
|
|
|
// We only have a specific property.
|
2024-01-30 16:47:01 +00:00
|
|
|
newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
|
2022-08-02 06:19:38 +00:00
|
|
|
} else {
|
|
|
|
// No known transition. LB31: ALL ÷ ALL.
|
|
|
|
newState, lineBreak, rule = lbAny, LineCanBreak, 310
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB12a.
|
|
|
|
if rule > 121 &&
|
|
|
|
nextProperty == prGL &&
|
|
|
|
(state != lbSP && state != lbBA && state != lbHY && state != lbLB21a && state != lbQUSP && state != lbCLCPSP && state != lbB2SP) {
|
|
|
|
return lbGL, LineDontBreak
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB13.
|
|
|
|
if rule > 130 && state != lbNU && state != lbNUNU {
|
|
|
|
switch nextProperty {
|
|
|
|
case prCL:
|
|
|
|
return lbCL, LineDontBreak
|
|
|
|
case prCP:
|
|
|
|
return lbCP, LineDontBreak
|
|
|
|
case prIS:
|
|
|
|
return lbIS, LineDontBreak
|
|
|
|
case prSY:
|
|
|
|
return lbSY, LineDontBreak
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB25 (look ahead).
|
|
|
|
if rule > 250 &&
|
|
|
|
(state == lbPR || state == lbPO) &&
|
|
|
|
nextProperty == prOP || nextProperty == prHY {
|
|
|
|
var r rune
|
|
|
|
if b != nil { // Byte slice version.
|
|
|
|
r, _ = utf8.DecodeRune(b)
|
|
|
|
} else { // String version.
|
|
|
|
r, _ = utf8.DecodeRuneInString(str)
|
|
|
|
}
|
|
|
|
if r != utf8.RuneError {
|
2024-01-30 16:47:01 +00:00
|
|
|
pr, _ := propertyLineBreak(r)
|
2022-08-02 06:19:38 +00:00
|
|
|
if pr == prNU {
|
|
|
|
return lbNU, LineDontBreak
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB30 (part one).
|
|
|
|
if rule > 300 {
|
|
|
|
if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
|
2024-01-30 16:47:01 +00:00
|
|
|
ea := propertyEastAsianWidth(r)
|
2022-08-02 06:19:38 +00:00
|
|
|
if ea != prF && ea != prW && ea != prH {
|
|
|
|
return lbOP, LineDontBreak
|
|
|
|
}
|
|
|
|
} else if isCPeaFWH {
|
|
|
|
switch nextProperty {
|
|
|
|
case prAL:
|
|
|
|
return lbAL, LineDontBreak
|
|
|
|
case prHL:
|
|
|
|
return lbHL, LineDontBreak
|
|
|
|
case prNU:
|
|
|
|
return lbNU, LineDontBreak
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB30a.
|
|
|
|
if newState == lbAny && nextProperty == prRI {
|
|
|
|
if state != lbOddRI && state != lbEvenRI { // Includes state == -1.
|
|
|
|
// Transition into the first RI.
|
|
|
|
return lbOddRI, lineBreak
|
|
|
|
}
|
|
|
|
if state == lbOddRI {
|
|
|
|
// Don't break pairs of Regional Indicators.
|
|
|
|
return lbEvenRI, LineDontBreak
|
|
|
|
}
|
|
|
|
return lbOddRI, lineBreak
|
|
|
|
}
|
|
|
|
|
|
|
|
// LB30b.
|
|
|
|
if rule > 302 {
|
|
|
|
if nextProperty == prEM {
|
|
|
|
if state == lbEB || state == lbExtPicCn {
|
|
|
|
return prAny, LineDontBreak
|
|
|
|
}
|
|
|
|
}
|
2024-01-30 16:47:01 +00:00
|
|
|
graphemeProperty := propertyGraphemes(r)
|
2022-08-02 06:19:38 +00:00
|
|
|
if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
|
|
|
|
return lbExtPicCn, LineCanBreak
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return
|
|
|
|
}
|