VictoriaMetrics/vendor/github.com/rivo/uniseg/graphemerules.go

139 lines
4.4 KiB
Go
Raw Normal View History

2022-08-02 06:19:38 +00:00
package uniseg
// The states of the grapheme cluster parser. A state summarizes what the parser
// has seen so far in the current cluster; grTransitions maps a (state,
// property) pair to the next state.
const (
	// grAny is the default state. It is also used as the "any state" wildcard
	// in grTransitions keys and is the fallback state when no transition
	// matches.
	grAny = iota
	// grCR is entered after a CR.
	grCR
	// grControlLF is entered after an LF or Control code point (including the
	// LF of a CR LF pair).
	grControlLF
	// grL is entered after a code point with property prL.
	grL
	// grLVV is entered after a code point with property prLV or prV.
	grLVV
	// grLVTT is entered after a code point with property prLVT or prT.
	grLVTT
	// grPrepend is entered after a code point with property prPrepend.
	grPrepend
	// grExtendedPictographic is entered after an Extended_Pictographic code
	// point and is kept while prExtend code points follow.
	grExtendedPictographic
	// grExtendedPictographicZWJ is entered when a ZWJ follows the
	// grExtendedPictographic state.
	grExtendedPictographicZWJ
	// grRIOdd is entered after an odd number of consecutive regional
	// indicators.
	grRIOdd
	// grRIEven is entered after an even number of consecutive regional
	// indicators.
	grRIEven
)
// The grapheme cluster parser's breaking instructions. They are stored as the
// second element of each grTransitions value.
const (
	// grNoBoundary means there is no cluster boundary between the last and the
	// next code point.
	grNoBoundary = iota
	// grBoundary means there is a cluster boundary between the last and the
	// next code point.
	grBoundary
)
// The grapheme cluster parser's state transitions. Maps (state, property) to
// (new state, breaking instruction, rule number). The breaking instruction
// always refers to the boundary between the last and next code point.
//
// This map is queried as follows:
//
2022-09-13 13:44:44 +00:00
// 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property.
// 3. Find any state + specific property.
// 4. If only (2) or (3) (but not both) was found, stop.
// 5. If both (2) and (3) were found, use state from (3) and breaking instruction
// from the transition with the lower rule number, prefer (3) if rule numbers
// are equal. Stop.
// 6. Assume grAny and grBoundary.
2022-08-02 06:19:38 +00:00
//
// Unicode version 14.0.0.
var grTransitions = map[[2]int][3]int{
// GB5
{grAny, prCR}: {grCR, grBoundary, 50},
{grAny, prLF}: {grControlLF, grBoundary, 50},
{grAny, prControl}: {grControlLF, grBoundary, 50},
// GB4
{grCR, prAny}: {grAny, grBoundary, 40},
{grControlLF, prAny}: {grAny, grBoundary, 40},
// GB3.
2023-02-22 02:06:20 +00:00
{grCR, prLF}: {grControlLF, grNoBoundary, 30},
2022-08-02 06:19:38 +00:00
// GB6.
{grAny, prL}: {grL, grBoundary, 9990},
{grL, prL}: {grL, grNoBoundary, 60},
{grL, prV}: {grLVV, grNoBoundary, 60},
{grL, prLV}: {grLVV, grNoBoundary, 60},
{grL, prLVT}: {grLVTT, grNoBoundary, 60},
// GB7.
{grAny, prLV}: {grLVV, grBoundary, 9990},
{grAny, prV}: {grLVV, grBoundary, 9990},
{grLVV, prV}: {grLVV, grNoBoundary, 70},
{grLVV, prT}: {grLVTT, grNoBoundary, 70},
// GB8.
{grAny, prLVT}: {grLVTT, grBoundary, 9990},
{grAny, prT}: {grLVTT, grBoundary, 9990},
{grLVTT, prT}: {grLVTT, grNoBoundary, 80},
// GB9.
{grAny, prExtend}: {grAny, grNoBoundary, 90},
{grAny, prZWJ}: {grAny, grNoBoundary, 90},
// GB9a.
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91},
// GB9b.
{grAny, prPrepend}: {grPrepend, grBoundary, 9990},
{grPrepend, prAny}: {grAny, grNoBoundary, 92},
// GB11.
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990},
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110},
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110},
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110},
// GB12 / GB13.
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990},
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120},
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120},
}
// transitionGraphemeState determines the new state of the grapheme cluster
2022-09-13 13:44:44 +00:00
// parser given the current state and the next code point. It also returns the
// code point's grapheme property (the value mapped by the [graphemeCodePoints]
// table) and whether a cluster boundary was detected.
func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
2022-08-02 06:19:38 +00:00
// Determine the property of the next character.
2022-09-13 13:44:44 +00:00
prop = property(graphemeCodePoints, r)
2022-08-02 06:19:38 +00:00
// Find the applicable transition.
2022-09-13 13:44:44 +00:00
transition, ok := grTransitions[[2]int{state, prop}]
2022-08-02 06:19:38 +00:00
if ok {
// We have a specific transition. We'll use it.
2022-09-13 13:44:44 +00:00
return transition[0], prop, transition[1] == grBoundary
2022-08-02 06:19:38 +00:00
}
// No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{state, prAny}]
2022-09-13 13:44:44 +00:00
transAnyState, okAnyState := grTransitions[[2]int{grAny, prop}]
2022-08-02 06:19:38 +00:00
if okAnyProp && okAnyState {
// Both apply. We'll use a mix (see comments for grTransitions).
newState = transAnyState[0]
boundary = transAnyState[1] == grBoundary
if transAnyProp[2] < transAnyState[2] {
boundary = transAnyProp[1] == grBoundary
}
return
}
if okAnyProp {
// We only have a specific state.
2022-09-13 13:44:44 +00:00
return transAnyProp[0], prop, transAnyProp[1] == grBoundary
2022-08-02 06:19:38 +00:00
// This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be
// true anymore.
}
if okAnyState {
// We only have a specific property.
2022-09-13 13:44:44 +00:00
return transAnyState[0], prop, transAnyState[1] == grBoundary
2022-08-02 06:19:38 +00:00
}
// No known transition. GB999: Any ÷ Any.
2022-09-13 13:44:44 +00:00
return grAny, prop, true
2022-08-02 06:19:38 +00:00
}