VictoriaMetrics/vendor/github.com/rivo/uniseg/properties.go

209 lines
3.4 KiB
Go
Raw Normal View History

2021-02-01 17:32:29 +00:00
package uniseg
2022-08-02 06:19:38 +00:00
// The Unicode properties as used in the various parsers. Only the ones needed
// in the context of this package are included.
2021-02-01 17:32:29 +00:00
const (
2022-09-13 13:44:44 +00:00
prXX = 0 // Same as prAny.
prAny = iota // prAny must be 0.
prPrepend // Grapheme properties must come first, to reduce the number of bits stored in the state vector.
2021-02-01 17:32:29 +00:00
prCR
prLF
prControl
prExtend
prRegionalIndicator
prSpacingMark
prL
prV
prT
prLV
prLVT
prZWJ
prExtendedPictographic
2022-08-02 06:19:38 +00:00
prNewline
prWSegSpace
prDoubleQuote
prSingleQuote
prMidNumLet
prNumeric
prMidLetter
prMidNum
prExtendNumLet
prALetter
prFormat
prHebrewLetter
prKatakana
prSp
prSTerm
prClose
prSContinue
prATerm
prUpper
prLower
prSep
prOLetter
prCM
prBA
prBK
prSP
prEX
prQU
prAL
prPR
prPO
prOP
prCP
prIS
prHY
prSY
prNU
prCL
prNL
prGL
prAI
prBB
prHL
prSA
prJL
prJV
prJT
prNS
prZW
prB2
prIN
prWJ
prID
prEB
prCJ
prH2
prH3
prSG
prCB
prRI
prEM
prN
prNa
prA
prW
prH
prF
2022-09-13 13:44:44 +00:00
prEmojiPresentation
2021-02-01 17:32:29 +00:00
)
2022-08-02 06:19:38 +00:00
// Unicode General Categories. Only the ones needed in the context of this
// package are included.
const (
gcNone = iota // gcNone must be 0.
gcCc
gcZs
gcPo
gcSc
gcPs
gcPe
gcSm
gcPd
gcNd
gcLu
gcSk
gcPc
gcLl
gcSo
gcLo
gcPi
gcCf
gcNo
gcPf
gcLC
gcLm
gcMn
gcMe
gcMc
gcNl
gcZl
gcZp
gcCn
gcCs
gcCo
)
2021-02-01 17:32:29 +00:00
2022-09-13 13:44:44 +00:00
// Special code points.
const (
vs15 = 0xfe0e // Variation Selector-15 (text presentation)
vs16 = 0xfe0f // Variation Selector-16 (emoji presentation)
)
2022-08-02 06:19:38 +00:00
// propertySearch performs a binary search on a property slice and returns the
// entry whose range (start = first array element, end = second array element)
// includes r, or an array of 0's if no such entry was found.
func propertySearch[E interface{ [3]int | [4]int }](dictionary []E, r rune) (result E) {
2021-02-01 17:32:29 +00:00
// Run a binary search.
from := 0
2022-08-02 06:19:38 +00:00
to := len(dictionary)
2021-02-01 17:32:29 +00:00
for to > from {
middle := (from + to) / 2
2022-08-02 06:19:38 +00:00
cpRange := dictionary[middle]
2021-02-01 17:32:29 +00:00
if int(r) < cpRange[0] {
to = middle
continue
}
if int(r) > cpRange[1] {
from = middle + 1
continue
}
2022-08-02 06:19:38 +00:00
return cpRange
2021-02-01 17:32:29 +00:00
}
2022-08-02 06:19:38 +00:00
return
}
// property returns the Unicode property value (see constants above) of the
// given code point.
func property(dictionary [][3]int, r rune) int {
return propertySearch(dictionary, r)[2]
}
2024-01-30 16:47:30 +00:00
// propertyLineBreak returns the Unicode property value and General Category
// (see constants above) of the given code point, as listed in the line break
// code points table, while fast tracking ASCII digits and letters.
func propertyLineBreak(r rune) (property, generalCategory int) {
if r >= 'a' && r <= 'z' {
return prAL, gcLl
}
if r >= 'A' && r <= 'Z' {
return prAL, gcLu
}
if r >= '0' && r <= '9' {
return prNU, gcNd
}
entry := propertySearch(lineBreakCodePoints, r)
2022-08-02 06:19:38 +00:00
return entry[2], entry[3]
2021-02-01 17:32:29 +00:00
}
2024-01-30 16:47:30 +00:00
// propertyGraphemes returns the Unicode grapheme cluster property value of the
// given code point while fast tracking ASCII characters.
func propertyGraphemes(r rune) int {
if r >= 0x20 && r <= 0x7e {
return prAny
}
if r == 0x0a {
return prLF
}
if r == 0x0d {
return prCR
}
if r >= 0 && r <= 0x1f || r == 0x7f {
return prControl
}
return property(graphemeCodePoints, r)
}
// propertyEastAsianWidth returns the Unicode East Asian Width property value of
// the given code point while fast tracking ASCII characters.
func propertyEastAsianWidth(r rune) int {
if r >= 0x20 && r <= 0x7e {
return prNa
}
if r >= 0 && r <= 0x1f || r == 0x7f {
return prN
}
return property(eastAsianWidth, r)
}