vendor: run make vendor-update

This commit is contained in:
Aliaksandr Valialkin 2024-01-30 18:47:01 +02:00
parent adf585f7ed
commit 32e60fe09d
No known key found for this signature in database
GPG key ID: 52C003EE2BCDB9EB
24 changed files with 1121 additions and 571 deletions

6
go.mod
View file

@ -33,7 +33,7 @@ require (
golang.org/x/net v0.20.0 // indirect golang.org/x/net v0.20.0 // indirect
golang.org/x/oauth2 v0.16.0 golang.org/x/oauth2 v0.16.0
golang.org/x/sys v0.16.0 golang.org/x/sys v0.16.0
google.golang.org/api v0.159.0 google.golang.org/api v0.160.0
gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v2 v2.4.0
) )
@ -47,7 +47,7 @@ require (
github.com/AzureAD/microsoft-authentication-library-for-go v1.2.1 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.2.1 // indirect
github.com/VividCortex/ewma v1.2.0 // indirect github.com/VividCortex/ewma v1.2.0 // indirect
github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 // indirect
github.com/aws/aws-sdk-go v1.50.5 // indirect github.com/aws/aws-sdk-go v1.50.6 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 // indirect
github.com/aws/aws-sdk-go-v2/credentials v1.16.16 // indirect github.com/aws/aws-sdk-go-v2/credentials v1.16.16 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.14.11 // indirect
@ -100,7 +100,7 @@ require (
github.com/prometheus/common v0.46.0 // indirect github.com/prometheus/common v0.46.0 // indirect
github.com/prometheus/common/sigv4 v0.1.0 // indirect github.com/prometheus/common/sigv4 v0.1.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect github.com/prometheus/procfs v0.12.0 // indirect
github.com/rivo/uniseg v0.4.4 // indirect github.com/rivo/uniseg v0.4.6 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/stretchr/testify v1.8.4 // indirect github.com/stretchr/testify v1.8.4 // indirect
github.com/valyala/bytebufferpool v1.0.0 // indirect github.com/valyala/bytebufferpool v1.0.0 // indirect

12
go.sum
View file

@ -83,8 +83,8 @@ github.com/andybalholm/brotli v1.0.2/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu
github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig= github.com/andybalholm/brotli v1.0.3/go.mod h1:fO7iG3H7G2nSZ7m0zPUDn85XEX2GTukHGRSepvi9Eig=
github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA= github.com/armon/go-metrics v0.4.1 h1:hR91U9KYmb6bLBYLQjyM+3j+rcd/UhE+G78SFnF8gJA=
github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro= github.com/aws/aws-sdk-go v1.38.35/go.mod h1:hcU610XS61/+aQV88ixoOzUoG7v3b31pl2zKMmprdro=
github.com/aws/aws-sdk-go v1.50.5 h1:H2Aadcgwr7a2aqS6ZwcE+l1mA6ZrTseYCvjw2QLmxIA= github.com/aws/aws-sdk-go v1.50.6 h1:FaXvNwHG3Ri1paUEW16Ahk9zLVqSAdqa1M3phjZR35Q=
github.com/aws/aws-sdk-go v1.50.5/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk= github.com/aws/aws-sdk-go v1.50.6/go.mod h1:LF8svs817+Nz+DmiMQKTO3ubZ/6IaTpq3TjupRn3Eqk=
github.com/aws/aws-sdk-go-v2 v1.24.1 h1:xAojnj+ktS95YZlDf0zxWBkbFtymPeDP+rvUQIH3uAU= github.com/aws/aws-sdk-go-v2 v1.24.1 h1:xAojnj+ktS95YZlDf0zxWBkbFtymPeDP+rvUQIH3uAU=
github.com/aws/aws-sdk-go-v2 v1.24.1/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4= github.com/aws/aws-sdk-go-v2 v1.24.1/go.mod h1:LNh45Br1YAkEKaAqvmE1m8FUx6a5b/V0oAKV7of29b4=
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs= github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.5.4 h1:OCs21ST2LrepDfD3lwlQiOqIGp6JiEUqG84GzTDoyJs=
@ -397,8 +397,8 @@ github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3c
github.com/prometheus/prometheus v0.49.1 h1:90mDvjrFnca2m+0qPSIDr3y7iHPTAagOAElz7j+HtGk= github.com/prometheus/prometheus v0.49.1 h1:90mDvjrFnca2m+0qPSIDr3y7iHPTAagOAElz7j+HtGk=
github.com/prometheus/prometheus v0.49.1/go.mod h1:aDogiyqmv3aBIWDb5z5Sdcxuuf2BOfiJwOIm9JGpMnI= github.com/prometheus/prometheus v0.49.1/go.mod h1:aDogiyqmv3aBIWDb5z5Sdcxuuf2BOfiJwOIm9JGpMnI=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc= github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rivo/uniseg v0.4.4 h1:8TfxU8dW6PdqD27gjM8MVNuicgxIjxpm4K7x4jp8sis= github.com/rivo/uniseg v0.4.6 h1:Sovz9sDSwbOz9tgUy8JpT+KgCkPYJEN/oYzlJiYTNLg=
github.com/rivo/uniseg v0.4.4/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88= github.com/rivo/uniseg v0.4.6/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk= github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
@ -710,8 +710,8 @@ google.golang.org/api v0.24.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0M
google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE= google.golang.org/api v0.28.0/go.mod h1:lIXQywCXRcnZPGlsd8NbLnOjtAoL6em04bJ9+z0MncE=
google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM= google.golang.org/api v0.29.0/go.mod h1:Lcubydp8VUV7KeIHD9z2Bys/sm/vGKnG1UHuDBSrHWM=
google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc= google.golang.org/api v0.30.0/go.mod h1:QGmEvQ87FHZNiUVJkT14jQNYJ4ZJjdRF23ZXz5138Fc=
google.golang.org/api v0.159.0 h1:fVTj+7HHiUYz4JEZCHHoRIeQX7h5FMzrA2RF/DzDdbs= google.golang.org/api v0.160.0 h1:SEspjXHVqE1m5a1fRy8JFB+5jSu+V0GEDKDghF3ttO4=
google.golang.org/api v0.159.0/go.mod h1:0mu0TpK33qnydLvWqbImq2b1eQ5FHRSDCBzAxX9ZHyw= google.golang.org/api v0.160.0/go.mod h1:0mu0TpK33qnydLvWqbImq2b1eQ5FHRSDCBzAxX9ZHyw=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=
google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/appengine v1.5.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4=

View file

@ -34007,9 +34007,18 @@ var awsPartition = partition{
endpointKey{ endpointKey{
Region: "ap-northeast-1", Region: "ap-northeast-1",
}: endpoint{}, }: endpoint{},
endpointKey{
Region: "ap-northeast-2",
}: endpoint{},
endpointKey{
Region: "ap-southeast-1",
}: endpoint{},
endpointKey{ endpointKey{
Region: "ap-southeast-2", Region: "ap-southeast-2",
}: endpoint{}, }: endpoint{},
endpointKey{
Region: "ca-central-1",
}: endpoint{},
endpointKey{ endpointKey{
Region: "eu-central-1", Region: "eu-central-1",
}: endpoint{}, }: endpoint{},
@ -34031,9 +34040,18 @@ var awsPartition = partition{
endpointKey{ endpointKey{
Region: "ui-ap-northeast-1", Region: "ui-ap-northeast-1",
}: endpoint{}, }: endpoint{},
endpointKey{
Region: "ui-ap-northeast-2",
}: endpoint{},
endpointKey{
Region: "ui-ap-southeast-1",
}: endpoint{},
endpointKey{ endpointKey{
Region: "ui-ap-southeast-2", Region: "ui-ap-southeast-2",
}: endpoint{}, }: endpoint{},
endpointKey{
Region: "ui-ca-central-1",
}: endpoint{},
endpointKey{ endpointKey{
Region: "ui-eu-central-1", Region: "ui-eu-central-1",
}: endpoint{}, }: endpoint{},
@ -35867,6 +35885,13 @@ var awscnPartition = partition{
}, },
}, },
}, },
"quicksight": service{
Endpoints: serviceEndpoints{
endpointKey{
Region: "cn-north-1",
}: endpoint{},
},
},
"ram": service{ "ram": service{
Endpoints: serviceEndpoints{ Endpoints: serviceEndpoints{
endpointKey{ endpointKey{
@ -43410,6 +43435,15 @@ var awsisoPartition = partition{
}, },
"datasync": service{ "datasync": service{
Endpoints: serviceEndpoints{ Endpoints: serviceEndpoints{
endpointKey{
Region: "fips-us-iso-east-1",
}: endpoint{
Hostname: "datasync-fips.us-iso-east-1.c2s.ic.gov",
CredentialScope: credentialScope{
Region: "us-iso-east-1",
},
Deprecated: boxedTrue,
},
endpointKey{ endpointKey{
Region: "fips-us-iso-west-1", Region: "fips-us-iso-west-1",
}: endpoint{ }: endpoint{
@ -43419,6 +43453,15 @@ var awsisoPartition = partition{
}, },
Deprecated: boxedTrue, Deprecated: boxedTrue,
}, },
endpointKey{
Region: "us-iso-east-1",
}: endpoint{},
endpointKey{
Region: "us-iso-east-1",
Variant: fipsVariant,
}: endpoint{
Hostname: "datasync-fips.us-iso-east-1.c2s.ic.gov",
},
endpointKey{ endpointKey{
Region: "us-iso-west-1", Region: "us-iso-west-1",
}: endpoint{}, }: endpoint{},

View file

@ -5,4 +5,4 @@ package aws
const SDKName = "aws-sdk-go" const SDKName = "aws-sdk-go"
// SDKVersion is the version of this SDK // SDKVersion is the version of this SDK
const SDKVersion = "1.50.5" const SDKVersion = "1.50.6"

View file

@ -3,7 +3,7 @@
[![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg) [![Go Reference](https://pkg.go.dev/badge/github.com/rivo/uniseg.svg)](https://pkg.go.dev/github.com/rivo/uniseg)
[![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg) [![Go Report](https://img.shields.io/badge/go%20report-A%2B-brightgreen.svg)](https://goreportcard.com/report/github.com/rivo/uniseg)
This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 14.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html). This Go package implements Unicode Text Segmentation according to [Unicode Standard Annex #29](https://unicode.org/reports/tr29/), Unicode Line Breaking according to [Unicode Standard Annex #14](https://unicode.org/reports/tr14/) (Unicode version 15.0.0), and monospace font string width calculation similar to [wcwidth](https://man7.org/linux/man-pages/man3/wcwidth.3.html).
## Background ## Background
@ -73,7 +73,7 @@ for gr.Next() {
### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function ### Using the [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) Function
This is orders of magnitude faster than the `Graphemes` class, but it requires the handling of states and boundaries: This avoids allocating a new `Graphemes` object but it requires the handling of states and boundaries:
```go ```go
str := "🇩🇪🏳️‍🌈" str := "🇩🇪🏳️‍🌈"
@ -88,29 +88,7 @@ for len(str) > 0 {
### Advanced Examples ### Advanced Examples
Breaking into grapheme clusters and evaluating line breaks: The [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class offers the most convenient way to access all functionality of this package. But in some cases, it may be better to use the specialized functions directly. For example, if you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
```go
str := "First line.\nSecond line."
state := -1
var (
c string
boundaries int
)
for len(str) > 0 {
c, str, boundaries, state = uniseg.StepString(str, state)
fmt.Print(c)
if boundaries&uniseg.MaskLine == uniseg.LineCanBreak {
fmt.Print("|")
} else if boundaries&uniseg.MaskLine == uniseg.LineMustBreak {
fmt.Print("‖")
}
}
// First |line.
// ‖Second |line.‖
```
If you're only interested in word segmentation, use [`FirstWord`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWord) or [`FirstWordInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstWordInString):
```go ```go
str := "Hello, world!" str := "Hello, world!"
@ -133,6 +111,8 @@ Similarly, use
- [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and - [`FirstSentence`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentence) or [`FirstSentenceInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstSentenceInString) for sentence segmentation only, and
- [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries). - [`FirstLineSegment`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegment) or [`FirstLineSegmentInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstLineSegmentInString) for line breaking / word wrapping (although using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step) or [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString) is preferred as it will observe grapheme cluster boundaries).
If you're only interested in the width of characters, use [`FirstGraphemeCluster`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeCluster) or [`FirstGraphemeClusterInString`](https://pkg.go.dev/github.com/rivo/uniseg#FirstGraphemeClusterInString). It is much faster than using [`Step`](https://pkg.go.dev/github.com/rivo/uniseg#Step), [`StepString`](https://pkg.go.dev/github.com/rivo/uniseg#StepString), or the [`Graphemes`](https://pkg.go.dev/github.com/rivo/uniseg#Graphemes) class because it does not include the logic for word / sentence / line boundaries.
Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString): Finally, if you need to reverse a string while preserving grapheme clusters, use [`ReverseString`](https://pkg.go.dev/github.com/rivo/uniseg#ReverseString):
```go ```go

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// eastAsianWidth are taken from // eastAsianWidth are taken from
// https://www.unicode.org/Public/14.0.0/ucd/EastAsianWidth.txt // https://www.unicode.org/Public/15.0.0/ucd/EastAsianWidth.txt
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var eastAsianWidth = [][3]int{ var eastAsianWidth = [][3]int{
{0x0000, 0x001F, prN}, // Cc [32] <control-0000>..<control-001F> {0x0000, 0x001F, prN}, // Cc [32] <control-0000>..<control-001F>
@ -504,6 +504,7 @@ var eastAsianWidth = [][3]int{
{0x0CE2, 0x0CE3, prN}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE2, 0x0CE3, prN}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prN}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CE6, 0x0CEF, prN}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prN}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA {0x0CF1, 0x0CF2, prN}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prN}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prN}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D00, 0x0D01, prN}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prN}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D02, 0x0D03, prN}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prN}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L {0x0D04, 0x0D0C, prN}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@ -565,7 +566,7 @@ var eastAsianWidth = [][3]int{
{0x0EBD, 0x0EBD, prN}, // Lo LAO SEMIVOWEL SIGN NYO {0x0EBD, 0x0EBD, prN}, // Lo LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prN}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC0, 0x0EC4, prN}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prN}, // Lm LAO KO LA {0x0EC6, 0x0EC6, prN}, // Lm LAO KO LA
{0x0EC8, 0x0ECD, prN}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA {0x0EC8, 0x0ECE, prN}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prN}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0ED0, 0x0ED9, prN}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prN}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO {0x0EDC, 0x0EDF, prN}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prN}, // Lo TIBETAN SYLLABLE OM {0x0F00, 0x0F00, prN}, // Lo TIBETAN SYLLABLE OM
@ -1916,6 +1917,7 @@ var eastAsianWidth = [][3]int{
{0x10EAB, 0x10EAC, prN}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAB, 0x10EAC, prN}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EAD, 0x10EAD, prN}, // Pd YEZIDI HYPHENATION MARK {0x10EAD, 0x10EAD, prN}, // Pd YEZIDI HYPHENATION MARK
{0x10EB0, 0x10EB1, prN}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE {0x10EB0, 0x10EB1, prN}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prN}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prN}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F00, 0x10F1C, prN}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F1D, 0x10F26, prN}, // No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF {0x10F1D, 0x10F26, prN}, // No [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
{0x10F27, 0x10F27, prN}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F27, 0x10F27, prN}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
@ -1998,6 +2000,8 @@ var eastAsianWidth = [][3]int{
{0x11236, 0x11237, prN}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x11236, 0x11237, prN}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x11238, 0x1123D, prN}, // Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN {0x11238, 0x1123D, prN}, // Po [6] KHOJKI DANDA..KHOJKI ABBREVIATION SIGN
{0x1123E, 0x1123E, prN}, // Mn KHOJKI SIGN SUKUN {0x1123E, 0x1123E, prN}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prN}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prN}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prN}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11280, 0x11286, prN}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prN}, // Lo MULTANI LETTER GHA {0x11288, 0x11288, prN}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prN}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA {0x1128A, 0x1128D, prN}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@ -2160,6 +2164,7 @@ var eastAsianWidth = [][3]int{
{0x11A9E, 0x11AA2, prN}, // Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2 {0x11A9E, 0x11AA2, prN}, // Po [5] SOYOMBO HEAD MARK WITH MOON AND SUN AND TRIPLE FLAME..SOYOMBO TERMINAL MARK-2
{0x11AB0, 0x11ABF, prN}, // Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA {0x11AB0, 0x11ABF, prN}, // Lo [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
{0x11AC0, 0x11AF8, prN}, // Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL {0x11AC0, 0x11AF8, prN}, // Lo [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
{0x11B00, 0x11B09, prN}, // Po [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
{0x11C00, 0x11C08, prN}, // Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L {0x11C00, 0x11C08, prN}, // Lo [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
{0x11C0A, 0x11C2E, prN}, // Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA {0x11C0A, 0x11C2E, prN}, // Lo [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
{0x11C2F, 0x11C2F, prN}, // Mc BHAIKSUKI VOWEL SIGN AA {0x11C2F, 0x11C2F, prN}, // Mc BHAIKSUKI VOWEL SIGN AA
@ -2205,6 +2210,19 @@ var eastAsianWidth = [][3]int{
{0x11EF3, 0x11EF4, prN}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF3, 0x11EF4, prN}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prN}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF5, 0x11EF6, prN}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prN}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION {0x11EF7, 0x11EF8, prN}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prN}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prN}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prN}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prN}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prN}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prN}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prN}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prN}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prN}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prN}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prN}, // Mn KAWI CONJOINER
{0x11F43, 0x11F4F, prN}, // Po [13] KAWI DANDA..KAWI PUNCTUATION CLOSING SPIRAL
{0x11F50, 0x11F59, prN}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prN}, // Lo LISU LETTER YHA {0x11FB0, 0x11FB0, prN}, // Lo LISU LETTER YHA
{0x11FC0, 0x11FD4, prN}, // No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH {0x11FC0, 0x11FD4, prN}, // No [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
{0x11FD5, 0x11FDC, prN}, // So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI {0x11FD5, 0x11FDC, prN}, // So [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
@ -2217,8 +2235,11 @@ var eastAsianWidth = [][3]int{
{0x12480, 0x12543, prN}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12480, 0x12543, prN}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prN}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 {0x12F90, 0x12FF0, prN}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x12FF1, 0x12FF2, prN}, // Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302 {0x12FF1, 0x12FF2, prN}, // Po [2] CYPRO-MINOAN SIGN CM301..CYPRO-MINOAN SIGN CM302
{0x13000, 0x1342E, prN}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 {0x13000, 0x1342F, prN}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x13438, prN}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT {0x13430, 0x1343F, prN}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prN}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prN}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prN}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prN}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x14400, 0x14646, prN}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prN}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16800, 0x16A38, prN}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prN}, // Lo [31] MRO LETTER TA..MRO LETTER TEK {0x16A40, 0x16A5E, prN}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@ -2263,7 +2284,9 @@ var eastAsianWidth = [][3]int{
{0x1AFFD, 0x1AFFE, prW}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1AFFD, 0x1AFFE, prW}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B0FF, prW}, // Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 {0x1B000, 0x1B0FF, prW}, // Lo [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
{0x1B100, 0x1B122, prW}, // Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU {0x1B100, 0x1B122, prW}, // Lo [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prW}, // Lo HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prW}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO {0x1B150, 0x1B152, prW}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prW}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prW}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B164, 0x1B167, prW}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prW}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1B170, 0x1B2FB, prW}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prN}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M {0x1BC00, 0x1BC6A, prN}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@ -2294,6 +2317,7 @@ var eastAsianWidth = [][3]int{
{0x1D200, 0x1D241, prN}, // So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 {0x1D200, 0x1D241, prN}, // So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
{0x1D242, 0x1D244, prN}, // Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME {0x1D242, 0x1D244, prN}, // Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
{0x1D245, 0x1D245, prN}, // So GREEK MUSICAL LEIMMA {0x1D245, 0x1D245, prN}, // So GREEK MUSICAL LEIMMA
{0x1D2C0, 0x1D2D3, prN}, // No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
{0x1D2E0, 0x1D2F3, prN}, // No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN {0x1D2E0, 0x1D2F3, prN}, // No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
{0x1D300, 0x1D356, prN}, // So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING {0x1D300, 0x1D356, prN}, // So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
{0x1D360, 0x1D378, prN}, // No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE {0x1D360, 0x1D378, prN}, // No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@ -2353,11 +2377,14 @@ var eastAsianWidth = [][3]int{
{0x1DF00, 0x1DF09, prN}, // Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF00, 0x1DF09, prN}, // Ll [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prN}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prN}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prN}, // Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL {0x1DF0B, 0x1DF1E, prN}, // Ll [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prN}, // Ll [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E000, 0x1E006, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prN}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E008, 0x1E018, prN}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E01B, 0x1E021, prN}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prN}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E023, 0x1E024, prN}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prN}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA {0x1E026, 0x1E02A, prN}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prN}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prN}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prN}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E100, 0x1E12C, prN}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prN}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E130, 0x1E136, prN}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prN}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER {0x1E137, 0x1E13D, prN}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@ -2370,6 +2397,10 @@ var eastAsianWidth = [][3]int{
{0x1E2EC, 0x1E2EF, prN}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2EC, 0x1E2EF, prN}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prN}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2F0, 0x1E2F9, prN}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E2FF, 0x1E2FF, prN}, // Sc WANCHO NGUN SIGN {0x1E2FF, 0x1E2FF, prN}, // Sc WANCHO NGUN SIGN
{0x1E4D0, 0x1E4EA, prN}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prN}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prN}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prN}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prN}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E0, 0x1E7E6, prN}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prN}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7E8, 0x1E7EB, prN}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prN}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE {0x1E7ED, 0x1E7EE, prN}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@ -2498,13 +2529,14 @@ var eastAsianWidth = [][3]int{
{0x1F6D0, 0x1F6D2, prW}, // So [3] PLACE OF WORSHIP..SHOPPING TROLLEY {0x1F6D0, 0x1F6D2, prW}, // So [3] PLACE OF WORSHIP..SHOPPING TROLLEY
{0x1F6D3, 0x1F6D4, prN}, // So [2] STUPA..PAGODA {0x1F6D3, 0x1F6D4, prN}, // So [2] STUPA..PAGODA
{0x1F6D5, 0x1F6D7, prW}, // So [3] HINDU TEMPLE..ELEVATOR {0x1F6D5, 0x1F6D7, prW}, // So [3] HINDU TEMPLE..ELEVATOR
{0x1F6DD, 0x1F6DF, prW}, // So [3] PLAYGROUND SLIDE..RING BUOY {0x1F6DC, 0x1F6DF, prW}, // So [4] WIRELESS..RING BUOY
{0x1F6E0, 0x1F6EA, prN}, // So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE {0x1F6E0, 0x1F6EA, prN}, // So [11] HAMMER AND WRENCH..NORTHEAST-POINTING AIRPLANE
{0x1F6EB, 0x1F6EC, prW}, // So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING {0x1F6EB, 0x1F6EC, prW}, // So [2] AIRPLANE DEPARTURE..AIRPLANE ARRIVING
{0x1F6F0, 0x1F6F3, prN}, // So [4] SATELLITE..PASSENGER SHIP {0x1F6F0, 0x1F6F3, prN}, // So [4] SATELLITE..PASSENGER SHIP
{0x1F6F4, 0x1F6FC, prW}, // So [9] SCOOTER..ROLLER SKATE {0x1F6F4, 0x1F6FC, prW}, // So [9] SCOOTER..ROLLER SKATE
{0x1F700, 0x1F773, prN}, // So [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE {0x1F700, 0x1F776, prN}, // So [119] ALCHEMICAL SYMBOL FOR QUINTESSENCE..LUNAR ECLIPSE
{0x1F780, 0x1F7D8, prN}, // So [89] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NEGATIVE CIRCLED SQUARE {0x1F77B, 0x1F77F, prN}, // So [5] HAUMEA..ORCUS
{0x1F780, 0x1F7D9, prN}, // So [90] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..NINE POINTED WHITE STAR
{0x1F7E0, 0x1F7EB, prW}, // So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE {0x1F7E0, 0x1F7EB, prW}, // So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
{0x1F7F0, 0x1F7F0, prW}, // So HEAVY EQUALS SIGN {0x1F7F0, 0x1F7F0, prW}, // So HEAVY EQUALS SIGN
{0x1F800, 0x1F80B, prN}, // So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD {0x1F800, 0x1F80B, prN}, // So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
@ -2521,22 +2553,20 @@ var eastAsianWidth = [][3]int{
{0x1F947, 0x1F9FF, prW}, // So [185] FIRST PLACE MEDAL..NAZAR AMULET {0x1F947, 0x1F9FF, prW}, // So [185] FIRST PLACE MEDAL..NAZAR AMULET
{0x1FA00, 0x1FA53, prN}, // So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP {0x1FA00, 0x1FA53, prN}, // So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
{0x1FA60, 0x1FA6D, prN}, // So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER {0x1FA60, 0x1FA6D, prN}, // So [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
{0x1FA70, 0x1FA74, prW}, // So [5] BALLET SHOES..THONG SANDAL {0x1FA70, 0x1FA7C, prW}, // So [13] BALLET SHOES..CRUTCH
{0x1FA78, 0x1FA7C, prW}, // So [5] DROP OF BLOOD..CRUTCH {0x1FA80, 0x1FA88, prW}, // So [9] YO-YO..FLUTE
{0x1FA80, 0x1FA86, prW}, // So [7] YO-YO..NESTING DOLLS {0x1FA90, 0x1FABD, prW}, // So [46] RINGED PLANET..WING
{0x1FA90, 0x1FAAC, prW}, // So [29] RINGED PLANET..HAMSA {0x1FABF, 0x1FAC5, prW}, // So [7] GOOSE..PERSON WITH CROWN
{0x1FAB0, 0x1FABA, prW}, // So [11] FLY..NEST WITH EGGS {0x1FACE, 0x1FADB, prW}, // So [14] MOOSE..PEA POD
{0x1FAC0, 0x1FAC5, prW}, // So [6] ANATOMICAL HEART..PERSON WITH CROWN {0x1FAE0, 0x1FAE8, prW}, // So [9] MELTING FACE..SHAKING FACE
{0x1FAD0, 0x1FAD9, prW}, // So [10] BLUEBERRIES..JAR {0x1FAF0, 0x1FAF8, prW}, // So [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
{0x1FAE0, 0x1FAE7, prW}, // So [8] MELTING FACE..BUBBLES
{0x1FAF0, 0x1FAF6, prW}, // So [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS
{0x1FB00, 0x1FB92, prN}, // So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK {0x1FB00, 0x1FB92, prN}, // So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
{0x1FB94, 0x1FBCA, prN}, // So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON {0x1FB94, 0x1FBCA, prN}, // So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
{0x1FBF0, 0x1FBF9, prN}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FBF0, 0x1FBF9, prN}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x20000, 0x2A6DF, prW}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF {0x20000, 0x2A6DF, prW}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A6E0, 0x2A6FF, prW}, // Cn [32] <reserved-2A6E0>..<reserved-2A6FF> {0x2A6E0, 0x2A6FF, prW}, // Cn [32] <reserved-2A6E0>..<reserved-2A6FF>
{0x2A700, 0x2B738, prW}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 {0x2A700, 0x2B739, prW}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B739, 0x2B73F, prW}, // Cn [7] <reserved-2B739>..<reserved-2B73F> {0x2B73A, 0x2B73F, prW}, // Cn [6] <reserved-2B73A>..<reserved-2B73F>
{0x2B740, 0x2B81D, prW}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B740, 0x2B81D, prW}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B81E, 0x2B81F, prW}, // Cn [2] <reserved-2B81E>..<reserved-2B81F> {0x2B81E, 0x2B81F, prW}, // Cn [2] <reserved-2B81E>..<reserved-2B81F>
{0x2B820, 0x2CEA1, prW}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 {0x2B820, 0x2CEA1, prW}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
@ -2547,7 +2577,9 @@ var eastAsianWidth = [][3]int{
{0x2FA1E, 0x2FA1F, prW}, // Cn [2] <reserved-2FA1E>..<reserved-2FA1F> {0x2FA1E, 0x2FA1F, prW}, // Cn [2] <reserved-2FA1E>..<reserved-2FA1F>
{0x2FA20, 0x2FFFD, prW}, // Cn [1502] <reserved-2FA20>..<reserved-2FFFD> {0x2FA20, 0x2FFFD, prW}, // Cn [1502] <reserved-2FA20>..<reserved-2FFFD>
{0x30000, 0x3134A, prW}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A {0x30000, 0x3134A, prW}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x3134B, 0x3FFFD, prW}, // Cn [60595] <reserved-3134B>..<reserved-3FFFD> {0x3134B, 0x3134F, prW}, // Cn [5] <reserved-3134B>..<reserved-3134F>
{0x31350, 0x323AF, prW}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0x323B0, 0x3FFFD, prW}, // Cn [56398] <reserved-323B0>..<reserved-3FFFD>
{0xE0001, 0xE0001, prN}, // Cf LANGUAGE TAG {0xE0001, 0xE0001, prN}, // Cf LANGUAGE TAG
{0xE0020, 0xE007F, prN}, // Cf [96] TAG SPACE..CANCEL TAG {0xE0020, 0xE007F, prN}, // Cf [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prA}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 {0xE0100, 0xE01EF, prA}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// emojiPresentation are taken from // emojiPresentation are taken from
// //
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var emojiPresentation = [][3]int{ var emojiPresentation = [][3]int{
{0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done {0x231A, 0x231B, prEmojiPresentation}, // E0.6 [2] (⌚..⌛) watch..hourglass done
@ -211,6 +211,7 @@ var emojiPresentation = [][3]int{
{0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart {0x1F6D1, 0x1F6D2, prEmojiPresentation}, // E3.0 [2] (🛑..🛒) stop sign..shopping cart
{0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple {0x1F6D5, 0x1F6D5, prEmojiPresentation}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator {0x1F6D6, 0x1F6D7, prEmojiPresentation}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6DC, 0x1F6DC, prEmojiPresentation}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy {0x1F6DD, 0x1F6DF, prEmojiPresentation}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival {0x1F6EB, 0x1F6EC, prEmojiPresentation}, // E1.0 [2] (🛫..🛬) airplane departure..airplane arrival
{0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick scooter..canoe {0x1F6F4, 0x1F6F6, prEmojiPresentation}, // E3.0 [3] (🛴..🛶) kick scooter..canoe
@ -267,19 +268,28 @@ var emojiPresentation = [][3]int{
{0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet {0x1F9E7, 0x1F9FF, prEmojiPresentation}, // E11.0 [25] (🧧..🧿) red envelope..nazar amulet
{0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA70, 0x1FA73, prEmojiPresentation}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal {0x1FA74, 0x1FA74, prEmojiPresentation}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prEmojiPresentation}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA78, 0x1FA7A, prEmojiPresentation}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA7B, 0x1FA7C, prEmojiPresentation}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute {0x1FA80, 0x1FA82, prEmojiPresentation}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls {0x1FA83, 0x1FA86, prEmojiPresentation}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA88, prEmojiPresentation}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo {0x1FA90, 0x1FA95, prEmojiPresentation}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock {0x1FA96, 0x1FAA8, prEmojiPresentation}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa {0x1FAA9, 0x1FAAC, prEmojiPresentation}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prEmojiPresentation}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather {0x1FAB0, 0x1FAB6, prEmojiPresentation}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs {0x1FAB7, 0x1FABA, prEmojiPresentation}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABD, prEmojiPresentation}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABF, 0x1FABF, prEmojiPresentation}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging {0x1FAC0, 0x1FAC2, prEmojiPresentation}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown {0x1FAC3, 0x1FAC5, prEmojiPresentation}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FACE, 0x1FACF, prEmojiPresentation}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot {0x1FAD0, 0x1FAD6, prEmojiPresentation}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar {0x1FAD7, 0x1FAD9, prEmojiPresentation}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADB, prEmojiPresentation}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles {0x1FAE0, 0x1FAE7, prEmojiPresentation}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAE8, prEmojiPresentation}, // E15.0 [1] (🫨) shaking face
{0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands {0x1FAF0, 0x1FAF6, prEmojiPresentation}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAF8, prEmojiPresentation}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
} }

View file

@ -32,7 +32,7 @@ import (
// We want to test against a specific version rather than the latest. When the // We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests. // package is upgraded to a new version, change these to generate new tests.
const ( const (
testCaseURL = `https://www.unicode.org/Public/14.0.0/ucd/auxiliary/%s.txt` testCaseURL = `https://www.unicode.org/Public/15.0.0/ucd/auxiliary/%s.txt`
) )
func main() { func main() {
@ -76,9 +76,9 @@ func parse(url string) ([]byte, error) {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
buf.Grow(120 << 10) buf.Grow(120 << 10)
buf.WriteString(`package uniseg buf.WriteString(`// Code generated via go generate from gen_breaktest.go. DO NOT EDIT.
// Code generated via go generate from gen_breaktest.go. DO NOT EDIT. package uniseg
// ` + os.Args[3] + ` are Grapheme testcases taken from // ` + os.Args[3] + ` are Grapheme testcases taken from
// ` + url + ` // ` + url + `
@ -136,7 +136,9 @@ var (
// //
// E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷" // E.g. for the input b="÷ 0020 × 0308 ÷ 1F1E6 ÷"
// it will append // it will append
//
// "\u0020\u0308\U0001F1E6" // "\u0020\u0308\U0001F1E6"
//
// and "[][]rune{{0x0020,0x0308},{0x1F1E6},}" // and "[][]rune{{0x0020,0x0308},{0x1F1E6},}"
// to orig and exp respectively. // to orig and exp respectively.
// //

View file

@ -41,8 +41,8 @@ import (
// We want to test against a specific version rather than the latest. When the // We want to test against a specific version rather than the latest. When the
// package is upgraded to a new version, change these to generate new tests. // package is upgraded to a new version, change these to generate new tests.
const ( const (
propertyURL = `https://www.unicode.org/Public/14.0.0/ucd/%s.txt` propertyURL = `https://www.unicode.org/Public/15.0.0/ucd/%s.txt`
emojiURL = `https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt` emojiURL = `https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt`
) )
// The regular expression for a line containing a code point range property. // The regular expression for a line containing a code point range property.
@ -178,6 +178,11 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri
} }
} }
// Avoid overflow during binary search.
if len(properties) >= 1<<31 {
return "", errors.New("too many properties")
}
// Sort properties. // Sort properties.
sort.Slice(properties, func(i, j int) bool { sort.Slice(properties, func(i, j int) bool {
left, _ := strconv.ParseUint(properties[i][0], 16, 64) left, _ := strconv.ParseUint(properties[i][0], 16, 64)
@ -200,9 +205,9 @@ func parse(propertyURL, emojiProperty string, includeGeneralCategory bool) (stri
// ` + emojiURL + ` // ` + emojiURL + `
// ("Extended_Pictographic" only)` // ("Extended_Pictographic" only)`
} }
buf.WriteString(`package uniseg buf.WriteString(`// Code generated via go generate from gen_properties.go. DO NOT EDIT.
// Code generated via go generate from gen_properties.go. DO NOT EDIT. package uniseg
// ` + os.Args[3] + ` are taken from // ` + os.Args[3] + ` are taken from
// ` + propertyURL + emojiComment + ` // ` + propertyURL + emojiComment + `

View file

@ -222,7 +222,7 @@ func FirstGraphemeCluster(b []byte, state int) (cluster, rest []byte, width, new
if len(b) <= length { // If we're already past the end, there is nothing else to parse. if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int var prop int
if state < 0 { if state < 0 {
prop = property(graphemeCodePoints, r) prop = propertyGraphemes(r)
} else { } else {
prop = state >> shiftGraphemePropState prop = state >> shiftGraphemePropState
} }
@ -284,7 +284,7 @@ func FirstGraphemeClusterInString(str string, state int) (cluster, rest string,
if len(str) <= length { // If we're already past the end, there is nothing else to parse. if len(str) <= length { // If we're already past the end, there is nothing else to parse.
var prop int var prop int
if state < 0 { if state < 0 {
prop = property(graphemeCodePoints, r) prop = propertyGraphemes(r)
} else { } else {
prop = state >> shiftGraphemePropState prop = state >> shiftGraphemePropState
} }

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// graphemeCodePoints are taken from // graphemeCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/GraphemeBreakProperty.txt // https://www.unicode.org/Public/15.0.0/ucd/auxiliary/GraphemeBreakProperty.txt
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var graphemeCodePoints = [][3]int{ var graphemeCodePoints = [][3]int{
{0x0000, 0x0009, prControl}, // Cc [10] <control-0000>..<control-0009> {0x0000, 0x0009, prControl}, // Cc [10] <control-0000>..<control-0009>
@ -143,6 +143,7 @@ var graphemeCodePoints = [][3]int{
{0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA {0x0CCC, 0x0CCD, prExtend}, // Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA
{0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK {0x0CD5, 0x0CD6, prExtend}, // Mc [2] KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CF3, 0x0CF3, prSpacingMark}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D02, 0x0D03, prSpacingMark}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA {0x0D3B, 0x0D3C, prExtend}, // Mn [2] MALAYALAM SIGN VERTICAL BAR VIRAMA..MALAYALAM SIGN CIRCULAR VIRAMA
@ -172,7 +173,7 @@ var graphemeCodePoints = [][3]int{
{0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN
{0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM {0x0EB3, 0x0EB3, prSpacingMark}, // Lo LAO VOWEL SIGN AM
{0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
{0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA {0x0F35, 0x0F35, prExtend}, // Mn TIBETAN MARK NGAS BZUNG NYI ZLA
{0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS {0x0F37, 0x0F37, prExtend}, // Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS
@ -1336,6 +1337,7 @@ var graphemeCodePoints = [][3]int{
{0x10AE5, 0x10AE6, prExtend}, // Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW {0x10AE5, 0x10AE6, prExtend}, // Mn [2] MANICHAEAN ABBREVIATION MARK ABOVE..MANICHAEAN ABBREVIATION MARK BELOW
{0x10D24, 0x10D27, prExtend}, // Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI {0x10D24, 0x10D27, prExtend}, // Mn [4] HANIFI ROHINGYA SIGN HARBAHAY..HANIFI ROHINGYA SIGN TASSI
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F46, 0x10F50, prExtend}, // Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW {0x10F46, 0x10F50, prExtend}, // Mn [11] SOGDIAN COMBINING DOT BELOW..SOGDIAN COMBINING STROKE BELOW
{0x10F82, 0x10F85, prExtend}, // Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW {0x10F82, 0x10F85, prExtend}, // Mn [4] OLD UYGHUR COMBINING DOT ABOVE..OLD UYGHUR COMBINING TWO DOTS BELOW
{0x11000, 0x11000, prSpacingMark}, // Mc BRAHMI SIGN CANDRABINDU {0x11000, 0x11000, prSpacingMark}, // Mc BRAHMI SIGN CANDRABINDU
@ -1375,6 +1377,7 @@ var graphemeCodePoints = [][3]int{
{0x11235, 0x11235, prSpacingMark}, // Mc KHOJKI SIGN VIRAMA {0x11235, 0x11235, prSpacingMark}, // Mc KHOJKI SIGN VIRAMA
{0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x112DF, 0x112DF, prExtend}, // Mn KHUDAWADI SIGN ANUSVARA {0x112DF, 0x112DF, prExtend}, // Mn KHUDAWADI SIGN ANUSVARA
{0x112E0, 0x112E2, prSpacingMark}, // Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II {0x112E0, 0x112E2, prSpacingMark}, // Mc [3] KHUDAWADI VOWEL SIGN AA..KHUDAWADI VOWEL SIGN II
{0x112E3, 0x112EA, prExtend}, // Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA {0x112E3, 0x112EA, prExtend}, // Mn [8] KHUDAWADI VOWEL SIGN U..KHUDAWADI SIGN VIRAMA
@ -1494,7 +1497,18 @@ var graphemeCodePoints = [][3]int{
{0x11D97, 0x11D97, prExtend}, // Mn GUNJALA GONDI VIRAMA {0x11D97, 0x11D97, prExtend}, // Mn GUNJALA GONDI VIRAMA
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prSpacingMark}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF5, 0x11EF6, prSpacingMark}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x13430, 0x13438, prControl}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT {0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prPrepend}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prSpacingMark}, // Mc KAWI SIGN VISARGA
{0x11F34, 0x11F35, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prSpacingMark}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prSpacingMark}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x13430, 0x1343F, prControl}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x16AF0, 0x16AF4, prExtend}, // Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE {0x16AF0, 0x16AF4, prExtend}, // Mn [5] BASSA VAH COMBINING HIGH TONE..BASSA VAH COMBINING HIGH-LOW TONE
{0x16B30, 0x16B36, prExtend}, // Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM {0x16B30, 0x16B36, prExtend}, // Mn [7] PAHAWH HMONG MARK CIM TUB..PAHAWH HMONG MARK CIM TAUM
{0x16F4F, 0x16F4F, prExtend}, // Mn MIAO SIGN CONSONANT MODIFIER BAR {0x16F4F, 0x16F4F, prExtend}, // Mn MIAO SIGN CONSONANT MODIFIER BAR
@ -1527,9 +1541,11 @@ var graphemeCodePoints = [][3]int{
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E2AE, 0x1E2AE, prExtend}, // Mn TOTO SIGN RISING TONE {0x1E2AE, 0x1E2AE, prExtend}, // Mn TOTO SIGN RISING TONE
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E8D0, 0x1E8D6, prExtend}, // Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS {0x1E8D0, 0x1E8D6, prExtend}, // Mn [7] MENDE KIKAKUI COMBINING NUMBER TEENS..MENDE KIKAKUI COMBINING NUMBER MILLIONS
{0x1E944, 0x1E94A, prExtend}, // Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA {0x1E944, 0x1E94A, prExtend}, // Mn [7] ADLAM ALIF LENGTHENER..ADLAM NUKTA
{0x1F000, 0x1F003, prExtendedPictographic}, // E0.0 [4] (🀀..🀃) MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND {0x1F000, 0x1F003, prExtendedPictographic}, // E0.0 [4] (🀀..🀃) MAHJONG TILE EAST WIND..MAHJONG TILE NORTH WIND
@ -1780,7 +1796,8 @@ var graphemeCodePoints = [][3]int{
{0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA {0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA
{0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple {0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator {0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (🛘..🛜) <reserved-1F6D8>..<reserved-1F6DC> {0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (🛘..🛛) <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy {0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat {0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
{0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE {0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
@ -1797,7 +1814,7 @@ var graphemeCodePoints = [][3]int{
{0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw {0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw
{0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate {0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
{0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF> {0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F> {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS
{0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF> {0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square {0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square
{0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF> {0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF>
@ -1856,30 +1873,37 @@ var graphemeCodePoints = [][3]int{
{0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F> {0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F>
{0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal {0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77> {0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F> {0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute {0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls {0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (🪇..🪏) <reserved-1FA87>..<reserved-1FA8F> {0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (🪉..🪏) <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo {0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock {0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa {0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF> {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather {0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs {0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF> {0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (🪾) <reserved-1FABE>
{0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging {0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown {0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (🫆..🫏) <reserved-1FAC6>..<reserved-1FACF> {0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (🫆..🫍) <reserved-1FAC6>..<reserved-1FACD>
{0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot {0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar {0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) <reserved-1FADA>..<reserved-1FADF> {0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles {0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) <reserved-1FAE8>..<reserved-1FAEF> {0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face
{0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands {0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) <reserved-1FAF7>..<reserved-1FAFF> {0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
{0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) <reserved-1FAF9>..<reserved-1FAFF>
{0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD> {0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD>
{0xE0000, 0xE0000, prControl}, // Cn <reserved-E0000> {0xE0000, 0xE0000, prControl}, // Cn <reserved-E0000>
{0xE0001, 0xE0001, prControl}, // Cf LANGUAGE TAG {0xE0001, 0xE0001, prControl}, // Cf LANGUAGE TAG

View file

@ -21,11 +21,12 @@ const (
grBoundary grBoundary
) )
// The grapheme cluster parser's state transitions. Maps (state, property) to // grTransitions implements the grapheme cluster parser's state transitions.
// (new state, breaking instruction, rule number). The breaking instruction // Maps state and property to a new state, a breaking instruction, and rule
// always refers to the boundary between the last and next code point. // number. The breaking instruction always refers to the boundary between the
// last and next code point. Returns negative values if no transition is found.
// //
// This map is queried as follows: // This function is used as follows:
// //
// 1. Find specific state + specific property. Stop if found. // 1. Find specific state + specific property. Stop if found.
// 2. Find specific state + any property. // 2. Find specific state + any property.
@ -36,59 +37,96 @@ const (
// are equal. Stop. // are equal. Stop.
// 6. Assume grAny and grBoundary. // 6. Assume grAny and grBoundary.
// //
// Unicode version 14.0.0. // Unicode version 15.0.0.
var grTransitions = map[[2]int][3]int{ func grTransitions(state, prop int) (newState int, newProp int, boundary int) {
// It turns out that using a big switch statement is much faster than using
// a map.
switch uint64(state) | uint64(prop)<<32 {
// GB5 // GB5
{grAny, prCR}: {grCR, grBoundary, 50}, case grAny | prCR<<32:
{grAny, prLF}: {grControlLF, grBoundary, 50}, return grCR, grBoundary, 50
{grAny, prControl}: {grControlLF, grBoundary, 50}, case grAny | prLF<<32:
return grControlLF, grBoundary, 50
case grAny | prControl<<32:
return grControlLF, grBoundary, 50
// GB4 // GB4
{grCR, prAny}: {grAny, grBoundary, 40}, case grCR | prAny<<32:
{grControlLF, prAny}: {grAny, grBoundary, 40}, return grAny, grBoundary, 40
case grControlLF | prAny<<32:
return grAny, grBoundary, 40
// GB3. // GB3
{grCR, prLF}: {grControlLF, grNoBoundary, 30}, case grCR | prLF<<32:
return grControlLF, grNoBoundary, 30
// GB6. // GB6
{grAny, prL}: {grL, grBoundary, 9990}, case grAny | prL<<32:
{grL, prL}: {grL, grNoBoundary, 60}, return grL, grBoundary, 9990
{grL, prV}: {grLVV, grNoBoundary, 60}, case grL | prL<<32:
{grL, prLV}: {grLVV, grNoBoundary, 60}, return grL, grNoBoundary, 60
{grL, prLVT}: {grLVTT, grNoBoundary, 60}, case grL | prV<<32:
return grLVV, grNoBoundary, 60
case grL | prLV<<32:
return grLVV, grNoBoundary, 60
case grL | prLVT<<32:
return grLVTT, grNoBoundary, 60
// GB7. // GB7
{grAny, prLV}: {grLVV, grBoundary, 9990}, case grAny | prLV<<32:
{grAny, prV}: {grLVV, grBoundary, 9990}, return grLVV, grBoundary, 9990
{grLVV, prV}: {grLVV, grNoBoundary, 70}, case grAny | prV<<32:
{grLVV, prT}: {grLVTT, grNoBoundary, 70}, return grLVV, grBoundary, 9990
case grLVV | prV<<32:
return grLVV, grNoBoundary, 70
case grLVV | prT<<32:
return grLVTT, grNoBoundary, 70
// GB8. // GB8
{grAny, prLVT}: {grLVTT, grBoundary, 9990}, case grAny | prLVT<<32:
{grAny, prT}: {grLVTT, grBoundary, 9990}, return grLVTT, grBoundary, 9990
{grLVTT, prT}: {grLVTT, grNoBoundary, 80}, case grAny | prT<<32:
return grLVTT, grBoundary, 9990
case grLVTT | prT<<32:
return grLVTT, grNoBoundary, 80
// GB9. // GB9
{grAny, prExtend}: {grAny, grNoBoundary, 90}, case grAny | prExtend<<32:
{grAny, prZWJ}: {grAny, grNoBoundary, 90}, return grAny, grNoBoundary, 90
case grAny | prZWJ<<32:
return grAny, grNoBoundary, 90
// GB9a. // GB9a
{grAny, prSpacingMark}: {grAny, grNoBoundary, 91}, case grAny | prSpacingMark<<32:
return grAny, grNoBoundary, 91
// GB9b. // GB9b
{grAny, prPrepend}: {grPrepend, grBoundary, 9990}, case grAny | prPrepend<<32:
{grPrepend, prAny}: {grAny, grNoBoundary, 92}, return grPrepend, grBoundary, 9990
case grPrepend | prAny<<32:
return grAny, grNoBoundary, 92
// GB11. // GB11
{grAny, prExtendedPictographic}: {grExtendedPictographic, grBoundary, 9990}, case grAny | prExtendedPictographic<<32:
{grExtendedPictographic, prExtend}: {grExtendedPictographic, grNoBoundary, 110}, return grExtendedPictographic, grBoundary, 9990
{grExtendedPictographic, prZWJ}: {grExtendedPictographicZWJ, grNoBoundary, 110}, case grExtendedPictographic | prExtend<<32:
{grExtendedPictographicZWJ, prExtendedPictographic}: {grExtendedPictographic, grNoBoundary, 110}, return grExtendedPictographic, grNoBoundary, 110
case grExtendedPictographic | prZWJ<<32:
return grExtendedPictographicZWJ, grNoBoundary, 110
case grExtendedPictographicZWJ | prExtendedPictographic<<32:
return grExtendedPictographic, grNoBoundary, 110
// GB12 / GB13. // GB12 / GB13
{grAny, prRegionalIndicator}: {grRIOdd, grBoundary, 9990}, case grAny | prRegionalIndicator<<32:
{grRIOdd, prRegionalIndicator}: {grRIEven, grNoBoundary, 120}, return grRIOdd, grBoundary, 9990
{grRIEven, prRegionalIndicator}: {grRIOdd, grBoundary, 120}, case grRIOdd | prRegionalIndicator<<32:
return grRIEven, grNoBoundary, 120
case grRIEven | prRegionalIndicator<<32:
return grRIOdd, grBoundary, 120
default:
return -1, -1, -1
}
} }
// transitionGraphemeState determines the new state of the grapheme cluster // transitionGraphemeState determines the new state of the grapheme cluster
@ -97,40 +135,40 @@ var grTransitions = map[[2]int][3]int{
// table) and whether a cluster boundary was detected. // table) and whether a cluster boundary was detected.
func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) { func transitionGraphemeState(state int, r rune) (newState, prop int, boundary bool) {
// Determine the property of the next character. // Determine the property of the next character.
prop = property(graphemeCodePoints, r) prop = propertyGraphemes(r)
// Find the applicable transition. // Find the applicable transition.
transition, ok := grTransitions[[2]int{state, prop}] nextState, nextProp, _ := grTransitions(state, prop)
if ok { if nextState >= 0 {
// We have a specific transition. We'll use it. // We have a specific transition. We'll use it.
return transition[0], prop, transition[1] == grBoundary return nextState, prop, nextProp == grBoundary
} }
// No specific transition found. Try the less specific ones. // No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := grTransitions[[2]int{state, prAny}] anyPropState, anyPropProp, anyPropRule := grTransitions(state, prAny)
transAnyState, okAnyState := grTransitions[[2]int{grAny, prop}] anyStateState, anyStateProp, anyStateRule := grTransitions(grAny, prop)
if okAnyProp && okAnyState { if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions). // Both apply. We'll use a mix (see comments for grTransitions).
newState = transAnyState[0] newState = anyStateState
boundary = transAnyState[1] == grBoundary boundary = anyStateProp == grBoundary
if transAnyProp[2] < transAnyState[2] { if anyPropRule < anyStateRule {
boundary = transAnyProp[1] == grBoundary boundary = anyPropProp == grBoundary
} }
return return
} }
if okAnyProp { if anyPropState >= 0 {
// We only have a specific state. // We only have a specific state.
return transAnyProp[0], prop, transAnyProp[1] == grBoundary return anyPropState, prop, anyPropProp == grBoundary
// This branch will probably never be reached because okAnyState will // This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here // always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be // for future modifications to the transition map where this may not be
// true anymore. // true anymore.
} }
if okAnyState { if anyStateState >= 0 {
// We only have a specific property. // We only have a specific property.
return transAnyState[0], prop, transAnyState[1] == grBoundary return anyStateState, prop, anyStateProp == grBoundary
} }
// No known transition. GB999: Any ÷ Any. // No known transition. GB999: Any ÷ Any.

View file

@ -80,7 +80,7 @@ func FirstLineSegment(b []byte, state int) (segment, rest []byte, mustBreak bool
} }
} }
// FirstLineSegmentInString is like FirstLineSegment() but its input and outputs // FirstLineSegmentInString is like [FirstLineSegment] but its input and outputs
// are strings. // are strings.
func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) { func FirstLineSegmentInString(str string, state int) (segment, rest string, mustBreak bool, newState int) {
// An empty byte slice returns nothing. // An empty byte slice returns nothing.
@ -122,13 +122,13 @@ func FirstLineSegmentInString(str string, state int) (segment, rest string, must
// [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm // [UAX #14]: https://www.unicode.org/reports/tr14/#Algorithm
func HasTrailingLineBreak(b []byte) bool { func HasTrailingLineBreak(b []byte) bool {
r, _ := utf8.DecodeLastRune(b) r, _ := utf8.DecodeLastRune(b)
property, _ := propertyWithGenCat(lineBreakCodePoints, r) property, _ := propertyLineBreak(r)
return property == lbBK || property == lbCR || property == lbLF || property == lbNL return property == prBK || property == prCR || property == prLF || property == prNL
} }
// HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string. // HasTrailingLineBreakInString is like [HasTrailingLineBreak] but for a string.
func HasTrailingLineBreakInString(str string) bool { func HasTrailingLineBreakInString(str string) bool {
r, _ := utf8.DecodeLastRuneInString(str) r, _ := utf8.DecodeLastRuneInString(str)
property, _ := propertyWithGenCat(lineBreakCodePoints, r) property, _ := propertyLineBreak(r)
return property == lbBK || property == lbCR || property == lbLF || property == lbNL return property == prBK || property == prCR || property == prLF || property == prNL
} }

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// lineBreakCodePoints are taken from // lineBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/LineBreak.txt // https://www.unicode.org/Public/15.0.0/ucd/LineBreak.txt
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var lineBreakCodePoints = [][4]int{ var lineBreakCodePoints = [][4]int{
{0x0000, 0x0008, prCM, gcCc}, // [9] <control-0000>..<control-0008> {0x0000, 0x0008, prCM, gcCc}, // [9] <control-0000>..<control-0008>
@ -439,6 +439,7 @@ var lineBreakCodePoints = [][4]int{
{0x0CE2, 0x0CE3, prCM, gcMn}, // [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE2, 0x0CE3, prCM, gcMn}, // [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNU, gcNd}, // [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CE6, 0x0CEF, prNU, gcNd}, // [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prAL, gcLo}, // [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA {0x0CF1, 0x0CF2, prAL, gcLo}, // [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prCM, gcMc}, // KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prCM, gcMn}, // [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D00, 0x0D01, prCM, gcMn}, // [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prCM, gcMc}, // [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D02, 0x0D03, prCM, gcMc}, // [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prAL, gcLo}, // [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L {0x0D04, 0x0D0C, prAL, gcLo}, // [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@ -500,7 +501,7 @@ var lineBreakCodePoints = [][4]int{
{0x0EBD, 0x0EBD, prSA, gcLo}, // LAO SEMIVOWEL SIGN NYO {0x0EBD, 0x0EBD, prSA, gcLo}, // LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prSA, gcLo}, // [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC0, 0x0EC4, prSA, gcLo}, // [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prSA, gcLm}, // LAO KO LA {0x0EC6, 0x0EC6, prSA, gcLm}, // LAO KO LA
{0x0EC8, 0x0ECD, prSA, gcMn}, // [6] LAO TONE MAI EK..LAO NIGGAHITA {0x0EC8, 0x0ECE, prSA, gcMn}, // [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNU, gcNd}, // [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0ED0, 0x0ED9, prNU, gcNd}, // [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prSA, gcLo}, // [4] LAO HO NO..LAO LETTER KHMU NYO {0x0EDC, 0x0EDF, prSA, gcLo}, // [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prAL, gcLo}, // TIBETAN SYLLABLE OM {0x0F00, 0x0F00, prAL, gcLo}, // TIBETAN SYLLABLE OM
@ -813,7 +814,11 @@ var lineBreakCodePoints = [][4]int{
{0x1D79, 0x1D7F, prAL, gcLl}, // [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE {0x1D79, 0x1D7F, prAL, gcLl}, // [7] LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER UPSILON WITH STROKE
{0x1D80, 0x1D9A, prAL, gcLl}, // [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK {0x1D80, 0x1D9A, prAL, gcLl}, // [27] LATIN SMALL LETTER B WITH PALATAL HOOK..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
{0x1D9B, 0x1DBF, prAL, gcLm}, // [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA {0x1D9B, 0x1DBF, prAL, gcLm}, // [37] MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
{0x1DC0, 0x1DFF, prCM, gcMn}, // [64] COMBINING DOTTED GRAVE ACCENT..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW {0x1DC0, 0x1DCC, prCM, gcMn}, // [13] COMBINING DOTTED GRAVE ACCENT..COMBINING MACRON-BREVE
{0x1DCD, 0x1DCD, prGL, gcMn}, // COMBINING DOUBLE CIRCUMFLEX ABOVE
{0x1DCE, 0x1DFB, prCM, gcMn}, // [46] COMBINING OGONEK ABOVE..COMBINING DELETION MARK
{0x1DFC, 0x1DFC, prGL, gcMn}, // COMBINING DOUBLE INVERTED BREVE BELOW
{0x1DFD, 0x1DFF, prCM, gcMn}, // [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
{0x1E00, 0x1EFF, prAL, gcLC}, // [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP {0x1E00, 0x1EFF, prAL, gcLC}, // [256] LATIN CAPITAL LETTER A WITH RING BELOW..LATIN SMALL LETTER Y WITH LOOP
{0x1F00, 0x1F15, prAL, gcLC}, // [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA {0x1F00, 0x1F15, prAL, gcLC}, // [22] GREEK SMALL LETTER ALPHA WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
{0x1F18, 0x1F1D, prAL, gcLu}, // [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA {0x1F18, 0x1F1D, prAL, gcLu}, // [6] GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
@ -889,7 +894,7 @@ var lineBreakCodePoints = [][4]int{
{0x2054, 0x2054, prAL, gcPc}, // INVERTED UNDERTIE {0x2054, 0x2054, prAL, gcPc}, // INVERTED UNDERTIE
{0x2055, 0x2055, prAL, gcPo}, // FLOWER PUNCTUATION MARK {0x2055, 0x2055, prAL, gcPo}, // FLOWER PUNCTUATION MARK
{0x2056, 0x2056, prBA, gcPo}, // THREE DOT PUNCTUATION {0x2056, 0x2056, prBA, gcPo}, // THREE DOT PUNCTUATION
{0x2057, 0x2057, prAL, gcPo}, // QUADRUPLE PRIME {0x2057, 0x2057, prPO, gcPo}, // QUADRUPLE PRIME
{0x2058, 0x205B, prBA, gcPo}, // [4] FOUR DOT PUNCTUATION..FOUR DOT MARK {0x2058, 0x205B, prBA, gcPo}, // [4] FOUR DOT PUNCTUATION..FOUR DOT MARK
{0x205C, 0x205C, prAL, gcPo}, // DOTTED CROSS {0x205C, 0x205C, prAL, gcPo}, // DOTTED CROSS
{0x205D, 0x205E, prBA, gcPo}, // [2] TRICOLON..VERTICAL FOUR DOTS {0x205D, 0x205E, prBA, gcPo}, // [2] TRICOLON..VERTICAL FOUR DOTS
@ -2751,6 +2756,7 @@ var lineBreakCodePoints = [][4]int{
{0x10EAB, 0x10EAC, prCM, gcMn}, // [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAB, 0x10EAC, prCM, gcMn}, // [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EAD, 0x10EAD, prBA, gcPd}, // YEZIDI HYPHENATION MARK {0x10EAD, 0x10EAD, prBA, gcPd}, // YEZIDI HYPHENATION MARK
{0x10EB0, 0x10EB1, prAL, gcLo}, // [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE {0x10EB0, 0x10EB1, prAL, gcLo}, // [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prCM, gcMn}, // [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prAL, gcLo}, // [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F00, 0x10F1C, prAL, gcLo}, // [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F1D, 0x10F26, prAL, gcNo}, // [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF {0x10F1D, 0x10F26, prAL, gcNo}, // [10] OLD SOGDIAN NUMBER ONE..OLD SOGDIAN FRACTION ONE HALF
{0x10F27, 0x10F27, prAL, gcLo}, // OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F27, 0x10F27, prAL, gcLo}, // OLD SOGDIAN LIGATURE AYIN-DALETH
@ -2840,6 +2846,8 @@ var lineBreakCodePoints = [][4]int{
{0x1123B, 0x1123C, prBA, gcPo}, // [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK {0x1123B, 0x1123C, prBA, gcPo}, // [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
{0x1123D, 0x1123D, prAL, gcPo}, // KHOJKI ABBREVIATION SIGN {0x1123D, 0x1123D, prAL, gcPo}, // KHOJKI ABBREVIATION SIGN
{0x1123E, 0x1123E, prCM, gcMn}, // KHOJKI SIGN SUKUN {0x1123E, 0x1123E, prCM, gcMn}, // KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prAL, gcLo}, // [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prCM, gcMn}, // KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prAL, gcLo}, // [7] MULTANI LETTER A..MULTANI LETTER GA {0x11280, 0x11286, prAL, gcLo}, // [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prAL, gcLo}, // MULTANI LETTER GHA {0x11288, 0x11288, prAL, gcLo}, // MULTANI LETTER GHA
{0x1128A, 0x1128D, prAL, gcLo}, // [4] MULTANI LETTER CA..MULTANI LETTER JJA {0x1128A, 0x1128D, prAL, gcLo}, // [4] MULTANI LETTER CA..MULTANI LETTER JJA
@ -3013,6 +3021,7 @@ var lineBreakCodePoints = [][4]int{
{0x11AA1, 0x11AA2, prBA, gcPo}, // [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2 {0x11AA1, 0x11AA2, prBA, gcPo}, // [2] SOYOMBO TERMINAL MARK-1..SOYOMBO TERMINAL MARK-2
{0x11AB0, 0x11ABF, prAL, gcLo}, // [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA {0x11AB0, 0x11ABF, prAL, gcLo}, // [16] CANADIAN SYLLABICS NATTILIK HI..CANADIAN SYLLABICS SPA
{0x11AC0, 0x11AF8, prAL, gcLo}, // [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL {0x11AC0, 0x11AF8, prAL, gcLo}, // [57] PAU CIN HAU LETTER PA..PAU CIN HAU GLOTTAL STOP FINAL
{0x11B00, 0x11B09, prBB, gcPo}, // [10] DEVANAGARI HEAD MARK..DEVANAGARI SIGN MINDU
{0x11C00, 0x11C08, prAL, gcLo}, // [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L {0x11C00, 0x11C08, prAL, gcLo}, // [9] BHAIKSUKI LETTER A..BHAIKSUKI LETTER VOCALIC L
{0x11C0A, 0x11C2E, prAL, gcLo}, // [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA {0x11C0A, 0x11C2E, prAL, gcLo}, // [37] BHAIKSUKI LETTER E..BHAIKSUKI LETTER HA
{0x11C2F, 0x11C2F, prCM, gcMc}, // BHAIKSUKI VOWEL SIGN AA {0x11C2F, 0x11C2F, prCM, gcMc}, // BHAIKSUKI VOWEL SIGN AA
@ -3059,6 +3068,20 @@ var lineBreakCodePoints = [][4]int{
{0x11EF3, 0x11EF4, prCM, gcMn}, // [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF3, 0x11EF4, prCM, gcMn}, // [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prCM, gcMc}, // [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF5, 0x11EF6, prCM, gcMc}, // [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prAL, gcPo}, // [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION {0x11EF7, 0x11EF8, prAL, gcPo}, // [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prCM, gcMn}, // [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prAL, gcLo}, // KAWI SIGN REPHA
{0x11F03, 0x11F03, prCM, gcMc}, // KAWI SIGN VISARGA
{0x11F04, 0x11F10, prAL, gcLo}, // [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prAL, gcLo}, // [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prCM, gcMc}, // [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prCM, gcMn}, // [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prCM, gcMc}, // [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prCM, gcMn}, // KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prCM, gcMc}, // KAWI SIGN KILLER
{0x11F42, 0x11F42, prCM, gcMn}, // KAWI CONJOINER
{0x11F43, 0x11F44, prBA, gcPo}, // [2] KAWI DANDA..KAWI DOUBLE DANDA
{0x11F45, 0x11F4F, prID, gcPo}, // [11] KAWI PUNCTUATION SECTION MARKER..KAWI PUNCTUATION CLOSING SPIRAL
{0x11F50, 0x11F59, prNU, gcNd}, // [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prAL, gcLo}, // LISU LETTER YHA {0x11FB0, 0x11FB0, prAL, gcLo}, // LISU LETTER YHA
{0x11FC0, 0x11FD4, prAL, gcNo}, // [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH {0x11FC0, 0x11FD4, prAL, gcNo}, // [21] TAMIL FRACTION ONE THREE-HUNDRED-AND-TWENTIETH..TAMIL FRACTION DOWNSCALING FACTOR KIIZH
{0x11FD5, 0x11FDC, prAL, gcSo}, // [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI {0x11FD5, 0x11FDC, prAL, gcSo}, // [8] TAMIL SIGN NEL..TAMIL SIGN MUKKURUNI
@ -3084,10 +3107,18 @@ var lineBreakCodePoints = [][4]int{
{0x1328A, 0x13378, prAL, gcLo}, // [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011 {0x1328A, 0x13378, prAL, gcLo}, // [239] EGYPTIAN HIEROGLYPH O037..EGYPTIAN HIEROGLYPH V011
{0x13379, 0x13379, prOP, gcLo}, // EGYPTIAN HIEROGLYPH V011A {0x13379, 0x13379, prOP, gcLo}, // EGYPTIAN HIEROGLYPH V011A
{0x1337A, 0x1337B, prCL, gcLo}, // [2] EGYPTIAN HIEROGLYPH V011B..EGYPTIAN HIEROGLYPH V011C {0x1337A, 0x1337B, prCL, gcLo}, // [2] EGYPTIAN HIEROGLYPH V011B..EGYPTIAN HIEROGLYPH V011C
{0x1337C, 0x1342E, prAL, gcLo}, // [179] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH AA032 {0x1337C, 0x1342F, prAL, gcLo}, // [180] EGYPTIAN HIEROGLYPH V012..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x13436, prGL, gcCf}, // [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE {0x13430, 0x13436, prGL, gcCf}, // [7] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH OVERLAY MIDDLE
{0x13437, 0x13437, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN SEGMENT {0x13437, 0x13437, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN SEGMENT
{0x13438, 0x13438, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END SEGMENT {0x13438, 0x13438, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END SEGMENT
{0x13439, 0x1343B, prGL, gcCf}, // [3] EGYPTIAN HIEROGLYPH INSERT AT MIDDLE..EGYPTIAN HIEROGLYPH INSERT AT BOTTOM
{0x1343C, 0x1343C, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN ENCLOSURE
{0x1343D, 0x1343D, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END ENCLOSURE
{0x1343E, 0x1343E, prOP, gcCf}, // EGYPTIAN HIEROGLYPH BEGIN WALLED ENCLOSURE
{0x1343F, 0x1343F, prCL, gcCf}, // EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prCM, gcMn}, // EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prAL, gcLo}, // [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prCM, gcMn}, // [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x145CD, prAL, gcLo}, // [462] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A409 {0x14400, 0x145CD, prAL, gcLo}, // [462] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A409
{0x145CE, 0x145CE, prOP, gcLo}, // ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK {0x145CE, 0x145CE, prOP, gcLo}, // ANATOLIAN HIEROGLYPH A410 BEGIN LOGOGRAM MARK
{0x145CF, 0x145CF, prCL, gcLo}, // ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK {0x145CF, 0x145CF, prCL, gcLo}, // ANATOLIAN HIEROGLYPH A410A END LOGOGRAM MARK
@ -3137,7 +3168,9 @@ var lineBreakCodePoints = [][4]int{
{0x1AFFD, 0x1AFFE, prAL, gcLm}, // [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1AFFD, 0x1AFFE, prAL, gcLm}, // [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B0FF, prID, gcLo}, // [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2 {0x1B000, 0x1B0FF, prID, gcLo}, // [256] KATAKANA LETTER ARCHAIC E..HENTAIGANA LETTER RE-2
{0x1B100, 0x1B122, prID, gcLo}, // [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU {0x1B100, 0x1B122, prID, gcLo}, // [35] HENTAIGANA LETTER RE-3..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prCJ, gcLo}, // HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prCJ, gcLo}, // [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO {0x1B150, 0x1B152, prCJ, gcLo}, // [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prCJ, gcLo}, // KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prCJ, gcLo}, // [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B164, 0x1B167, prCJ, gcLo}, // [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prID, gcLo}, // [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1B170, 0x1B2FB, prID, gcLo}, // [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prAL, gcLo}, // [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M {0x1BC00, 0x1BC6A, prAL, gcLo}, // [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@ -3168,6 +3201,7 @@ var lineBreakCodePoints = [][4]int{
{0x1D200, 0x1D241, prAL, gcSo}, // [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 {0x1D200, 0x1D241, prAL, gcSo}, // [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54
{0x1D242, 0x1D244, prCM, gcMn}, // [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME {0x1D242, 0x1D244, prCM, gcMn}, // [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME
{0x1D245, 0x1D245, prAL, gcSo}, // GREEK MUSICAL LEIMMA {0x1D245, 0x1D245, prAL, gcSo}, // GREEK MUSICAL LEIMMA
{0x1D2C0, 0x1D2D3, prAL, gcNo}, // [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN
{0x1D2E0, 0x1D2F3, prAL, gcNo}, // [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN {0x1D2E0, 0x1D2F3, prAL, gcNo}, // [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN
{0x1D300, 0x1D356, prAL, gcSo}, // [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING {0x1D300, 0x1D356, prAL, gcSo}, // [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
{0x1D360, 0x1D378, prAL, gcNo}, // [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE {0x1D360, 0x1D378, prAL, gcNo}, // [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE
@ -3228,11 +3262,14 @@ var lineBreakCodePoints = [][4]int{
{0x1DF00, 0x1DF09, prAL, gcLl}, // [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF00, 0x1DF09, prAL, gcLl}, // [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prAL, gcLo}, // LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prAL, gcLo}, // LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prAL, gcLl}, // [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL {0x1DF0B, 0x1DF1E, prAL, gcLl}, // [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prAL, gcLl}, // [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E000, 0x1E006, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prCM, gcMn}, // [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E008, 0x1E018, prCM, gcMn}, // [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E01B, 0x1E021, prCM, gcMn}, // [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prCM, gcMn}, // [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E023, 0x1E024, prCM, gcMn}, // [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prCM, gcMn}, // [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA {0x1E026, 0x1E02A, prCM, gcMn}, // [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prAL, gcLm}, // [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prCM, gcMn}, // COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prAL, gcLo}, // [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E100, 0x1E12C, prAL, gcLo}, // [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prCM, gcMn}, // [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E130, 0x1E136, prCM, gcMn}, // [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prAL, gcLm}, // [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER {0x1E137, 0x1E13D, prAL, gcLm}, // [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@ -3245,6 +3282,10 @@ var lineBreakCodePoints = [][4]int{
{0x1E2EC, 0x1E2EF, prCM, gcMn}, // [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2EC, 0x1E2EF, prCM, gcMn}, // [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNU, gcNd}, // [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2F0, 0x1E2F9, prNU, gcNd}, // [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E2FF, 0x1E2FF, prPR, gcSc}, // WANCHO NGUN SIGN {0x1E2FF, 0x1E2FF, prPR, gcSc}, // WANCHO NGUN SIGN
{0x1E4D0, 0x1E4EA, prAL, gcLo}, // [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prAL, gcLm}, // NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prCM, gcMn}, // [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNU, gcNd}, // [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prAL, gcLo}, // [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E0, 0x1E7E6, prAL, gcLo}, // [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prAL, gcLo}, // [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7E8, 0x1E7EB, prAL, gcLo}, // [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prAL, gcLo}, // [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE {0x1E7ED, 0x1E7EE, prAL, gcLo}, // [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@ -3412,16 +3453,18 @@ var lineBreakCodePoints = [][4]int{
{0x1F6C1, 0x1F6CB, prID, gcSo}, // [11] BATHTUB..COUCH AND LAMP {0x1F6C1, 0x1F6CB, prID, gcSo}, // [11] BATHTUB..COUCH AND LAMP
{0x1F6CC, 0x1F6CC, prEB, gcSo}, // SLEEPING ACCOMMODATION {0x1F6CC, 0x1F6CC, prEB, gcSo}, // SLEEPING ACCOMMODATION
{0x1F6CD, 0x1F6D7, prID, gcSo}, // [11] SHOPPING BAGS..ELEVATOR {0x1F6CD, 0x1F6D7, prID, gcSo}, // [11] SHOPPING BAGS..ELEVATOR
{0x1F6D8, 0x1F6DC, prID, gcCn}, // [5] <reserved-1F6D8>..<reserved-1F6DC> {0x1F6D8, 0x1F6DB, prID, gcCn}, // [4] <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DD, 0x1F6EC, prID, gcSo}, // [16] PLAYGROUND SLIDE..AIRPLANE ARRIVING {0x1F6DC, 0x1F6EC, prID, gcSo}, // [17] WIRELESS..AIRPLANE ARRIVING
{0x1F6ED, 0x1F6EF, prID, gcCn}, // [3] <reserved-1F6ED>..<reserved-1F6EF> {0x1F6ED, 0x1F6EF, prID, gcCn}, // [3] <reserved-1F6ED>..<reserved-1F6EF>
{0x1F6F0, 0x1F6FC, prID, gcSo}, // [13] SATELLITE..ROLLER SKATE {0x1F6F0, 0x1F6FC, prID, gcSo}, // [13] SATELLITE..ROLLER SKATE
{0x1F6FD, 0x1F6FF, prID, gcCn}, // [3] <reserved-1F6FD>..<reserved-1F6FF> {0x1F6FD, 0x1F6FF, prID, gcCn}, // [3] <reserved-1F6FD>..<reserved-1F6FF>
{0x1F700, 0x1F773, prAL, gcSo}, // [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE {0x1F700, 0x1F773, prAL, gcSo}, // [116] ALCHEMICAL SYMBOL FOR QUINTESSENCE..ALCHEMICAL SYMBOL FOR HALF OUNCE
{0x1F774, 0x1F77F, prID, gcCn}, // [12] <reserved-1F774>..<reserved-1F77F> {0x1F774, 0x1F776, prID, gcSo}, // [3] LOT OF FORTUNE..LUNAR ECLIPSE
{0x1F777, 0x1F77A, prID, gcCn}, // [4] <reserved-1F777>..<reserved-1F77A>
{0x1F77B, 0x1F77F, prID, gcSo}, // [5] HAUMEA..ORCUS
{0x1F780, 0x1F7D4, prAL, gcSo}, // [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR {0x1F780, 0x1F7D4, prAL, gcSo}, // [85] BLACK LEFT-POINTING ISOSCELES RIGHT TRIANGLE..HEAVY TWELVE POINTED PINWHEEL STAR
{0x1F7D5, 0x1F7D8, prID, gcSo}, // [4] CIRCLED TRIANGLE..NEGATIVE CIRCLED SQUARE {0x1F7D5, 0x1F7D9, prID, gcSo}, // [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR
{0x1F7D9, 0x1F7DF, prID, gcCn}, // [7] <reserved-1F7D9>..<reserved-1F7DF> {0x1F7DA, 0x1F7DF, prID, gcCn}, // [6] <reserved-1F7DA>..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prID, gcSo}, // [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE {0x1F7E0, 0x1F7EB, prID, gcSo}, // [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
{0x1F7EC, 0x1F7EF, prID, gcCn}, // [4] <reserved-1F7EC>..<reserved-1F7EF> {0x1F7EC, 0x1F7EF, prID, gcCn}, // [4] <reserved-1F7EC>..<reserved-1F7EF>
{0x1F7F0, 0x1F7F0, prID, gcSo}, // HEAVY EQUALS SIGN {0x1F7F0, 0x1F7F0, prID, gcSo}, // HEAVY EQUALS SIGN
@ -3467,33 +3510,29 @@ var lineBreakCodePoints = [][4]int{
{0x1FA54, 0x1FA5F, prID, gcCn}, // [12] <reserved-1FA54>..<reserved-1FA5F> {0x1FA54, 0x1FA5F, prID, gcCn}, // [12] <reserved-1FA54>..<reserved-1FA5F>
{0x1FA60, 0x1FA6D, prID, gcSo}, // [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER {0x1FA60, 0x1FA6D, prID, gcSo}, // [14] XIANGQI RED GENERAL..XIANGQI BLACK SOLDIER
{0x1FA6E, 0x1FA6F, prID, gcCn}, // [2] <reserved-1FA6E>..<reserved-1FA6F> {0x1FA6E, 0x1FA6F, prID, gcCn}, // [2] <reserved-1FA6E>..<reserved-1FA6F>
{0x1FA70, 0x1FA74, prID, gcSo}, // [5] BALLET SHOES..THONG SANDAL {0x1FA70, 0x1FA7C, prID, gcSo}, // [13] BALLET SHOES..CRUTCH
{0x1FA75, 0x1FA77, prID, gcCn}, // [3] <reserved-1FA75>..<reserved-1FA77>
{0x1FA78, 0x1FA7C, prID, gcSo}, // [5] DROP OF BLOOD..CRUTCH
{0x1FA7D, 0x1FA7F, prID, gcCn}, // [3] <reserved-1FA7D>..<reserved-1FA7F> {0x1FA7D, 0x1FA7F, prID, gcCn}, // [3] <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA86, prID, gcSo}, // [7] YO-YO..NESTING DOLLS {0x1FA80, 0x1FA88, prID, gcSo}, // [9] YO-YO..FLUTE
{0x1FA87, 0x1FA8F, prID, gcCn}, // [9] <reserved-1FA87>..<reserved-1FA8F> {0x1FA89, 0x1FA8F, prID, gcCn}, // [7] <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FAAC, prID, gcSo}, // [29] RINGED PLANET..HAMSA {0x1FA90, 0x1FABD, prID, gcSo}, // [46] RINGED PLANET..WING
{0x1FAAD, 0x1FAAF, prID, gcCn}, // [3] <reserved-1FAAD>..<reserved-1FAAF> {0x1FABE, 0x1FABE, prID, gcCn}, // <reserved-1FABE>
{0x1FAB0, 0x1FABA, prID, gcSo}, // [11] FLY..NEST WITH EGGS {0x1FABF, 0x1FAC2, prID, gcSo}, // [4] GOOSE..PEOPLE HUGGING
{0x1FABB, 0x1FABF, prID, gcCn}, // [5] <reserved-1FABB>..<reserved-1FABF>
{0x1FAC0, 0x1FAC2, prID, gcSo}, // [3] ANATOMICAL HEART..PEOPLE HUGGING
{0x1FAC3, 0x1FAC5, prEB, gcSo}, // [3] PREGNANT MAN..PERSON WITH CROWN {0x1FAC3, 0x1FAC5, prEB, gcSo}, // [3] PREGNANT MAN..PERSON WITH CROWN
{0x1FAC6, 0x1FACF, prID, gcCn}, // [10] <reserved-1FAC6>..<reserved-1FACF> {0x1FAC6, 0x1FACD, prID, gcCn}, // [8] <reserved-1FAC6>..<reserved-1FACD>
{0x1FAD0, 0x1FAD9, prID, gcSo}, // [10] BLUEBERRIES..JAR {0x1FACE, 0x1FADB, prID, gcSo}, // [14] MOOSE..PEA POD
{0x1FADA, 0x1FADF, prID, gcCn}, // [6] <reserved-1FADA>..<reserved-1FADF> {0x1FADC, 0x1FADF, prID, gcCn}, // [4] <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prID, gcSo}, // [8] MELTING FACE..BUBBLES {0x1FAE0, 0x1FAE8, prID, gcSo}, // [9] MELTING FACE..SHAKING FACE
{0x1FAE8, 0x1FAEF, prID, gcCn}, // [8] <reserved-1FAE8>..<reserved-1FAEF> {0x1FAE9, 0x1FAEF, prID, gcCn}, // [7] <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prEB, gcSo}, // [7] HAND WITH INDEX FINGER AND THUMB CROSSED..HEART HANDS {0x1FAF0, 0x1FAF8, prEB, gcSo}, // [9] HAND WITH INDEX FINGER AND THUMB CROSSED..RIGHTWARDS PUSHING HAND
{0x1FAF7, 0x1FAFF, prID, gcCn}, // [9] <reserved-1FAF7>..<reserved-1FAFF> {0x1FAF9, 0x1FAFF, prID, gcCn}, // [7] <reserved-1FAF9>..<reserved-1FAFF>
{0x1FB00, 0x1FB92, prAL, gcSo}, // [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK {0x1FB00, 0x1FB92, prAL, gcSo}, // [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
{0x1FB94, 0x1FBCA, prAL, gcSo}, // [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON {0x1FB94, 0x1FBCA, prAL, gcSo}, // [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON
{0x1FBF0, 0x1FBF9, prNU, gcNd}, // [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FBF0, 0x1FBF9, prNU, gcNd}, // [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x1FC00, 0x1FFFD, prID, gcCn}, // [1022] <reserved-1FC00>..<reserved-1FFFD> {0x1FC00, 0x1FFFD, prID, gcCn}, // [1022] <reserved-1FC00>..<reserved-1FFFD>
{0x20000, 0x2A6DF, prID, gcLo}, // [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF {0x20000, 0x2A6DF, prID, gcLo}, // [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A6E0, 0x2A6FF, prID, gcCn}, // [32] <reserved-2A6E0>..<reserved-2A6FF> {0x2A6E0, 0x2A6FF, prID, gcCn}, // [32] <reserved-2A6E0>..<reserved-2A6FF>
{0x2A700, 0x2B738, prID, gcLo}, // [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 {0x2A700, 0x2B739, prID, gcLo}, // [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B739, 0x2B73F, prID, gcCn}, // [7] <reserved-2B739>..<reserved-2B73F> {0x2B73A, 0x2B73F, prID, gcCn}, // [6] <reserved-2B73A>..<reserved-2B73F>
{0x2B740, 0x2B81D, prID, gcLo}, // [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B740, 0x2B81D, prID, gcLo}, // [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B81E, 0x2B81F, prID, gcCn}, // [2] <reserved-2B81E>..<reserved-2B81F> {0x2B81E, 0x2B81F, prID, gcCn}, // [2] <reserved-2B81E>..<reserved-2B81F>
{0x2B820, 0x2CEA1, prID, gcLo}, // [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 {0x2B820, 0x2CEA1, prID, gcLo}, // [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
@ -3504,7 +3543,9 @@ var lineBreakCodePoints = [][4]int{
{0x2FA1E, 0x2FA1F, prID, gcCn}, // [2] <reserved-2FA1E>..<reserved-2FA1F> {0x2FA1E, 0x2FA1F, prID, gcCn}, // [2] <reserved-2FA1E>..<reserved-2FA1F>
{0x2FA20, 0x2FFFD, prID, gcCn}, // [1502] <reserved-2FA20>..<reserved-2FFFD> {0x2FA20, 0x2FFFD, prID, gcCn}, // [1502] <reserved-2FA20>..<reserved-2FFFD>
{0x30000, 0x3134A, prID, gcLo}, // [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A {0x30000, 0x3134A, prID, gcLo}, // [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x3134B, 0x3FFFD, prID, gcCn}, // [60595] <reserved-3134B>..<reserved-3FFFD> {0x3134B, 0x3134F, prID, gcCn}, // [5] <reserved-3134B>..<reserved-3134F>
{0x31350, 0x323AF, prID, gcLo}, // [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0x323B0, 0x3FFFD, prID, gcCn}, // [56398] <reserved-323B0>..<reserved-3FFFD>
{0xE0001, 0xE0001, prCM, gcCf}, // LANGUAGE TAG {0xE0001, 0xE0001, prCM, gcCf}, // LANGUAGE TAG
{0xE0020, 0xE007F, prCM, gcCf}, // [96] TAG SPACE..CANCEL TAG {0xE0020, 0xE007F, prCM, gcCf}, // [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prCM, gcMn}, // [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 {0xE0100, 0xE01EF, prCM, gcMn}, // [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View file

@ -64,222 +64,381 @@ const (
LineMustBreak // You must break the line here. LineMustBreak // You must break the line here.
) )
// The line break parser's state transitions. It's analogous to grTransitions, // lbTransitions implements the line break parser's state transitions. It's
// see comments there for details. Unicode version 14.0.0. // analogous to [grTransitions], see comments there for details.
var lbTransitions = map[[2]int][3]int{ //
// Unicode version 15.0.0.
func lbTransitions(state, prop int) (newState, lineBreak, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// LB4. // LB4.
{lbAny, prBK}: {lbBK, LineCanBreak, 310}, case lbBK | prAny<<32:
{lbBK, prAny}: {lbAny, LineMustBreak, 40}, return lbAny, LineMustBreak, 40
// LB5. // LB5.
{lbAny, prCR}: {lbCR, LineCanBreak, 310}, case lbCR | prLF<<32:
{lbAny, prLF}: {lbLF, LineCanBreak, 310}, return lbLF, LineDontBreak, 50
{lbAny, prNL}: {lbNL, LineCanBreak, 310}, case lbCR | prAny<<32:
{lbCR, prLF}: {lbLF, LineDontBreak, 50}, return lbAny, LineMustBreak, 50
{lbCR, prAny}: {lbAny, LineMustBreak, 50}, case lbLF | prAny<<32:
{lbLF, prAny}: {lbAny, LineMustBreak, 50}, return lbAny, LineMustBreak, 50
{lbNL, prAny}: {lbAny, LineMustBreak, 50}, case lbNL | prAny<<32:
return lbAny, LineMustBreak, 50
// LB6. // LB6.
{lbAny, prBK}: {lbBK, LineDontBreak, 60}, case lbAny | prBK<<32:
{lbAny, prCR}: {lbCR, LineDontBreak, 60}, return lbBK, LineDontBreak, 60
{lbAny, prLF}: {lbLF, LineDontBreak, 60}, case lbAny | prCR<<32:
{lbAny, prNL}: {lbNL, LineDontBreak, 60}, return lbCR, LineDontBreak, 60
case lbAny | prLF<<32:
return lbLF, LineDontBreak, 60
case lbAny | prNL<<32:
return lbNL, LineDontBreak, 60
// LB7. // LB7.
{lbAny, prSP}: {lbSP, LineDontBreak, 70}, case lbAny | prSP<<32:
{lbAny, prZW}: {lbZW, LineDontBreak, 70}, return lbSP, LineDontBreak, 70
case lbAny | prZW<<32:
return lbZW, LineDontBreak, 70
// LB8. // LB8.
{lbZW, prSP}: {lbZW, LineDontBreak, 70}, case lbZW | prSP<<32:
{lbZW, prAny}: {lbAny, LineCanBreak, 80}, return lbZW, LineDontBreak, 70
case lbZW | prAny<<32:
return lbAny, LineCanBreak, 80
// LB11. // LB11.
{lbAny, prWJ}: {lbWJ, LineDontBreak, 110}, case lbAny | prWJ<<32:
{lbWJ, prAny}: {lbAny, LineDontBreak, 110}, return lbWJ, LineDontBreak, 110
case lbWJ | prAny<<32:
return lbAny, LineDontBreak, 110
// LB12. // LB12.
{lbAny, prGL}: {lbGL, LineCanBreak, 310}, case lbAny | prGL<<32:
{lbGL, prAny}: {lbAny, LineDontBreak, 120}, return lbGL, LineCanBreak, 310
case lbGL | prAny<<32:
return lbAny, LineDontBreak, 120
// LB13 (simple transitions). // LB13 (simple transitions).
{lbAny, prCL}: {lbCL, LineCanBreak, 310}, case lbAny | prCL<<32:
{lbAny, prCP}: {lbCP, LineCanBreak, 310}, return lbCL, LineCanBreak, 310
{lbAny, prEX}: {lbEX, LineDontBreak, 130}, case lbAny | prCP<<32:
{lbAny, prIS}: {lbIS, LineCanBreak, 310}, return lbCP, LineCanBreak, 310
{lbAny, prSY}: {lbSY, LineCanBreak, 310}, case lbAny | prEX<<32:
return lbEX, LineDontBreak, 130
case lbAny | prIS<<32:
return lbIS, LineCanBreak, 310
case lbAny | prSY<<32:
return lbSY, LineCanBreak, 310
// LB14. // LB14.
{lbAny, prOP}: {lbOP, LineCanBreak, 310}, case lbAny | prOP<<32:
{lbOP, prSP}: {lbOP, LineDontBreak, 70}, return lbOP, LineCanBreak, 310
{lbOP, prAny}: {lbAny, LineDontBreak, 140}, case lbOP | prSP<<32:
return lbOP, LineDontBreak, 70
case lbOP | prAny<<32:
return lbAny, LineDontBreak, 140
// LB15. // LB15.
{lbQU, prSP}: {lbQUSP, LineDontBreak, 70}, case lbQU | prSP<<32:
{lbQU, prOP}: {lbOP, LineDontBreak, 150}, return lbQUSP, LineDontBreak, 70
{lbQUSP, prOP}: {lbOP, LineDontBreak, 150}, case lbQU | prOP<<32:
return lbOP, LineDontBreak, 150
case lbQUSP | prOP<<32:
return lbOP, LineDontBreak, 150
// LB16. // LB16.
{lbCL, prSP}: {lbCLCPSP, LineDontBreak, 70}, case lbCL | prSP<<32:
{lbNUCL, prSP}: {lbCLCPSP, LineDontBreak, 70}, return lbCLCPSP, LineDontBreak, 70
{lbCP, prSP}: {lbCLCPSP, LineDontBreak, 70}, case lbNUCL | prSP<<32:
{lbNUCP, prSP}: {lbCLCPSP, LineDontBreak, 70}, return lbCLCPSP, LineDontBreak, 70
{lbCL, prNS}: {lbNS, LineDontBreak, 160}, case lbCP | prSP<<32:
{lbNUCL, prNS}: {lbNS, LineDontBreak, 160}, return lbCLCPSP, LineDontBreak, 70
{lbCP, prNS}: {lbNS, LineDontBreak, 160}, case lbNUCP | prSP<<32:
{lbNUCP, prNS}: {lbNS, LineDontBreak, 160}, return lbCLCPSP, LineDontBreak, 70
{lbCLCPSP, prNS}: {lbNS, LineDontBreak, 160}, case lbCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCL | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbNUCP | prNS<<32:
return lbNS, LineDontBreak, 160
case lbCLCPSP | prNS<<32:
return lbNS, LineDontBreak, 160
// LB17. // LB17.
{lbAny, prB2}: {lbB2, LineCanBreak, 310}, case lbAny | prB2<<32:
{lbB2, prSP}: {lbB2SP, LineDontBreak, 70}, return lbB2, LineCanBreak, 310
{lbB2, prB2}: {lbB2, LineDontBreak, 170}, case lbB2 | prSP<<32:
{lbB2SP, prB2}: {lbB2, LineDontBreak, 170}, return lbB2SP, LineDontBreak, 70
case lbB2 | prB2<<32:
return lbB2, LineDontBreak, 170
case lbB2SP | prB2<<32:
return lbB2, LineDontBreak, 170
// LB18. // LB18.
{lbSP, prAny}: {lbAny, LineCanBreak, 180}, case lbSP | prAny<<32:
{lbQUSP, prAny}: {lbAny, LineCanBreak, 180}, return lbAny, LineCanBreak, 180
{lbCLCPSP, prAny}: {lbAny, LineCanBreak, 180}, case lbQUSP | prAny<<32:
{lbB2SP, prAny}: {lbAny, LineCanBreak, 180}, return lbAny, LineCanBreak, 180
case lbCLCPSP | prAny<<32:
return lbAny, LineCanBreak, 180
case lbB2SP | prAny<<32:
return lbAny, LineCanBreak, 180
// LB19. // LB19.
{lbAny, prQU}: {lbQU, LineDontBreak, 190}, case lbAny | prQU<<32:
{lbQU, prAny}: {lbAny, LineDontBreak, 190}, return lbQU, LineDontBreak, 190
case lbQU | prAny<<32:
return lbAny, LineDontBreak, 190
// LB20. // LB20.
{lbAny, prCB}: {lbCB, LineCanBreak, 200}, case lbAny | prCB<<32:
{lbCB, prAny}: {lbAny, LineCanBreak, 200}, return lbCB, LineCanBreak, 200
case lbCB | prAny<<32:
return lbAny, LineCanBreak, 200
// LB21. // LB21.
{lbAny, prBA}: {lbBA, LineDontBreak, 210}, case lbAny | prBA<<32:
{lbAny, prHY}: {lbHY, LineDontBreak, 210}, return lbBA, LineDontBreak, 210
{lbAny, prNS}: {lbNS, LineDontBreak, 210}, case lbAny | prHY<<32:
{lbAny, prBB}: {lbBB, LineCanBreak, 310}, return lbHY, LineDontBreak, 210
{lbBB, prAny}: {lbAny, LineDontBreak, 210}, case lbAny | prNS<<32:
return lbNS, LineDontBreak, 210
case lbAny | prBB<<32:
return lbBB, LineCanBreak, 310
case lbBB | prAny<<32:
return lbAny, LineDontBreak, 210
// LB21a. // LB21a.
{lbAny, prHL}: {lbHL, LineCanBreak, 310}, case lbAny | prHL<<32:
{lbHL, prHY}: {lbLB21a, LineDontBreak, 210}, return lbHL, LineCanBreak, 310
{lbHL, prBA}: {lbLB21a, LineDontBreak, 210}, case lbHL | prHY<<32:
{lbLB21a, prAny}: {lbAny, LineDontBreak, 211}, return lbLB21a, LineDontBreak, 210
case lbHL | prBA<<32:
return lbLB21a, LineDontBreak, 210
case lbLB21a | prAny<<32:
return lbAny, LineDontBreak, 211
// LB21b. // LB21b.
{lbSY, prHL}: {lbHL, LineDontBreak, 212}, case lbSY | prHL<<32:
{lbNUSY, prHL}: {lbHL, LineDontBreak, 212}, return lbHL, LineDontBreak, 212
case lbNUSY | prHL<<32:
return lbHL, LineDontBreak, 212
// LB22. // LB22.
{lbAny, prIN}: {lbAny, LineDontBreak, 220}, case lbAny | prIN<<32:
return lbAny, LineDontBreak, 220
// LB23. // LB23.
{lbAny, prAL}: {lbAL, LineCanBreak, 310}, case lbAny | prAL<<32:
{lbAny, prNU}: {lbNU, LineCanBreak, 310}, return lbAL, LineCanBreak, 310
{lbAL, prNU}: {lbNU, LineDontBreak, 230}, case lbAny | prNU<<32:
{lbHL, prNU}: {lbNU, LineDontBreak, 230}, return lbNU, LineCanBreak, 310
{lbNU, prAL}: {lbAL, LineDontBreak, 230}, case lbAL | prNU<<32:
{lbNU, prHL}: {lbHL, LineDontBreak, 230}, return lbNU, LineDontBreak, 230
{lbNUNU, prAL}: {lbAL, LineDontBreak, 230}, case lbHL | prNU<<32:
{lbNUNU, prHL}: {lbHL, LineDontBreak, 230}, return lbNU, LineDontBreak, 230
case lbNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNU | prHL<<32:
return lbHL, LineDontBreak, 230
case lbNUNU | prAL<<32:
return lbAL, LineDontBreak, 230
case lbNUNU | prHL<<32:
return lbHL, LineDontBreak, 230
// LB23a. // LB23a.
{lbAny, prPR}: {lbPR, LineCanBreak, 310}, case lbAny | prPR<<32:
{lbAny, prID}: {lbIDEM, LineCanBreak, 310}, return lbPR, LineCanBreak, 310
{lbAny, prEB}: {lbEB, LineCanBreak, 310}, case lbAny | prID<<32:
{lbAny, prEM}: {lbIDEM, LineCanBreak, 310}, return lbIDEM, LineCanBreak, 310
{lbPR, prID}: {lbIDEM, LineDontBreak, 231}, case lbAny | prEB<<32:
{lbPR, prEB}: {lbEB, LineDontBreak, 231}, return lbEB, LineCanBreak, 310
{lbPR, prEM}: {lbIDEM, LineDontBreak, 231}, case lbAny | prEM<<32:
{lbIDEM, prPO}: {lbPO, LineDontBreak, 231}, return lbIDEM, LineCanBreak, 310
{lbEB, prPO}: {lbPO, LineDontBreak, 231}, case lbPR | prID<<32:
return lbIDEM, LineDontBreak, 231
case lbPR | prEB<<32:
return lbEB, LineDontBreak, 231
case lbPR | prEM<<32:
return lbIDEM, LineDontBreak, 231
case lbIDEM | prPO<<32:
return lbPO, LineDontBreak, 231
case lbEB | prPO<<32:
return lbPO, LineDontBreak, 231
// LB24. // LB24.
{lbAny, prPO}: {lbPO, LineCanBreak, 310}, case lbAny | prPO<<32:
{lbPR, prAL}: {lbAL, LineDontBreak, 240}, return lbPO, LineCanBreak, 310
{lbPR, prHL}: {lbHL, LineDontBreak, 240}, case lbPR | prAL<<32:
{lbPO, prAL}: {lbAL, LineDontBreak, 240}, return lbAL, LineDontBreak, 240
{lbPO, prHL}: {lbHL, LineDontBreak, 240}, case lbPR | prHL<<32:
{lbAL, prPR}: {lbPR, LineDontBreak, 240}, return lbHL, LineDontBreak, 240
{lbAL, prPO}: {lbPO, LineDontBreak, 240}, case lbPO | prAL<<32:
{lbHL, prPR}: {lbPR, LineDontBreak, 240}, return lbAL, LineDontBreak, 240
{lbHL, prPO}: {lbPO, LineDontBreak, 240}, case lbPO | prHL<<32:
return lbHL, LineDontBreak, 240
case lbAL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbAL | prPO<<32:
return lbPO, LineDontBreak, 240
case lbHL | prPR<<32:
return lbPR, LineDontBreak, 240
case lbHL | prPO<<32:
return lbPO, LineDontBreak, 240
// LB25 (simple transitions). // LB25 (simple transitions).
{lbPR, prNU}: {lbNU, LineDontBreak, 250}, case lbPR | prNU<<32:
{lbPO, prNU}: {lbNU, LineDontBreak, 250}, return lbNU, LineDontBreak, 250
{lbOP, prNU}: {lbNU, LineDontBreak, 250}, case lbPO | prNU<<32:
{lbHY, prNU}: {lbNU, LineDontBreak, 250}, return lbNU, LineDontBreak, 250
{lbNU, prNU}: {lbNUNU, LineDontBreak, 250}, case lbOP | prNU<<32:
{lbNU, prSY}: {lbNUSY, LineDontBreak, 250}, return lbNU, LineDontBreak, 250
{lbNU, prIS}: {lbNUIS, LineDontBreak, 250}, case lbHY | prNU<<32:
{lbNUNU, prNU}: {lbNUNU, LineDontBreak, 250}, return lbNU, LineDontBreak, 250
{lbNUNU, prSY}: {lbNUSY, LineDontBreak, 250}, case lbNU | prNU<<32:
{lbNUNU, prIS}: {lbNUIS, LineDontBreak, 250}, return lbNUNU, LineDontBreak, 250
{lbNUSY, prNU}: {lbNUNU, LineDontBreak, 250}, case lbNU | prSY<<32:
{lbNUSY, prSY}: {lbNUSY, LineDontBreak, 250}, return lbNUSY, LineDontBreak, 250
{lbNUSY, prIS}: {lbNUIS, LineDontBreak, 250}, case lbNU | prIS<<32:
{lbNUIS, prNU}: {lbNUNU, LineDontBreak, 250}, return lbNUIS, LineDontBreak, 250
{lbNUIS, prSY}: {lbNUSY, LineDontBreak, 250}, case lbNUNU | prNU<<32:
{lbNUIS, prIS}: {lbNUIS, LineDontBreak, 250}, return lbNUNU, LineDontBreak, 250
{lbNU, prCL}: {lbNUCL, LineDontBreak, 250}, case lbNUNU | prSY<<32:
{lbNU, prCP}: {lbNUCP, LineDontBreak, 250}, return lbNUSY, LineDontBreak, 250
{lbNUNU, prCL}: {lbNUCL, LineDontBreak, 250}, case lbNUNU | prIS<<32:
{lbNUNU, prCP}: {lbNUCP, LineDontBreak, 250}, return lbNUIS, LineDontBreak, 250
{lbNUSY, prCL}: {lbNUCL, LineDontBreak, 250}, case lbNUSY | prNU<<32:
{lbNUSY, prCP}: {lbNUCP, LineDontBreak, 250}, return lbNUNU, LineDontBreak, 250
{lbNUIS, prCL}: {lbNUCL, LineDontBreak, 250}, case lbNUSY | prSY<<32:
{lbNUIS, prCP}: {lbNUCP, LineDontBreak, 250}, return lbNUSY, LineDontBreak, 250
{lbNU, prPO}: {lbPO, LineDontBreak, 250}, case lbNUSY | prIS<<32:
{lbNUNU, prPO}: {lbPO, LineDontBreak, 250}, return lbNUIS, LineDontBreak, 250
{lbNUSY, prPO}: {lbPO, LineDontBreak, 250}, case lbNUIS | prNU<<32:
{lbNUIS, prPO}: {lbPO, LineDontBreak, 250}, return lbNUNU, LineDontBreak, 250
{lbNUCL, prPO}: {lbPO, LineDontBreak, 250}, case lbNUIS | prSY<<32:
{lbNUCP, prPO}: {lbPO, LineDontBreak, 250}, return lbNUSY, LineDontBreak, 250
{lbNU, prPR}: {lbPR, LineDontBreak, 250}, case lbNUIS | prIS<<32:
{lbNUNU, prPR}: {lbPR, LineDontBreak, 250}, return lbNUIS, LineDontBreak, 250
{lbNUSY, prPR}: {lbPR, LineDontBreak, 250}, case lbNU | prCL<<32:
{lbNUIS, prPR}: {lbPR, LineDontBreak, 250}, return lbNUCL, LineDontBreak, 250
{lbNUCL, prPR}: {lbPR, LineDontBreak, 250}, case lbNU | prCP<<32:
{lbNUCP, prPR}: {lbPR, LineDontBreak, 250}, return lbNUCP, LineDontBreak, 250
case lbNUNU | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUNU | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUSY | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUSY | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNUIS | prCL<<32:
return lbNUCL, LineDontBreak, 250
case lbNUIS | prCP<<32:
return lbNUCP, LineDontBreak, 250
case lbNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUNU | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUSY | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUIS | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCL | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNUCP | prPO<<32:
return lbPO, LineDontBreak, 250
case lbNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUNU | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUSY | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUIS | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCL | prPR<<32:
return lbPR, LineDontBreak, 250
case lbNUCP | prPR<<32:
return lbPR, LineDontBreak, 250
// LB26. // LB26.
{lbAny, prJL}: {lbJL, LineCanBreak, 310}, case lbAny | prJL<<32:
{lbAny, prJV}: {lbJV, LineCanBreak, 310}, return lbJL, LineCanBreak, 310
{lbAny, prJT}: {lbJT, LineCanBreak, 310}, case lbAny | prJV<<32:
{lbAny, prH2}: {lbH2, LineCanBreak, 310}, return lbJV, LineCanBreak, 310
{lbAny, prH3}: {lbH3, LineCanBreak, 310}, case lbAny | prJT<<32:
{lbJL, prJL}: {lbJL, LineDontBreak, 260}, return lbJT, LineCanBreak, 310
{lbJL, prJV}: {lbJV, LineDontBreak, 260}, case lbAny | prH2<<32:
{lbJL, prH2}: {lbH2, LineDontBreak, 260}, return lbH2, LineCanBreak, 310
{lbJL, prH3}: {lbH3, LineDontBreak, 260}, case lbAny | prH3<<32:
{lbJV, prJV}: {lbJV, LineDontBreak, 260}, return lbH3, LineCanBreak, 310
{lbJV, prJT}: {lbJT, LineDontBreak, 260}, case lbJL | prJL<<32:
{lbH2, prJV}: {lbJV, LineDontBreak, 260}, return lbJL, LineDontBreak, 260
{lbH2, prJT}: {lbJT, LineDontBreak, 260}, case lbJL | prJV<<32:
{lbJT, prJT}: {lbJT, LineDontBreak, 260}, return lbJV, LineDontBreak, 260
{lbH3, prJT}: {lbJT, LineDontBreak, 260}, case lbJL | prH2<<32:
return lbH2, LineDontBreak, 260
case lbJL | prH3<<32:
return lbH3, LineDontBreak, 260
case lbJV | prJV<<32:
return lbJV, LineDontBreak, 260
case lbJV | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH2 | prJV<<32:
return lbJV, LineDontBreak, 260
case lbH2 | prJT<<32:
return lbJT, LineDontBreak, 260
case lbJT | prJT<<32:
return lbJT, LineDontBreak, 260
case lbH3 | prJT<<32:
return lbJT, LineDontBreak, 260
// LB27. // LB27.
{lbJL, prPO}: {lbPO, LineDontBreak, 270}, case lbJL | prPO<<32:
{lbJV, prPO}: {lbPO, LineDontBreak, 270}, return lbPO, LineDontBreak, 270
{lbJT, prPO}: {lbPO, LineDontBreak, 270}, case lbJV | prPO<<32:
{lbH2, prPO}: {lbPO, LineDontBreak, 270}, return lbPO, LineDontBreak, 270
{lbH3, prPO}: {lbPO, LineDontBreak, 270}, case lbJT | prPO<<32:
{lbPR, prJL}: {lbJL, LineDontBreak, 270}, return lbPO, LineDontBreak, 270
{lbPR, prJV}: {lbJV, LineDontBreak, 270}, case lbH2 | prPO<<32:
{lbPR, prJT}: {lbJT, LineDontBreak, 270}, return lbPO, LineDontBreak, 270
{lbPR, prH2}: {lbH2, LineDontBreak, 270}, case lbH3 | prPO<<32:
{lbPR, prH3}: {lbH3, LineDontBreak, 270}, return lbPO, LineDontBreak, 270
case lbPR | prJL<<32:
return lbJL, LineDontBreak, 270
case lbPR | prJV<<32:
return lbJV, LineDontBreak, 270
case lbPR | prJT<<32:
return lbJT, LineDontBreak, 270
case lbPR | prH2<<32:
return lbH2, LineDontBreak, 270
case lbPR | prH3<<32:
return lbH3, LineDontBreak, 270
// LB28. // LB28.
{lbAL, prAL}: {lbAL, LineDontBreak, 280}, case lbAL | prAL<<32:
{lbAL, prHL}: {lbHL, LineDontBreak, 280}, return lbAL, LineDontBreak, 280
{lbHL, prAL}: {lbAL, LineDontBreak, 280}, case lbAL | prHL<<32:
{lbHL, prHL}: {lbHL, LineDontBreak, 280}, return lbHL, LineDontBreak, 280
case lbHL | prAL<<32:
return lbAL, LineDontBreak, 280
case lbHL | prHL<<32:
return lbHL, LineDontBreak, 280
// LB29. // LB29.
{lbIS, prAL}: {lbAL, LineDontBreak, 290}, case lbIS | prAL<<32:
{lbIS, prHL}: {lbHL, LineDontBreak, 290}, return lbAL, LineDontBreak, 290
{lbNUIS, prAL}: {lbAL, LineDontBreak, 290}, case lbIS | prHL<<32:
{lbNUIS, prHL}: {lbHL, LineDontBreak, 290}, return lbHL, LineDontBreak, 290
case lbNUIS | prAL<<32:
return lbAL, LineDontBreak, 290
case lbNUIS | prHL<<32:
return lbHL, LineDontBreak, 290
default:
return -1, -1, -1
}
} }
// transitionLineBreakState determines the new state of the line break parser // transitionLineBreakState determines the new state of the line break parser
@ -290,7 +449,7 @@ var lbTransitions = map[[2]int][3]int{
// further lookups. // further lookups.
func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) { func transitionLineBreakState(state int, r rune, b []byte, str string) (newState int, lineBreak int) {
// Determine the property of the next character. // Determine the property of the next character.
nextProperty, generalCategory := propertyWithGenCat(lineBreakCodePoints, r) nextProperty, generalCategory := propertyLineBreak(r)
// Prepare. // Prepare.
var forceNoBreak, isCPeaFWH bool var forceNoBreak, isCPeaFWH bool
@ -306,7 +465,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
defer func() { defer func() {
// Transition into LB30. // Transition into LB30.
if newState == lbCP || newState == lbNUCP { if newState == lbCP || newState == lbNUCP {
ea := property(eastAsianWidth, r) ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH { if ea != prF && ea != prW && ea != prH {
newState |= lbCPeaFWHBit newState |= lbCPeaFWHBit
} }
@ -352,30 +511,27 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
// Find the applicable transition in the table. // Find the applicable transition in the table.
var rule int var rule int
transition, ok := lbTransitions[[2]int{state, nextProperty}] newState, lineBreak, rule = lbTransitions(state, nextProperty)
if ok { if newState < 0 {
// We have a specific transition. We'll use it.
newState, lineBreak, rule = transition[0], transition[1], transition[2]
} else {
// No specific transition found. Try the less specific ones. // No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := lbTransitions[[2]int{state, prAny}] anyPropProp, anyPropLineBreak, anyPropRule := lbTransitions(state, prAny)
transAnyState, okAnyState := lbTransitions[[2]int{lbAny, nextProperty}] anyStateProp, anyStateLineBreak, anyStateRule := lbTransitions(lbAny, nextProperty)
if okAnyProp && okAnyState { if anyPropProp >= 0 && anyStateProp >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions). // Both apply. We'll use a mix (see comments for grTransitions).
newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2] newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
if transAnyProp[2] < transAnyState[2] { if anyPropRule < anyStateRule {
lineBreak, rule = transAnyProp[1], transAnyProp[2] lineBreak, rule = anyPropLineBreak, anyPropRule
} }
} else if okAnyProp { } else if anyPropProp >= 0 {
// We only have a specific state. // We only have a specific state.
newState, lineBreak, rule = transAnyProp[0], transAnyProp[1], transAnyProp[2] newState, lineBreak, rule = anyPropProp, anyPropLineBreak, anyPropRule
// This branch will probably never be reached because okAnyState will // This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here // always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be // for future modifications to the transition map where this may not be
// true anymore. // true anymore.
} else if okAnyState { } else if anyStateProp >= 0 {
// We only have a specific property. // We only have a specific property.
newState, lineBreak, rule = transAnyState[0], transAnyState[1], transAnyState[2] newState, lineBreak, rule = anyStateProp, anyStateLineBreak, anyStateRule
} else { } else {
// No known transition. LB31: ALL ÷ ALL. // No known transition. LB31: ALL ÷ ALL.
newState, lineBreak, rule = lbAny, LineCanBreak, 310 newState, lineBreak, rule = lbAny, LineCanBreak, 310
@ -414,7 +570,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
r, _ = utf8.DecodeRuneInString(str) r, _ = utf8.DecodeRuneInString(str)
} }
if r != utf8.RuneError { if r != utf8.RuneError {
pr, _ := propertyWithGenCat(lineBreakCodePoints, r) pr, _ := propertyLineBreak(r)
if pr == prNU { if pr == prNU {
return lbNU, LineDontBreak return lbNU, LineDontBreak
} }
@ -424,7 +580,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
// LB30 (part one). // LB30 (part one).
if rule > 300 { if rule > 300 {
if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP { if (state == lbAL || state == lbHL || state == lbNU || state == lbNUNU) && nextProperty == prOP {
ea := property(eastAsianWidth, r) ea := propertyEastAsianWidth(r)
if ea != prF && ea != prW && ea != prH { if ea != prF && ea != prW && ea != prH {
return lbOP, LineDontBreak return lbOP, LineDontBreak
} }
@ -460,7 +616,7 @@ func transitionLineBreakState(state int, r rune, b []byte, str string) (newState
return prAny, LineDontBreak return prAny, LineDontBreak
} }
} }
graphemeProperty := property(graphemeCodePoints, r) graphemeProperty := propertyGraphemes(r)
if graphemeProperty == prExtendedPictographic && generalCategory == gcCn { if graphemeProperty == prExtendedPictographic && generalCategory == gcCn {
return lbExtPicCn, LineCanBreak return lbExtPicCn, LineCanBreak
} }

View file

@ -160,9 +160,49 @@ func property(dictionary [][3]int, r rune) int {
return propertySearch(dictionary, r)[2] return propertySearch(dictionary, r)[2]
} }
// propertyWithGenCat returns the Unicode property value and General Category // propertyLineBreak returns the Unicode property value and General Category
// (see constants above) of the given code point. // (see constants above) of the given code point, as listed in the line break
func propertyWithGenCat(dictionary [][4]int, r rune) (property, generalCategory int) { // code points table, while fast tracking ASCII digits and letters.
entry := propertySearch(dictionary, r) func propertyLineBreak(r rune) (property, generalCategory int) {
if r >= 'a' && r <= 'z' {
return prAL, gcLl
}
if r >= 'A' && r <= 'Z' {
return prAL, gcLu
}
if r >= '0' && r <= '9' {
return prNU, gcNd
}
entry := propertySearch(lineBreakCodePoints, r)
return entry[2], entry[3] return entry[2], entry[3]
} }
// propertyGraphemes looks up the Unicode grapheme cluster property value of
// the code point r. Code points in the ASCII range are answered directly from
// the cases below; every other value of r (including negative ones) is
// resolved through property using the graphemeCodePoints table.
func propertyGraphemes(r rune) int {
	switch {
	case r >= 0x20 && r <= 0x7e:
		// Printable ASCII is checked first.
		return prAny
	case r == 0x0a:
		return prLF
	case r == 0x0d:
		return prCR
	case (r >= 0 && r <= 0x1f) || r == 0x7f:
		// The remaining C0 control characters (LF and CR were handled above)
		// and DEL.
		return prControl
	}

	// Not ASCII: fall back to the table lookup.
	return property(graphemeCodePoints, r)
}
// propertyEastAsianWidth looks up the Unicode East Asian Width property value
// of the code point r. Code points in the ASCII range are answered directly
// from the cases below; every other value of r (including negative ones) is
// resolved through property using the eastAsianWidth table.
func propertyEastAsianWidth(r rune) int {
	switch {
	case r >= 0x20 && r <= 0x7e:
		// Printable ASCII.
		return prNa
	case (r >= 0 && r <= 0x1f) || r == 0x7f:
		// C0 control characters and DEL.
		return prN
	}

	// Not ASCII: fall back to the table lookup.
	return property(eastAsianWidth, r)
}

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// sentenceBreakCodePoints are taken from // sentenceBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/SentenceBreakProperty.txt // https://www.unicode.org/Public/15.0.0/ucd/auxiliary/SentenceBreakProperty.txt
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var sentenceBreakCodePoints = [][3]int{ var sentenceBreakCodePoints = [][3]int{
{0x0009, 0x0009, prSp}, // Cc <control-0009> {0x0009, 0x0009, prSp}, // Cc <control-0009>
@ -843,6 +843,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prOLetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA {0x0CF1, 0x0CF2, prOLetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prOLetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L {0x0D04, 0x0D0C, prOLetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@ -896,7 +897,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x0EBD, 0x0EBD, prOLetter}, // Lo LAO SEMIVOWEL SIGN NYO {0x0EBD, 0x0EBD, prOLetter}, // Lo LAO SEMIVOWEL SIGN NYO
{0x0EC0, 0x0EC4, prOLetter}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI {0x0EC0, 0x0EC4, prOLetter}, // Lo [5] LAO VOWEL SIGN E..LAO VOWEL SIGN AI
{0x0EC6, 0x0EC6, prOLetter}, // Lm LAO KO LA {0x0EC6, 0x0EC6, prOLetter}, // Lm LAO KO LA
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0EDC, 0x0EDF, prOLetter}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO {0x0EDC, 0x0EDF, prOLetter}, // Lo [4] LAO HO NO..LAO LETTER KHMU NYO
{0x0F00, 0x0F00, prOLetter}, // Lo TIBETAN SYLLABLE OM {0x0F00, 0x0F00, prOLetter}, // Lo TIBETAN SYLLABLE OM
@ -958,7 +959,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x10C7, 0x10C7, prUpper}, // L& GEORGIAN CAPITAL LETTER YN {0x10C7, 0x10C7, prUpper}, // L& GEORGIAN CAPITAL LETTER YN
{0x10CD, 0x10CD, prUpper}, // L& GEORGIAN CAPITAL LETTER AEN {0x10CD, 0x10CD, prUpper}, // L& GEORGIAN CAPITAL LETTER AEN
{0x10D0, 0x10FA, prOLetter}, // L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN {0x10D0, 0x10FA, prOLetter}, // L& [43] GEORGIAN LETTER AN..GEORGIAN LETTER AIN
{0x10FC, 0x10FC, prOLetter}, // Lm MODIFIER LETTER GEORGIAN NAR {0x10FC, 0x10FC, prLower}, // Lm MODIFIER LETTER GEORGIAN NAR
{0x10FD, 0x10FF, prOLetter}, // L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN {0x10FD, 0x10FF, prOLetter}, // L& [3] GEORGIAN LETTER AEN..GEORGIAN LETTER LABIAL SIGN
{0x1100, 0x1248, prOLetter}, // Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA {0x1100, 0x1248, prOLetter}, // Lo [329] HANGUL CHOSEONG KIYEOK..ETHIOPIC SYLLABLE QWA
{0x124A, 0x124D, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE {0x124A, 0x124D, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
@ -2034,7 +2035,7 @@ var sentenceBreakCodePoints = [][3]int{
{0xA7D7, 0xA7D7, prLower}, // L& LATIN SMALL LETTER MIDDLE SCOTS S {0xA7D7, 0xA7D7, prLower}, // L& LATIN SMALL LETTER MIDDLE SCOTS S
{0xA7D8, 0xA7D8, prUpper}, // L& LATIN CAPITAL LETTER SIGMOID S {0xA7D8, 0xA7D8, prUpper}, // L& LATIN CAPITAL LETTER SIGMOID S
{0xA7D9, 0xA7D9, prLower}, // L& LATIN SMALL LETTER SIGMOID S {0xA7D9, 0xA7D9, prLower}, // L& LATIN SMALL LETTER SIGMOID S
{0xA7F2, 0xA7F4, prOLetter}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q {0xA7F2, 0xA7F4, prLower}, // Lm [3] MODIFIER LETTER CAPITAL C..MODIFIER LETTER CAPITAL Q
{0xA7F5, 0xA7F5, prUpper}, // L& LATIN CAPITAL LETTER REVERSED HALF H {0xA7F5, 0xA7F5, prUpper}, // L& LATIN CAPITAL LETTER REVERSED HALF H
{0xA7F6, 0xA7F6, prLower}, // L& LATIN SMALL LETTER REVERSED HALF H {0xA7F6, 0xA7F6, prLower}, // L& LATIN SMALL LETTER REVERSED HALF H
{0xA7F7, 0xA7F7, prOLetter}, // Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I {0xA7F7, 0xA7F7, prOLetter}, // Lo LATIN EPIGRAPHIC LETTER SIDEWAYS I
@ -2140,7 +2141,7 @@ var sentenceBreakCodePoints = [][3]int{
{0xAB30, 0xAB5A, prLower}, // L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG {0xAB30, 0xAB5A, prLower}, // L& [43] LATIN SMALL LETTER BARRED ALPHA..LATIN SMALL LETTER Y WITH SHORT RIGHT LEG
{0xAB5C, 0xAB5F, prLower}, // Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK {0xAB5C, 0xAB5F, prLower}, // Lm [4] MODIFIER LETTER SMALL HENG..MODIFIER LETTER SMALL U WITH LEFT HOOK
{0xAB60, 0xAB68, prLower}, // L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE {0xAB60, 0xAB68, prLower}, // L& [9] LATIN SMALL LETTER SAKHA YAT..LATIN SMALL LETTER TURNED R WITH MIDDLE TILDE
{0xAB69, 0xAB69, prOLetter}, // Lm MODIFIER LETTER SMALL TURNED W {0xAB69, 0xAB69, prLower}, // Lm MODIFIER LETTER SMALL TURNED W
{0xAB70, 0xABBF, prLower}, // L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA {0xAB70, 0xABBF, prLower}, // L& [80] CHEROKEE SMALL LETTER A..CHEROKEE SMALL LETTER YA
{0xABC0, 0xABE2, prOLetter}, // Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM {0xABC0, 0xABE2, prOLetter}, // Lo [35] MEETEI MAYEK LETTER KOK..MEETEI MAYEK LETTER I LONSUM
{0xABE3, 0xABE4, prExtend}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP {0xABE3, 0xABE4, prExtend}, // Mc [2] MEETEI MAYEK VOWEL SIGN ONAP..MEETEI MAYEK VOWEL SIGN INAP
@ -2334,6 +2335,7 @@ var sentenceBreakCodePoints = [][3]int{
{0x10E80, 0x10EA9, prOLetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET {0x10E80, 0x10EA9, prOLetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EB0, 0x10EB1, prOLetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE {0x10EB0, 0x10EB1, prOLetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prOLetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F00, 0x10F1C, prOLetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F27, 0x10F27, prOLetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F27, 0x10F27, prOLetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
{0x10F30, 0x10F45, prOLetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN {0x10F30, 0x10F45, prOLetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@ -2408,6 +2410,8 @@ var sentenceBreakCodePoints = [][3]int{
{0x11238, 0x11239, prSTerm}, // Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA {0x11238, 0x11239, prSTerm}, // Po [2] KHOJKI DANDA..KHOJKI DOUBLE DANDA
{0x1123B, 0x1123C, prSTerm}, // Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK {0x1123B, 0x1123C, prSTerm}, // Po [2] KHOJKI SECTION MARK..KHOJKI DOUBLE SECTION MARK
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prOLetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prOLetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11280, 0x11286, prOLetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prOLetter}, // Lo MULTANI LETTER GHA {0x11288, 0x11288, prOLetter}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prOLetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA {0x1128A, 0x1128D, prOLetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@ -2603,13 +2607,29 @@ var sentenceBreakCodePoints = [][3]int{
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11EF7, 0x11EF8, prSTerm}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION {0x11EF7, 0x11EF8, prSTerm}, // Po [2] MAKASAR PASSIMBANG..MAKASAR END OF SECTION
{0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prOLetter}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prOLetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prOLetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x11F43, 0x11F44, prSTerm}, // Po [2] KAWI DANDA..KAWI DOUBLE DANDA
{0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prOLetter}, // Lo LISU LETTER YHA {0x11FB0, 0x11FB0, prOLetter}, // Lo LISU LETTER YHA
{0x12000, 0x12399, prOLetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U {0x12000, 0x12399, prOLetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
{0x12400, 0x1246E, prOLetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM {0x12400, 0x1246E, prOLetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
{0x12480, 0x12543, prOLetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12480, 0x12543, prOLetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prOLetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 {0x12F90, 0x12FF0, prOLetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x13000, 0x1342E, prOLetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 {0x13000, 0x1342F, prOLetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT {0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prOLetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prOLetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x14400, 0x14646, prOLetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prOLetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16800, 0x16A38, prOLetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prOLetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK {0x16A40, 0x16A5E, prOLetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@ -2648,7 +2668,9 @@ var sentenceBreakCodePoints = [][3]int{
{0x1AFF5, 0x1AFFB, prOLetter}, // Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5 {0x1AFF5, 0x1AFFB, prOLetter}, // Lm [7] KATAKANA LETTER MINNAN TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-5
{0x1AFFD, 0x1AFFE, prOLetter}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1AFFD, 0x1AFFE, prOLetter}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B122, prOLetter}, // Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU {0x1B000, 0x1B122, prOLetter}, // Lo [291] KATAKANA LETTER ARCHAIC E..KATAKANA LETTER ARCHAIC WU
{0x1B132, 0x1B132, prOLetter}, // Lo HIRAGANA LETTER SMALL KO
{0x1B150, 0x1B152, prOLetter}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO {0x1B150, 0x1B152, prOLetter}, // Lo [3] HIRAGANA LETTER SMALL WI..HIRAGANA LETTER SMALL WO
{0x1B155, 0x1B155, prOLetter}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prOLetter}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B164, 0x1B167, prOLetter}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1B170, 0x1B2FB, prOLetter}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB {0x1B170, 0x1B2FB, prOLetter}, // Lo [396] NUSHU CHARACTER-1B170..NUSHU CHARACTER-1B2FB
{0x1BC00, 0x1BC6A, prOLetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M {0x1BC00, 0x1BC6A, prOLetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
@ -2738,11 +2760,14 @@ var sentenceBreakCodePoints = [][3]int{
{0x1DF00, 0x1DF09, prLower}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF00, 0x1DF09, prLower}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prOLetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prOLetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prLower}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL {0x1DF0B, 0x1DF1E, prLower}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prLower}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prLower}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prOLetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E100, 0x1E12C, prOLetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prOLetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER {0x1E137, 0x1E13D, prOLetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@ -2753,6 +2778,10 @@ var sentenceBreakCodePoints = [][3]int{
{0x1E2C0, 0x1E2EB, prOLetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH {0x1E2C0, 0x1E2EB, prOLetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E4D0, 0x1E4EA, prOLetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prOLetter}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prOLetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E0, 0x1E7E6, prOLetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7E8, 0x1E7EB, prOLetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prOLetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE {0x1E7ED, 0x1E7EE, prOLetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@ -2803,12 +2832,13 @@ var sentenceBreakCodePoints = [][3]int{
{0x1F676, 0x1F678, prClose}, // So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT {0x1F676, 0x1F678, prClose}, // So [3] SANS-SERIF HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT..SANS-SERIF HEAVY LOW DOUBLE COMMA QUOTATION MARK ORNAMENT
{0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x20000, 0x2A6DF, prOLetter}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF {0x20000, 0x2A6DF, prOLetter}, // Lo [42720] CJK UNIFIED IDEOGRAPH-20000..CJK UNIFIED IDEOGRAPH-2A6DF
{0x2A700, 0x2B738, prOLetter}, // Lo [4153] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B738 {0x2A700, 0x2B739, prOLetter}, // Lo [4154] CJK UNIFIED IDEOGRAPH-2A700..CJK UNIFIED IDEOGRAPH-2B739
{0x2B740, 0x2B81D, prOLetter}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D {0x2B740, 0x2B81D, prOLetter}, // Lo [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
{0x2B820, 0x2CEA1, prOLetter}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1 {0x2B820, 0x2CEA1, prOLetter}, // Lo [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
{0x2CEB0, 0x2EBE0, prOLetter}, // Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0 {0x2CEB0, 0x2EBE0, prOLetter}, // Lo [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
{0x2F800, 0x2FA1D, prOLetter}, // Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D {0x2F800, 0x2FA1D, prOLetter}, // Lo [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
{0x30000, 0x3134A, prOLetter}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A {0x30000, 0x3134A, prOLetter}, // Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
{0x31350, 0x323AF, prOLetter}, // Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF
{0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG {0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG
{0xE0020, 0xE007F, prExtend}, // Cf [96] TAG SPACE..CANCEL TAG {0xE0020, 0xE007F, prExtend}, // Cf [96] TAG SPACE..CANCEL TAG
{0xE0100, 0xE01EF, prExtend}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 {0xE0100, 0xE01EF, prExtend}, // Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256

View file

@ -18,104 +18,178 @@ const (
sbSB8aSp sbSB8aSp
) )
// The sentence break parser's breaking instructions. // sbTransitions implements the sentence break parser's state transitions. It's
const ( // analogous to [grTransitions], see comments there for details.
sbDontBreak = iota //
sbBreak // Unicode version 15.0.0.
) func sbTransitions(state, prop int) (newState int, sentenceBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// The sentence break parser's state transitions. It's analogous to
// grTransitions, see comments there for details. Unicode version 14.0.0.
var sbTransitions = map[[2]int][3]int{
// SB3. // SB3.
{sbAny, prCR}: {sbCR, sbDontBreak, 9990}, case sbAny | prCR<<32:
{sbCR, prLF}: {sbParaSep, sbDontBreak, 30}, return sbCR, false, 9990
case sbCR | prLF<<32:
return sbParaSep, false, 30
// SB4. // SB4.
{sbAny, prSep}: {sbParaSep, sbDontBreak, 9990}, case sbAny | prSep<<32:
{sbAny, prLF}: {sbParaSep, sbDontBreak, 9990}, return sbParaSep, false, 9990
{sbParaSep, prAny}: {sbAny, sbBreak, 40}, case sbAny | prLF<<32:
{sbCR, prAny}: {sbAny, sbBreak, 40}, return sbParaSep, false, 9990
case sbParaSep | prAny<<32:
return sbAny, true, 40
case sbCR | prAny<<32:
return sbAny, true, 40
// SB6. // SB6.
{sbAny, prATerm}: {sbATerm, sbDontBreak, 9990}, case sbAny | prATerm<<32:
{sbATerm, prNumeric}: {sbAny, sbDontBreak, 60}, return sbATerm, false, 9990
{sbSB7, prNumeric}: {sbAny, sbDontBreak, 60}, // Because ATerm also appears in SB7. case sbATerm | prNumeric<<32:
return sbAny, false, 60
case sbSB7 | prNumeric<<32:
return sbAny, false, 60 // Because ATerm also appears in SB7.
// SB7. // SB7.
{sbAny, prUpper}: {sbUpper, sbDontBreak, 9990}, case sbAny | prUpper<<32:
{sbAny, prLower}: {sbLower, sbDontBreak, 9990}, return sbUpper, false, 9990
{sbUpper, prATerm}: {sbSB7, sbDontBreak, 70}, case sbAny | prLower<<32:
{sbLower, prATerm}: {sbSB7, sbDontBreak, 70}, return sbLower, false, 9990
{sbSB7, prUpper}: {sbUpper, sbDontBreak, 70}, case sbUpper | prATerm<<32:
return sbSB7, false, 70
case sbLower | prATerm<<32:
return sbSB7, false, 70
case sbSB7 | prUpper<<32:
return sbUpper, false, 70
// SB8a. // SB8a.
{sbAny, prSTerm}: {sbSTerm, sbDontBreak, 9990}, case sbAny | prSTerm<<32:
{sbATerm, prSContinue}: {sbAny, sbDontBreak, 81}, return sbSTerm, false, 9990
{sbATerm, prATerm}: {sbATerm, sbDontBreak, 81}, case sbATerm | prSContinue<<32:
{sbATerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, return sbAny, false, 81
{sbSB7, prSContinue}: {sbAny, sbDontBreak, 81}, case sbATerm | prATerm<<32:
{sbSB7, prATerm}: {sbATerm, sbDontBreak, 81}, return sbATerm, false, 81
{sbSB7, prSTerm}: {sbSTerm, sbDontBreak, 81}, case sbATerm | prSTerm<<32:
{sbSB8Close, prSContinue}: {sbAny, sbDontBreak, 81}, return sbSTerm, false, 81
{sbSB8Close, prATerm}: {sbATerm, sbDontBreak, 81}, case sbSB7 | prSContinue<<32:
{sbSB8Close, prSTerm}: {sbSTerm, sbDontBreak, 81}, return sbAny, false, 81
{sbSB8Sp, prSContinue}: {sbAny, sbDontBreak, 81}, case sbSB7 | prATerm<<32:
{sbSB8Sp, prATerm}: {sbATerm, sbDontBreak, 81}, return sbATerm, false, 81
{sbSB8Sp, prSTerm}: {sbSTerm, sbDontBreak, 81}, case sbSB7 | prSTerm<<32:
{sbSTerm, prSContinue}: {sbAny, sbDontBreak, 81}, return sbSTerm, false, 81
{sbSTerm, prATerm}: {sbATerm, sbDontBreak, 81}, case sbSB8Close | prSContinue<<32:
{sbSTerm, prSTerm}: {sbSTerm, sbDontBreak, 81}, return sbAny, false, 81
{sbSB8aClose, prSContinue}: {sbAny, sbDontBreak, 81}, case sbSB8Close | prATerm<<32:
{sbSB8aClose, prATerm}: {sbATerm, sbDontBreak, 81}, return sbATerm, false, 81
{sbSB8aClose, prSTerm}: {sbSTerm, sbDontBreak, 81}, case sbSB8Close | prSTerm<<32:
{sbSB8aSp, prSContinue}: {sbAny, sbDontBreak, 81}, return sbSTerm, false, 81
{sbSB8aSp, prATerm}: {sbATerm, sbDontBreak, 81}, case sbSB8Sp | prSContinue<<32:
{sbSB8aSp, prSTerm}: {sbSTerm, sbDontBreak, 81}, return sbAny, false, 81
case sbSB8Sp | prATerm<<32:
return sbATerm, false, 81
case sbSB8Sp | prSTerm<<32:
return sbSTerm, false, 81
case sbSTerm | prSContinue<<32:
return sbAny, false, 81
case sbSTerm | prATerm<<32:
return sbATerm, false, 81
case sbSTerm | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aClose | prSContinue<<32:
return sbAny, false, 81
case sbSB8aClose | prATerm<<32:
return sbATerm, false, 81
case sbSB8aClose | prSTerm<<32:
return sbSTerm, false, 81
case sbSB8aSp | prSContinue<<32:
return sbAny, false, 81
case sbSB8aSp | prATerm<<32:
return sbATerm, false, 81
case sbSB8aSp | prSTerm<<32:
return sbSTerm, false, 81
// SB9. // SB9.
{sbATerm, prClose}: {sbSB8Close, sbDontBreak, 90}, case sbATerm | prClose<<32:
{sbSB7, prClose}: {sbSB8Close, sbDontBreak, 90}, return sbSB8Close, false, 90
{sbSB8Close, prClose}: {sbSB8Close, sbDontBreak, 90}, case sbSB7 | prClose<<32:
{sbATerm, prSp}: {sbSB8Sp, sbDontBreak, 90}, return sbSB8Close, false, 90
{sbSB7, prSp}: {sbSB8Sp, sbDontBreak, 90}, case sbSB8Close | prClose<<32:
{sbSB8Close, prSp}: {sbSB8Sp, sbDontBreak, 90}, return sbSB8Close, false, 90
{sbSTerm, prClose}: {sbSB8aClose, sbDontBreak, 90}, case sbATerm | prSp<<32:
{sbSB8aClose, prClose}: {sbSB8aClose, sbDontBreak, 90}, return sbSB8Sp, false, 90
{sbSTerm, prSp}: {sbSB8aSp, sbDontBreak, 90}, case sbSB7 | prSp<<32:
{sbSB8aClose, prSp}: {sbSB8aSp, sbDontBreak, 90}, return sbSB8Sp, false, 90
{sbATerm, prSep}: {sbParaSep, sbDontBreak, 90}, case sbSB8Close | prSp<<32:
{sbATerm, prCR}: {sbParaSep, sbDontBreak, 90}, return sbSB8Sp, false, 90
{sbATerm, prLF}: {sbParaSep, sbDontBreak, 90}, case sbSTerm | prClose<<32:
{sbSB7, prSep}: {sbParaSep, sbDontBreak, 90}, return sbSB8aClose, false, 90
{sbSB7, prCR}: {sbParaSep, sbDontBreak, 90}, case sbSB8aClose | prClose<<32:
{sbSB7, prLF}: {sbParaSep, sbDontBreak, 90}, return sbSB8aClose, false, 90
{sbSB8Close, prSep}: {sbParaSep, sbDontBreak, 90}, case sbSTerm | prSp<<32:
{sbSB8Close, prCR}: {sbParaSep, sbDontBreak, 90}, return sbSB8aSp, false, 90
{sbSB8Close, prLF}: {sbParaSep, sbDontBreak, 90}, case sbSB8aClose | prSp<<32:
{sbSTerm, prSep}: {sbParaSep, sbDontBreak, 90}, return sbSB8aSp, false, 90
{sbSTerm, prCR}: {sbParaSep, sbDontBreak, 90}, case sbATerm | prSep<<32:
{sbSTerm, prLF}: {sbParaSep, sbDontBreak, 90}, return sbParaSep, false, 90
{sbSB8aClose, prSep}: {sbParaSep, sbDontBreak, 90}, case sbATerm | prCR<<32:
{sbSB8aClose, prCR}: {sbParaSep, sbDontBreak, 90}, return sbParaSep, false, 90
{sbSB8aClose, prLF}: {sbParaSep, sbDontBreak, 90}, case sbATerm | prLF<<32:
return sbParaSep, false, 90
case sbSB7 | prSep<<32:
return sbParaSep, false, 90
case sbSB7 | prCR<<32:
return sbParaSep, false, 90
case sbSB7 | prLF<<32:
return sbParaSep, false, 90
case sbSB8Close | prSep<<32:
return sbParaSep, false, 90
case sbSB8Close | prCR<<32:
return sbParaSep, false, 90
case sbSB8Close | prLF<<32:
return sbParaSep, false, 90
case sbSTerm | prSep<<32:
return sbParaSep, false, 90
case sbSTerm | prCR<<32:
return sbParaSep, false, 90
case sbSTerm | prLF<<32:
return sbParaSep, false, 90
case sbSB8aClose | prSep<<32:
return sbParaSep, false, 90
case sbSB8aClose | prCR<<32:
return sbParaSep, false, 90
case sbSB8aClose | prLF<<32:
return sbParaSep, false, 90
// SB10. // SB10.
{sbSB8Sp, prSp}: {sbSB8Sp, sbDontBreak, 100}, case sbSB8Sp | prSp<<32:
{sbSB8aSp, prSp}: {sbSB8aSp, sbDontBreak, 100}, return sbSB8Sp, false, 100
{sbSB8Sp, prSep}: {sbParaSep, sbDontBreak, 100}, case sbSB8aSp | prSp<<32:
{sbSB8Sp, prCR}: {sbParaSep, sbDontBreak, 100}, return sbSB8aSp, false, 100
{sbSB8Sp, prLF}: {sbParaSep, sbDontBreak, 100}, case sbSB8Sp | prSep<<32:
return sbParaSep, false, 100
case sbSB8Sp | prCR<<32:
return sbParaSep, false, 100
case sbSB8Sp | prLF<<32:
return sbParaSep, false, 100
// SB11. // SB11.
{sbATerm, prAny}: {sbAny, sbBreak, 110}, case sbATerm | prAny<<32:
{sbSB7, prAny}: {sbAny, sbBreak, 110}, return sbAny, true, 110
{sbSB8Close, prAny}: {sbAny, sbBreak, 110}, case sbSB7 | prAny<<32:
{sbSB8Sp, prAny}: {sbAny, sbBreak, 110}, return sbAny, true, 110
{sbSTerm, prAny}: {sbAny, sbBreak, 110}, case sbSB8Close | prAny<<32:
{sbSB8aClose, prAny}: {sbAny, sbBreak, 110}, return sbAny, true, 110
{sbSB8aSp, prAny}: {sbAny, sbBreak, 110}, case sbSB8Sp | prAny<<32:
return sbAny, true, 110
case sbSTerm | prAny<<32:
return sbAny, true, 110
case sbSB8aClose | prAny<<32:
return sbAny, true, 110
case sbSB8aSp | prAny<<32:
return sbAny, true, 110
// We'll always break after ParaSep due to SB4. // We'll always break after ParaSep due to SB4.
default:
return -1, false, -1
}
} }
// transitionSentenceBreakState determines the new state of the sentence break // transitionSentenceBreakState determines the new state of the sentence break
@ -141,30 +215,27 @@ func transitionSentenceBreakState(state int, r rune, b []byte, str string) (newS
// Find the applicable transition in the table. // Find the applicable transition in the table.
var rule int var rule int
transition, ok := sbTransitions[[2]int{state, nextProperty}] newState, sentenceBreak, rule = sbTransitions(state, nextProperty)
if ok { if newState < 0 {
// We have a specific transition. We'll use it.
newState, sentenceBreak, rule = transition[0], transition[1] == sbBreak, transition[2]
} else {
// No specific transition found. Try the less specific ones. // No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := sbTransitions[[2]int{state, prAny}] anyPropState, anyPropProp, anyPropRule := sbTransitions(state, prAny)
transAnyState, okAnyState := sbTransitions[[2]int{sbAny, nextProperty}] anyStateState, anyStateProp, anyStateRule := sbTransitions(sbAny, nextProperty)
if okAnyProp && okAnyState { if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions). // Both apply. We'll use a mix (see comments for grTransitions).
newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
if transAnyProp[2] < transAnyState[2] { if anyPropRule < anyStateRule {
sentenceBreak, rule = transAnyProp[1] == sbBreak, transAnyProp[2] sentenceBreak, rule = anyPropProp, anyPropRule
} }
} else if okAnyProp { } else if anyPropState >= 0 {
// We only have a specific state. // We only have a specific state.
newState, sentenceBreak, rule = transAnyProp[0], transAnyProp[1] == sbBreak, transAnyProp[2] newState, sentenceBreak, rule = anyPropState, anyPropProp, anyPropRule
// This branch will probably never be reached because okAnyState will // This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here // always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be // for future modifications to the transition map where this may not be
// true anymore. // true anymore.
} else if okAnyState { } else if anyStateState >= 0 {
// We only have a specific property. // We only have a specific property.
newState, sentenceBreak, rule = transAnyState[0], transAnyState[1] == sbBreak, transAnyState[2] newState, sentenceBreak, rule = anyStateState, anyStateProp, anyStateRule
} else { } else {
// No known transition. SB999: Any × Any. // No known transition. SB999: Any × Any.
newState, sentenceBreak, rule = sbAny, false, 9990 newState, sentenceBreak, rule = sbAny, false, 9990

View file

@ -100,7 +100,7 @@ func Step(b []byte, state int) (cluster, rest []byte, boundaries int, newState i
if len(b) <= length { // If we're already past the end, there is nothing else to parse. if len(b) <= length { // If we're already past the end, there is nothing else to parse.
var prop int var prop int
if state < 0 { if state < 0 {
prop = property(graphemeCodePoints, r) prop = propertyGraphemes(r)
} else { } else {
prop = state >> shiftPropState prop = state >> shiftPropState
} }
@ -179,7 +179,7 @@ func StepString(str string, state int) (cluster, rest string, boundaries int, ne
// Extract the first rune. // Extract the first rune.
r, length := utf8.DecodeRuneInString(str) r, length := utf8.DecodeRuneInString(str)
if len(str) <= length { // If we're already past the end, there is nothing else to parse. if len(str) <= length { // If we're already past the end, there is nothing else to parse.
prop := property(graphemeCodePoints, r) prop := propertyGraphemes(r)
return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState) return str, "", LineMustBreak | (1 << shiftWord) | (1 << shiftSentence) | (runeWidth(r, prop) << ShiftWidth), grAny | (wbAny << shiftWordState) | (sbAny << shiftSentenceState) | (lbAny << shiftLineState)
} }

View file

@ -1,5 +1,10 @@
package uniseg package uniseg
// EastAsianAmbiguousWidth specifies the monospace width for East Asian
// characters classified as Ambiguous. The default is 1 but some rare fonts
// render them with a width of 2.
var EastAsianAmbiguousWidth = 1
// runeWidth returns the monospace width for the given rune. The provided // runeWidth returns the monospace width for the given rune. The provided
// grapheme property is a value mapped by the [graphemeCodePoints] table. // grapheme property is a value mapped by the [graphemeCodePoints] table.
// //
@ -33,9 +38,11 @@ func runeWidth(r rune, graphemeProperty int) int {
return 4 return 4
} }
switch property(eastAsianWidth, r) { switch propertyEastAsianWidth(r) {
case prW, prF: case prW, prF:
return 2 return 2
case prA:
return EastAsianAmbiguousWidth
} }
return 1 return 1

View file

@ -1,13 +1,13 @@
package uniseg
// Code generated via go generate from gen_properties.go. DO NOT EDIT. // Code generated via go generate from gen_properties.go. DO NOT EDIT.
package uniseg
// workBreakCodePoints are taken from // workBreakCodePoints are taken from
// https://www.unicode.org/Public/14.0.0/ucd/auxiliary/WordBreakProperty.txt // https://www.unicode.org/Public/15.0.0/ucd/auxiliary/WordBreakProperty.txt
// and // and
// https://unicode.org/Public/14.0.0/ucd/emoji/emoji-data.txt // https://unicode.org/Public/15.0.0/ucd/emoji/emoji-data.txt
// ("Extended_Pictographic" only) // ("Extended_Pictographic" only)
// on September 10, 2022. See https://www.unicode.org/license.html for the Unicode // on September 5, 2023. See https://www.unicode.org/license.html for the Unicode
// license agreement. // license agreement.
var workBreakCodePoints = [][3]int{ var workBreakCodePoints = [][3]int{
{0x000A, 0x000A, prLF}, // Cc <control-000A> {0x000A, 0x000A, prLF}, // Cc <control-000A>
@ -318,6 +318,7 @@ var workBreakCodePoints = [][3]int{
{0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL {0x0CE2, 0x0CE3, prExtend}, // Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL
{0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE {0x0CE6, 0x0CEF, prNumeric}, // Nd [10] KANNADA DIGIT ZERO..KANNADA DIGIT NINE
{0x0CF1, 0x0CF2, prALetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA {0x0CF1, 0x0CF2, prALetter}, // Lo [2] KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
{0x0CF3, 0x0CF3, prExtend}, // Mc KANNADA SIGN COMBINING ANUSVARA ABOVE RIGHT
{0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU {0x0D00, 0x0D01, prExtend}, // Mn [2] MALAYALAM SIGN COMBINING ANUSVARA ABOVE..MALAYALAM SIGN CANDRABINDU
{0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA {0x0D02, 0x0D03, prExtend}, // Mc [2] MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
{0x0D04, 0x0D0C, prALetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L {0x0D04, 0x0D0C, prALetter}, // Lo [9] MALAYALAM LETTER VEDIC ANUSVARA..MALAYALAM LETTER VOCALIC L
@ -357,7 +358,7 @@ var workBreakCodePoints = [][3]int{
{0x0E50, 0x0E59, prNumeric}, // Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE {0x0E50, 0x0E59, prNumeric}, // Nd [10] THAI DIGIT ZERO..THAI DIGIT NINE
{0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN {0x0EB1, 0x0EB1, prExtend}, // Mn LAO VOWEL SIGN MAI KAN
{0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO {0x0EB4, 0x0EBC, prExtend}, // Mn [9] LAO VOWEL SIGN I..LAO SEMIVOWEL SIGN LO
{0x0EC8, 0x0ECD, prExtend}, // Mn [6] LAO TONE MAI EK..LAO NIGGAHITA {0x0EC8, 0x0ECE, prExtend}, // Mn [7] LAO TONE MAI EK..LAO YAMAKKAN
{0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE {0x0ED0, 0x0ED9, prNumeric}, // Nd [10] LAO DIGIT ZERO..LAO DIGIT NINE
{0x0F00, 0x0F00, prALetter}, // Lo TIBETAN SYLLABLE OM {0x0F00, 0x0F00, prALetter}, // Lo TIBETAN SYLLABLE OM
{0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS {0x0F18, 0x0F19, prExtend}, // Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
@ -1093,6 +1094,7 @@ var workBreakCodePoints = [][3]int{
{0x10E80, 0x10EA9, prALetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET {0x10E80, 0x10EA9, prALetter}, // Lo [42] YEZIDI LETTER ELIF..YEZIDI LETTER ET
{0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK {0x10EAB, 0x10EAC, prExtend}, // Mn [2] YEZIDI COMBINING HAMZA MARK..YEZIDI COMBINING MADDA MARK
{0x10EB0, 0x10EB1, prALetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE {0x10EB0, 0x10EB1, prALetter}, // Lo [2] YEZIDI LETTER LAM WITH DOT ABOVE..YEZIDI LETTER YOT WITH CIRCUMFLEX ABOVE
{0x10EFD, 0x10EFF, prExtend}, // Mn [3] ARABIC SMALL LOW WORD SAKTA..ARABIC SMALL LOW WORD MADDA
{0x10F00, 0x10F1C, prALetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL {0x10F00, 0x10F1C, prALetter}, // Lo [29] OLD SOGDIAN LETTER ALEPH..OLD SOGDIAN LETTER FINAL TAW WITH VERTICAL TAIL
{0x10F27, 0x10F27, prALetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH {0x10F27, 0x10F27, prALetter}, // Lo OLD SOGDIAN LIGATURE AYIN-DALETH
{0x10F30, 0x10F45, prALetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN {0x10F30, 0x10F45, prALetter}, // Lo [22] SOGDIAN LETTER ALEPH..SOGDIAN INDEPENDENT SHIN
@ -1157,6 +1159,8 @@ var workBreakCodePoints = [][3]int{
{0x11235, 0x11235, prExtend}, // Mc KHOJKI SIGN VIRAMA {0x11235, 0x11235, prExtend}, // Mc KHOJKI SIGN VIRAMA
{0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA {0x11236, 0x11237, prExtend}, // Mn [2] KHOJKI SIGN NUKTA..KHOJKI SIGN SHADDA
{0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN {0x1123E, 0x1123E, prExtend}, // Mn KHOJKI SIGN SUKUN
{0x1123F, 0x11240, prALetter}, // Lo [2] KHOJKI LETTER QA..KHOJKI LETTER SHORT I
{0x11241, 0x11241, prExtend}, // Mn KHOJKI VOWEL SIGN VOCALIC R
{0x11280, 0x11286, prALetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA {0x11280, 0x11286, prALetter}, // Lo [7] MULTANI LETTER A..MULTANI LETTER GA
{0x11288, 0x11288, prALetter}, // Lo MULTANI LETTER GHA {0x11288, 0x11288, prALetter}, // Lo MULTANI LETTER GHA
{0x1128A, 0x1128D, prALetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA {0x1128A, 0x1128D, prALetter}, // Lo [4] MULTANI LETTER CA..MULTANI LETTER JJA
@ -1337,13 +1341,28 @@ var workBreakCodePoints = [][3]int{
{0x11EE0, 0x11EF2, prALetter}, // Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA {0x11EE0, 0x11EF2, prALetter}, // Lo [19] MAKASAR LETTER KA..MAKASAR ANGKA
{0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U {0x11EF3, 0x11EF4, prExtend}, // Mn [2] MAKASAR VOWEL SIGN I..MAKASAR VOWEL SIGN U
{0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O {0x11EF5, 0x11EF6, prExtend}, // Mc [2] MAKASAR VOWEL SIGN E..MAKASAR VOWEL SIGN O
{0x11F00, 0x11F01, prExtend}, // Mn [2] KAWI SIGN CANDRABINDU..KAWI SIGN ANUSVARA
{0x11F02, 0x11F02, prALetter}, // Lo KAWI SIGN REPHA
{0x11F03, 0x11F03, prExtend}, // Mc KAWI SIGN VISARGA
{0x11F04, 0x11F10, prALetter}, // Lo [13] KAWI LETTER A..KAWI LETTER O
{0x11F12, 0x11F33, prALetter}, // Lo [34] KAWI LETTER KA..KAWI LETTER JNYA
{0x11F34, 0x11F35, prExtend}, // Mc [2] KAWI VOWEL SIGN AA..KAWI VOWEL SIGN ALTERNATE AA
{0x11F36, 0x11F3A, prExtend}, // Mn [5] KAWI VOWEL SIGN I..KAWI VOWEL SIGN VOCALIC R
{0x11F3E, 0x11F3F, prExtend}, // Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI
{0x11F40, 0x11F40, prExtend}, // Mn KAWI VOWEL SIGN EU
{0x11F41, 0x11F41, prExtend}, // Mc KAWI SIGN KILLER
{0x11F42, 0x11F42, prExtend}, // Mn KAWI CONJOINER
{0x11F50, 0x11F59, prNumeric}, // Nd [10] KAWI DIGIT ZERO..KAWI DIGIT NINE
{0x11FB0, 0x11FB0, prALetter}, // Lo LISU LETTER YHA {0x11FB0, 0x11FB0, prALetter}, // Lo LISU LETTER YHA
{0x12000, 0x12399, prALetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U {0x12000, 0x12399, prALetter}, // Lo [922] CUNEIFORM SIGN A..CUNEIFORM SIGN U U
{0x12400, 0x1246E, prALetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM {0x12400, 0x1246E, prALetter}, // Nl [111] CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN NINE U VARIANT FORM
{0x12480, 0x12543, prALetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU {0x12480, 0x12543, prALetter}, // Lo [196] CUNEIFORM SIGN AB TIMES NUN TENU..CUNEIFORM SIGN ZU5 TIMES THREE DISH TENU
{0x12F90, 0x12FF0, prALetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114 {0x12F90, 0x12FF0, prALetter}, // Lo [97] CYPRO-MINOAN SIGN CM001..CYPRO-MINOAN SIGN CM114
{0x13000, 0x1342E, prALetter}, // Lo [1071] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032 {0x13000, 0x1342F, prALetter}, // Lo [1072] EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH V011D
{0x13430, 0x13438, prFormat}, // Cf [9] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END SEGMENT {0x13430, 0x1343F, prFormat}, // Cf [16] EGYPTIAN HIEROGLYPH VERTICAL JOINER..EGYPTIAN HIEROGLYPH END WALLED ENCLOSURE
{0x13440, 0x13440, prExtend}, // Mn EGYPTIAN HIEROGLYPH MIRROR HORIZONTALLY
{0x13441, 0x13446, prALetter}, // Lo [6] EGYPTIAN HIEROGLYPH FULL BLANK..EGYPTIAN HIEROGLYPH WIDE LOST SIGN
{0x13447, 0x13455, prExtend}, // Mn [15] EGYPTIAN HIEROGLYPH MODIFIER DAMAGED AT TOP START..EGYPTIAN HIEROGLYPH MODIFIER DAMAGED
{0x14400, 0x14646, prALetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530 {0x14400, 0x14646, prALetter}, // Lo [583] ANATOLIAN HIEROGLYPH A001..ANATOLIAN HIEROGLYPH A530
{0x16800, 0x16A38, prALetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ {0x16800, 0x16A38, prALetter}, // Lo [569] BAMUM LETTER PHASE-A NGKUE MFON..BAMUM LETTER PHASE-F VUEQ
{0x16A40, 0x16A5E, prALetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK {0x16A40, 0x16A5E, prALetter}, // Lo [31] MRO LETTER TA..MRO LETTER TEK
@ -1374,6 +1393,7 @@ var workBreakCodePoints = [][3]int{
{0x1AFFD, 0x1AFFE, prKatakana}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8 {0x1AFFD, 0x1AFFE, prKatakana}, // Lm [2] KATAKANA LETTER MINNAN NASALIZED TONE-7..KATAKANA LETTER MINNAN NASALIZED TONE-8
{0x1B000, 0x1B000, prKatakana}, // Lo KATAKANA LETTER ARCHAIC E {0x1B000, 0x1B000, prKatakana}, // Lo KATAKANA LETTER ARCHAIC E
{0x1B120, 0x1B122, prKatakana}, // Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU {0x1B120, 0x1B122, prKatakana}, // Lo [3] KATAKANA LETTER ARCHAIC YI..KATAKANA LETTER ARCHAIC WU
{0x1B155, 0x1B155, prKatakana}, // Lo KATAKANA LETTER SMALL KO
{0x1B164, 0x1B167, prKatakana}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N {0x1B164, 0x1B167, prKatakana}, // Lo [4] KATAKANA LETTER SMALL WI..KATAKANA LETTER SMALL N
{0x1BC00, 0x1BC6A, prALetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M {0x1BC00, 0x1BC6A, prALetter}, // Lo [107] DUPLOYAN LETTER H..DUPLOYAN LETTER VOCALIC M
{0x1BC70, 0x1BC7C, prALetter}, // Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK {0x1BC70, 0x1BC7C, prALetter}, // Lo [13] DUPLOYAN AFFIX LEFT HORIZONTAL SECANT..DUPLOYAN AFFIX ATTACHED TANGENT HOOK
@ -1431,11 +1451,14 @@ var workBreakCodePoints = [][3]int{
{0x1DF00, 0x1DF09, prALetter}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK {0x1DF00, 0x1DF09, prALetter}, // L& [10] LATIN SMALL LETTER FENG DIGRAPH WITH TRILL..LATIN SMALL LETTER T WITH HOOK AND RETROFLEX HOOK
{0x1DF0A, 0x1DF0A, prALetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK {0x1DF0A, 0x1DF0A, prALetter}, // Lo LATIN LETTER RETROFLEX CLICK WITH RETROFLEX HOOK
{0x1DF0B, 0x1DF1E, prALetter}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL {0x1DF0B, 0x1DF1E, prALetter}, // L& [20] LATIN SMALL LETTER ESH WITH DOUBLE BAR..LATIN SMALL LETTER S WITH CURL
{0x1DF25, 0x1DF2A, prALetter}, // L& [6] LATIN SMALL LETTER D WITH MID-HEIGHT LEFT HOOK..LATIN SMALL LETTER T WITH MID-HEIGHT LEFT HOOK
{0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE {0x1E000, 0x1E006, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER AZU..COMBINING GLAGOLITIC LETTER ZHIVETE
{0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU {0x1E008, 0x1E018, prExtend}, // Mn [17] COMBINING GLAGOLITIC LETTER ZEMLJA..COMBINING GLAGOLITIC LETTER HERU
{0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI {0x1E01B, 0x1E021, prExtend}, // Mn [7] COMBINING GLAGOLITIC LETTER SHTA..COMBINING GLAGOLITIC LETTER YATI
{0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS {0x1E023, 0x1E024, prExtend}, // Mn [2] COMBINING GLAGOLITIC LETTER YU..COMBINING GLAGOLITIC LETTER SMALL YUS
{0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA {0x1E026, 0x1E02A, prExtend}, // Mn [5] COMBINING GLAGOLITIC LETTER YO..COMBINING GLAGOLITIC LETTER FITA
{0x1E030, 0x1E06D, prALetter}, // Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE
{0x1E08F, 0x1E08F, prExtend}, // Mn COMBINING CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
{0x1E100, 0x1E12C, prALetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W {0x1E100, 0x1E12C, prALetter}, // Lo [45] NYIAKENG PUACHUE HMONG LETTER MA..NYIAKENG PUACHUE HMONG LETTER W
{0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D {0x1E130, 0x1E136, prExtend}, // Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D
{0x1E137, 0x1E13D, prALetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER {0x1E137, 0x1E13D, prALetter}, // Lm [7] NYIAKENG PUACHUE HMONG SIGN FOR PERSON..NYIAKENG PUACHUE HMONG SYLLABLE LENGTHENER
@ -1446,6 +1469,10 @@ var workBreakCodePoints = [][3]int{
{0x1E2C0, 0x1E2EB, prALetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH {0x1E2C0, 0x1E2EB, prALetter}, // Lo [44] WANCHO LETTER AA..WANCHO LETTER YIH
{0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI {0x1E2EC, 0x1E2EF, prExtend}, // Mn [4] WANCHO TONE TUP..WANCHO TONE KOINI
{0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE {0x1E2F0, 0x1E2F9, prNumeric}, // Nd [10] WANCHO DIGIT ZERO..WANCHO DIGIT NINE
{0x1E4D0, 0x1E4EA, prALetter}, // Lo [27] NAG MUNDARI LETTER O..NAG MUNDARI LETTER ELL
{0x1E4EB, 0x1E4EB, prALetter}, // Lm NAG MUNDARI SIGN OJOD
{0x1E4EC, 0x1E4EF, prExtend}, // Mn [4] NAG MUNDARI SIGN MUHOR..NAG MUNDARI SIGN SUTUH
{0x1E4F0, 0x1E4F9, prNumeric}, // Nd [10] NAG MUNDARI DIGIT ZERO..NAG MUNDARI DIGIT NINE
{0x1E7E0, 0x1E7E6, prALetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO {0x1E7E0, 0x1E7E6, prALetter}, // Lo [7] ETHIOPIC SYLLABLE HHYA..ETHIOPIC SYLLABLE HHYO
{0x1E7E8, 0x1E7EB, prALetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE {0x1E7E8, 0x1E7EB, prALetter}, // Lo [4] ETHIOPIC SYLLABLE GURAGE HHWA..ETHIOPIC SYLLABLE HHWE
{0x1E7ED, 0x1E7EE, prALetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE {0x1E7ED, 0x1E7EE, prALetter}, // Lo [2] ETHIOPIC SYLLABLE GURAGE MWI..ETHIOPIC SYLLABLE GURAGE MWEE
@ -1740,7 +1767,8 @@ var workBreakCodePoints = [][3]int{
{0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA {0x1F6D3, 0x1F6D4, prExtendedPictographic}, // E0.0 [2] (🛓..🛔) STUPA..PAGODA
{0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple {0x1F6D5, 0x1F6D5, prExtendedPictographic}, // E12.0 [1] (🛕) hindu temple
{0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator {0x1F6D6, 0x1F6D7, prExtendedPictographic}, // E13.0 [2] (🛖..🛗) hut..elevator
{0x1F6D8, 0x1F6DC, prExtendedPictographic}, // E0.0 [5] (🛘..🛜) <reserved-1F6D8>..<reserved-1F6DC> {0x1F6D8, 0x1F6DB, prExtendedPictographic}, // E0.0 [4] (🛘..🛛) <reserved-1F6D8>..<reserved-1F6DB>
{0x1F6DC, 0x1F6DC, prExtendedPictographic}, // E15.0 [1] (🛜) wireless
{0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy {0x1F6DD, 0x1F6DF, prExtendedPictographic}, // E14.0 [3] (🛝..🛟) playground slide..ring buoy
{0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat {0x1F6E0, 0x1F6E5, prExtendedPictographic}, // E0.7 [6] (🛠️..🛥️) hammer and wrench..motor boat
{0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE {0x1F6E6, 0x1F6E8, prExtendedPictographic}, // E0.0 [3] (🛦..🛨) UP-POINTING MILITARY AIRPLANE..UP-POINTING SMALL AIRPLANE
@ -1757,7 +1785,7 @@ var workBreakCodePoints = [][3]int{
{0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw {0x1F6FA, 0x1F6FA, prExtendedPictographic}, // E12.0 [1] (🛺) auto rickshaw
{0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate {0x1F6FB, 0x1F6FC, prExtendedPictographic}, // E13.0 [2] (🛻..🛼) pickup truck..roller skate
{0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF> {0x1F6FD, 0x1F6FF, prExtendedPictographic}, // E0.0 [3] (🛽..🛿) <reserved-1F6FD>..<reserved-1F6FF>
{0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) <reserved-1F774>..<reserved-1F77F> {0x1F774, 0x1F77F, prExtendedPictographic}, // E0.0 [12] (🝴..🝿) LOT OF FORTUNE..ORCUS
{0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF> {0x1F7D5, 0x1F7DF, prExtendedPictographic}, // E0.0 [11] (🟕..🟟) CIRCLED TRIANGLE..<reserved-1F7DF>
{0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square {0x1F7E0, 0x1F7EB, prExtendedPictographic}, // E12.0 [12] (🟠..🟫) orange circle..brown square
{0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF> {0x1F7EC, 0x1F7EF, prExtendedPictographic}, // E0.0 [4] (🟬..🟯) <reserved-1F7EC>..<reserved-1F7EF>
@ -1816,30 +1844,37 @@ var workBreakCodePoints = [][3]int{
{0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F> {0x1FA00, 0x1FA6F, prExtendedPictographic}, // E0.0 [112] (🨀..🩯) NEUTRAL CHESS KING..<reserved-1FA6F>
{0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts {0x1FA70, 0x1FA73, prExtendedPictographic}, // E12.0 [4] (🩰..🩳) ballet shoes..shorts
{0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal {0x1FA74, 0x1FA74, prExtendedPictographic}, // E13.0 [1] (🩴) thong sandal
{0x1FA75, 0x1FA77, prExtendedPictographic}, // E0.0 [3] (🩵..🩷) <reserved-1FA75>..<reserved-1FA77> {0x1FA75, 0x1FA77, prExtendedPictographic}, // E15.0 [3] (🩵..🩷) light blue heart..pink heart
{0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope {0x1FA78, 0x1FA7A, prExtendedPictographic}, // E12.0 [3] (🩸..🩺) drop of blood..stethoscope
{0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch {0x1FA7B, 0x1FA7C, prExtendedPictographic}, // E14.0 [2] (🩻..🩼) x-ray..crutch
{0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F> {0x1FA7D, 0x1FA7F, prExtendedPictographic}, // E0.0 [3] (🩽..🩿) <reserved-1FA7D>..<reserved-1FA7F>
{0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute {0x1FA80, 0x1FA82, prExtendedPictographic}, // E12.0 [3] (🪀..🪂) yo-yo..parachute
{0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls {0x1FA83, 0x1FA86, prExtendedPictographic}, // E13.0 [4] (🪃..🪆) boomerang..nesting dolls
{0x1FA87, 0x1FA8F, prExtendedPictographic}, // E0.0 [9] (🪇..🪏) <reserved-1FA87>..<reserved-1FA8F> {0x1FA87, 0x1FA88, prExtendedPictographic}, // E15.0 [2] (🪇..🪈) maracas..flute
{0x1FA89, 0x1FA8F, prExtendedPictographic}, // E0.0 [7] (🪉..🪏) <reserved-1FA89>..<reserved-1FA8F>
{0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo {0x1FA90, 0x1FA95, prExtendedPictographic}, // E12.0 [6] (🪐..🪕) ringed planet..banjo
{0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock {0x1FA96, 0x1FAA8, prExtendedPictographic}, // E13.0 [19] (🪖..🪨) military helmet..rock
{0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa {0x1FAA9, 0x1FAAC, prExtendedPictographic}, // E14.0 [4] (🪩..🪬) mirror ball..hamsa
{0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E0.0 [3] (🪭..🪯) <reserved-1FAAD>..<reserved-1FAAF> {0x1FAAD, 0x1FAAF, prExtendedPictographic}, // E15.0 [3] (🪭..🪯) folding hand fan..khanda
{0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather {0x1FAB0, 0x1FAB6, prExtendedPictographic}, // E13.0 [7] (🪰..🪶) fly..feather
{0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs {0x1FAB7, 0x1FABA, prExtendedPictographic}, // E14.0 [4] (🪷..🪺) lotus..nest with eggs
{0x1FABB, 0x1FABF, prExtendedPictographic}, // E0.0 [5] (🪻..🪿) <reserved-1FABB>..<reserved-1FABF> {0x1FABB, 0x1FABD, prExtendedPictographic}, // E15.0 [3] (🪻..🪽) hyacinth..wing
{0x1FABE, 0x1FABE, prExtendedPictographic}, // E0.0 [1] (🪾) <reserved-1FABE>
{0x1FABF, 0x1FABF, prExtendedPictographic}, // E15.0 [1] (🪿) goose
{0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging {0x1FAC0, 0x1FAC2, prExtendedPictographic}, // E13.0 [3] (🫀..🫂) anatomical heart..people hugging
{0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown {0x1FAC3, 0x1FAC5, prExtendedPictographic}, // E14.0 [3] (🫃..🫅) pregnant man..person with crown
{0x1FAC6, 0x1FACF, prExtendedPictographic}, // E0.0 [10] (🫆..🫏) <reserved-1FAC6>..<reserved-1FACF> {0x1FAC6, 0x1FACD, prExtendedPictographic}, // E0.0 [8] (🫆..🫍) <reserved-1FAC6>..<reserved-1FACD>
{0x1FACE, 0x1FACF, prExtendedPictographic}, // E15.0 [2] (🫎..🫏) moose..donkey
{0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot {0x1FAD0, 0x1FAD6, prExtendedPictographic}, // E13.0 [7] (🫐..🫖) blueberries..teapot
{0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar {0x1FAD7, 0x1FAD9, prExtendedPictographic}, // E14.0 [3] (🫗..🫙) pouring liquid..jar
{0x1FADA, 0x1FADF, prExtendedPictographic}, // E0.0 [6] (🫚..🫟) <reserved-1FADA>..<reserved-1FADF> {0x1FADA, 0x1FADB, prExtendedPictographic}, // E15.0 [2] (🫚..🫛) ginger root..pea pod
{0x1FADC, 0x1FADF, prExtendedPictographic}, // E0.0 [4] (🫜..🫟) <reserved-1FADC>..<reserved-1FADF>
{0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles {0x1FAE0, 0x1FAE7, prExtendedPictographic}, // E14.0 [8] (🫠..🫧) melting face..bubbles
{0x1FAE8, 0x1FAEF, prExtendedPictographic}, // E0.0 [8] (🫨..🫯) <reserved-1FAE8>..<reserved-1FAEF> {0x1FAE8, 0x1FAE8, prExtendedPictographic}, // E15.0 [1] (🫨) shaking face
{0x1FAE9, 0x1FAEF, prExtendedPictographic}, // E0.0 [7] (🫩..🫯) <reserved-1FAE9>..<reserved-1FAEF>
{0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands {0x1FAF0, 0x1FAF6, prExtendedPictographic}, // E14.0 [7] (🫰..🫶) hand with index finger and thumb crossed..heart hands
{0x1FAF7, 0x1FAFF, prExtendedPictographic}, // E0.0 [9] (🫷..🫿) <reserved-1FAF7>..<reserved-1FAFF> {0x1FAF7, 0x1FAF8, prExtendedPictographic}, // E15.0 [2] (🫷..🫸) leftwards pushing hand..rightwards pushing hand
{0x1FAF9, 0x1FAFF, prExtendedPictographic}, // E0.0 [7] (🫹..🫿) <reserved-1FAF9>..<reserved-1FAFF>
{0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE {0x1FBF0, 0x1FBF9, prNumeric}, // Nd [10] SEGMENTED DIGIT ZERO..SEGMENTED DIGIT NINE
{0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD> {0x1FC00, 0x1FFFD, prExtendedPictographic}, // E0.0[1022] (🰀..🿽) <reserved-1FC00>..<reserved-1FFFD>
{0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG {0xE0001, 0xE0001, prFormat}, // Cf LANGUAGE TAG

View file

@ -22,82 +22,121 @@ const (
wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c). wbZWJBit = 16 // This bit is set for any states followed by at least one zero-width joiner (see WB4 and WB3c).
) )
// The word break parser's breaking instructions. // wbTransitions implements the word break parser's state transitions. It's
const ( // anologous to [grTransitions], see comments there for details.
wbDontBreak = iota //
wbBreak // Unicode version 15.0.0.
) func wbTransitions(state, prop int) (newState int, wordBreak bool, rule int) {
switch uint64(state) | uint64(prop)<<32 {
// The word break parser's state transitions. It's anologous to grTransitions,
// see comments there for details. Unicode version 14.0.0.
var wbTransitions = map[[2]int][3]int{
// WB3b. // WB3b.
{wbAny, prNewline}: {wbNewline, wbBreak, 32}, case wbAny | prNewline<<32:
{wbAny, prCR}: {wbCR, wbBreak, 32}, return wbNewline, true, 32
{wbAny, prLF}: {wbLF, wbBreak, 32}, case wbAny | prCR<<32:
return wbCR, true, 32
case wbAny | prLF<<32:
return wbLF, true, 32
// WB3a. // WB3a.
{wbNewline, prAny}: {wbAny, wbBreak, 31}, case wbNewline | prAny<<32:
{wbCR, prAny}: {wbAny, wbBreak, 31}, return wbAny, true, 31
{wbLF, prAny}: {wbAny, wbBreak, 31}, case wbCR | prAny<<32:
return wbAny, true, 31
case wbLF | prAny<<32:
return wbAny, true, 31
// WB3. // WB3.
{wbCR, prLF}: {wbLF, wbDontBreak, 30}, case wbCR | prLF<<32:
return wbLF, false, 30
// WB3d. // WB3d.
{wbAny, prWSegSpace}: {wbWSegSpace, wbBreak, 9990}, case wbAny | prWSegSpace<<32:
{wbWSegSpace, prWSegSpace}: {wbWSegSpace, wbDontBreak, 34}, return wbWSegSpace, true, 9990
case wbWSegSpace | prWSegSpace<<32:
return wbWSegSpace, false, 34
// WB5. // WB5.
{wbAny, prALetter}: {wbALetter, wbBreak, 9990}, case wbAny | prALetter<<32:
{wbAny, prHebrewLetter}: {wbHebrewLetter, wbBreak, 9990}, return wbALetter, true, 9990
{wbALetter, prALetter}: {wbALetter, wbDontBreak, 50}, case wbAny | prHebrewLetter<<32:
{wbALetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50}, return wbHebrewLetter, true, 9990
{wbHebrewLetter, prALetter}: {wbALetter, wbDontBreak, 50}, case wbALetter | prALetter<<32:
{wbHebrewLetter, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 50}, return wbALetter, false, 50
case wbALetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
case wbHebrewLetter | prALetter<<32:
return wbALetter, false, 50
case wbHebrewLetter | prHebrewLetter<<32:
return wbHebrewLetter, false, 50
// WB7. Transitions to wbWB7 handled by transitionWordBreakState(). // WB7. Transitions to wbWB7 handled by transitionWordBreakState().
{wbWB7, prALetter}: {wbALetter, wbDontBreak, 70}, case wbWB7 | prALetter<<32:
{wbWB7, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 70}, return wbALetter, false, 70
case wbWB7 | prHebrewLetter<<32:
return wbHebrewLetter, false, 70
// WB7a. // WB7a.
{wbHebrewLetter, prSingleQuote}: {wbAny, wbDontBreak, 71}, case wbHebrewLetter | prSingleQuote<<32:
return wbAny, false, 71
// WB7c. Transitions to wbWB7c handled by transitionWordBreakState(). // WB7c. Transitions to wbWB7c handled by transitionWordBreakState().
{wbWB7c, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 73}, case wbWB7c | prHebrewLetter<<32:
return wbHebrewLetter, false, 73
// WB8. // WB8.
{wbAny, prNumeric}: {wbNumeric, wbBreak, 9990}, case wbAny | prNumeric<<32:
{wbNumeric, prNumeric}: {wbNumeric, wbDontBreak, 80}, return wbNumeric, true, 9990
case wbNumeric | prNumeric<<32:
return wbNumeric, false, 80
// WB9. // WB9.
{wbALetter, prNumeric}: {wbNumeric, wbDontBreak, 90}, case wbALetter | prNumeric<<32:
{wbHebrewLetter, prNumeric}: {wbNumeric, wbDontBreak, 90}, return wbNumeric, false, 90
case wbHebrewLetter | prNumeric<<32:
return wbNumeric, false, 90
// WB10. // WB10.
{wbNumeric, prALetter}: {wbALetter, wbDontBreak, 100}, case wbNumeric | prALetter<<32:
{wbNumeric, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 100}, return wbALetter, false, 100
case wbNumeric | prHebrewLetter<<32:
return wbHebrewLetter, false, 100
// WB11. Transitions to wbWB11 handled by transitionWordBreakState(). // WB11. Transitions to wbWB11 handled by transitionWordBreakState().
{wbWB11, prNumeric}: {wbNumeric, wbDontBreak, 110}, case wbWB11 | prNumeric<<32:
return wbNumeric, false, 110
// WB13. // WB13.
{wbAny, prKatakana}: {wbKatakana, wbBreak, 9990}, case wbAny | prKatakana<<32:
{wbKatakana, prKatakana}: {wbKatakana, wbDontBreak, 130}, return wbKatakana, true, 9990
case wbKatakana | prKatakana<<32:
return wbKatakana, false, 130
// WB13a. // WB13a.
{wbAny, prExtendNumLet}: {wbExtendNumLet, wbBreak, 9990}, case wbAny | prExtendNumLet<<32:
{wbALetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, return wbExtendNumLet, true, 9990
{wbHebrewLetter, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, case wbALetter | prExtendNumLet<<32:
{wbNumeric, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, return wbExtendNumLet, false, 131
{wbKatakana, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, case wbHebrewLetter | prExtendNumLet<<32:
{wbExtendNumLet, prExtendNumLet}: {wbExtendNumLet, wbDontBreak, 131}, return wbExtendNumLet, false, 131
case wbNumeric | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbKatakana | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
case wbExtendNumLet | prExtendNumLet<<32:
return wbExtendNumLet, false, 131
// WB13b. // WB13b.
{wbExtendNumLet, prALetter}: {wbALetter, wbDontBreak, 132}, case wbExtendNumLet | prALetter<<32:
{wbExtendNumLet, prHebrewLetter}: {wbHebrewLetter, wbDontBreak, 132}, return wbALetter, false, 132
{wbExtendNumLet, prNumeric}: {wbNumeric, wbDontBreak, 132}, case wbExtendNumLet | prHebrewLetter<<32:
{wbExtendNumLet, prKatakana}: {prKatakana, wbDontBreak, 132}, return wbHebrewLetter, false, 132
case wbExtendNumLet | prNumeric<<32:
return wbNumeric, false, 132
case wbExtendNumLet | prKatakana<<32:
return wbKatakana, false, 132
default:
return -1, false, -1
}
} }
// transitionWordBreakState determines the new state of the word break parser // transitionWordBreakState determines the new state of the word break parser
@ -141,30 +180,27 @@ func transitionWordBreakState(state int, r rune, b []byte, str string) (newState
// Find the applicable transition in the table. // Find the applicable transition in the table.
var rule int var rule int
transition, ok := wbTransitions[[2]int{state, nextProperty}] newState, wordBreak, rule = wbTransitions(state, nextProperty)
if ok { if newState < 0 {
// We have a specific transition. We'll use it.
newState, wordBreak, rule = transition[0], transition[1] == wbBreak, transition[2]
} else {
// No specific transition found. Try the less specific ones. // No specific transition found. Try the less specific ones.
transAnyProp, okAnyProp := wbTransitions[[2]int{state, prAny}] anyPropState, anyPropWordBreak, anyPropRule := wbTransitions(state, prAny)
transAnyState, okAnyState := wbTransitions[[2]int{wbAny, nextProperty}] anyStateState, anyStateWordBreak, anyStateRule := wbTransitions(wbAny, nextProperty)
if okAnyProp && okAnyState { if anyPropState >= 0 && anyStateState >= 0 {
// Both apply. We'll use a mix (see comments for grTransitions). // Both apply. We'll use a mix (see comments for grTransitions).
newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2] newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
if transAnyProp[2] < transAnyState[2] { if anyPropRule < anyStateRule {
wordBreak, rule = transAnyProp[1] == wbBreak, transAnyProp[2] wordBreak, rule = anyPropWordBreak, anyPropRule
} }
} else if okAnyProp { } else if anyPropState >= 0 {
// We only have a specific state. // We only have a specific state.
newState, wordBreak, rule = transAnyProp[0], transAnyProp[1] == wbBreak, transAnyProp[2] newState, wordBreak, rule = anyPropState, anyPropWordBreak, anyPropRule
// This branch will probably never be reached because okAnyState will // This branch will probably never be reached because okAnyState will
// always be true given the current transition map. But we keep it here // always be true given the current transition map. But we keep it here
// for future modifications to the transition map where this may not be // for future modifications to the transition map where this may not be
// true anymore. // true anymore.
} else if okAnyState { } else if anyStateState >= 0 {
// We only have a specific property. // We only have a specific property.
newState, wordBreak, rule = transAnyState[0], transAnyState[1] == wbBreak, transAnyState[2] newState, wordBreak, rule = anyStateState, anyStateWordBreak, anyStateRule
} else { } else {
// No known transition. WB999: Any ÷ Any. // No known transition. WB999: Any ÷ Any.
newState, wordBreak, rule = wbAny, true, 9990 newState, wordBreak, rule = wbAny, true, 9990

View file

@ -5,4 +5,4 @@
package internal package internal
// Version is the current tagged release of the library. // Version is the current tagged release of the library.
const Version = "0.159.0" const Version = "0.160.0"

6
vendor/modules.txt vendored
View file

@ -112,7 +112,7 @@ github.com/VividCortex/ewma
# github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9 # github.com/alecthomas/units v0.0.0-20231202071711-9a357b53e9c9
## explicit; go 1.15 ## explicit; go 1.15
github.com/alecthomas/units github.com/alecthomas/units
# github.com/aws/aws-sdk-go v1.50.5 # github.com/aws/aws-sdk-go v1.50.6
## explicit; go 1.19 ## explicit; go 1.19
github.com/aws/aws-sdk-go/aws github.com/aws/aws-sdk-go/aws
github.com/aws/aws-sdk-go/aws/auth/bearer github.com/aws/aws-sdk-go/aws/auth/bearer
@ -527,7 +527,7 @@ github.com/prometheus/prometheus/util/osutil
github.com/prometheus/prometheus/util/pool github.com/prometheus/prometheus/util/pool
github.com/prometheus/prometheus/util/testutil github.com/prometheus/prometheus/util/testutil
github.com/prometheus/prometheus/util/zeropool github.com/prometheus/prometheus/util/zeropool
# github.com/rivo/uniseg v0.4.4 # github.com/rivo/uniseg v0.4.6
## explicit; go 1.18 ## explicit; go 1.18
github.com/rivo/uniseg github.com/rivo/uniseg
# github.com/russross/blackfriday/v2 v2.1.0 # github.com/russross/blackfriday/v2 v2.1.0
@ -701,7 +701,7 @@ golang.org/x/text/unicode/norm
# golang.org/x/time v0.5.0 # golang.org/x/time v0.5.0
## explicit; go 1.18 ## explicit; go 1.18
golang.org/x/time/rate golang.org/x/time/rate
# google.golang.org/api v0.159.0 # google.golang.org/api v0.160.0
## explicit; go 1.19 ## explicit; go 1.19
google.golang.org/api/googleapi google.golang.org/api/googleapi
google.golang.org/api/googleapi/transport google.golang.org/api/googleapi/transport