diff options
| author | Thomas Voss <mail@thomasvoss.com> | 2024-08-10 21:32:27 +0200 | 
|---|---|---|
| committer | Thomas Voss <mail@thomasvoss.com> | 2024-08-10 21:32:27 +0200 | 
| commit | 2c2f850a29b2e896f640679ed9fcec523ccfdb2d (patch) | |
| tree | 412662e9494d00135f0ac3bae5816f7aca53767f /vendor/golang.org/x/text/internal/colltab | |
| parent | e1c08fce740b54454c4fea09634bf839ef9f76d3 (diff) | |
Support sorting countries by name
Diffstat (limited to 'vendor/golang.org/x/text/internal/colltab')
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/collelem.go | 376 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/colltab.go | 105 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/contract.go | 145 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/iter.go | 178 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/numeric.go | 236 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/table.go | 275 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/trie.go | 159 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/internal/colltab/weighter.go | 31 | 
8 files changed, 1505 insertions, 0 deletions
| diff --git a/vendor/golang.org/x/text/internal/colltab/collelem.go b/vendor/golang.org/x/text/internal/colltab/collelem.go new file mode 100644 index 0000000..0c23c8a --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/collelem.go @@ -0,0 +1,376 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( +	"fmt" +	"unicode" +) + +// Level identifies the collation comparison level. +// The primary level corresponds to the basic sorting of text. +// The secondary level corresponds to accents and related linguistic elements. +// The tertiary level corresponds to casing and related concepts. +// The quaternary level is derived from the other levels by the +// various algorithms for handling variable elements. +type Level int + +const ( +	Primary Level = iota +	Secondary +	Tertiary +	Quaternary +	Identity + +	NumLevels +) + +const ( +	defaultSecondary = 0x20 +	defaultTertiary  = 0x2 +	maxTertiary      = 0x1F +	MaxQuaternary    = 0x1FFFFF // 21 bits. +) + +// Elem is a representation of a collation element. This API provides ways to encode +// and decode Elems. Implementations of collation tables may use values greater +// or equal to PrivateUse for their own purposes.  However, these should never be +// returned by AppendNext. 
+type Elem uint32 + +const ( +	maxCE       Elem = 0xAFFFFFFF +	PrivateUse       = minContract +	minContract      = 0xC0000000 +	maxContract      = 0xDFFFFFFF +	minExpand        = 0xE0000000 +	maxExpand        = 0xEFFFFFFF +	minDecomp        = 0xF0000000 +) + +type ceType int + +const ( +	ceNormal           ceType = iota // ceNormal includes implicits (ce == 0) +	ceContractionIndex               // rune can be a start of a contraction +	ceExpansionIndex                 // rune expands into a sequence of collation elements +	ceDecompose                      // rune expands using NFKC decomposition +) + +func (ce Elem) ctype() ceType { +	if ce <= maxCE { +		return ceNormal +	} +	if ce <= maxContract { +		return ceContractionIndex +	} else { +		if ce <= maxExpand { +			return ceExpansionIndex +		} +		return ceDecompose +	} +	panic("should not reach here") +	return ceType(-1) +} + +// For normal collation elements, we assume that a collation element either has +// a primary or non-default secondary value, not both. +// Collation elements with a primary value are of the form +// +//	01pppppp pppppppp ppppppp0 ssssssss +//	  - p* is primary collation value +//	  - s* is the secondary collation value +//	00pppppp pppppppp ppppppps sssttttt, where +//	  - p* is primary collation value +//	  - s* offset of secondary from default value. 
+//	  - t* is the tertiary collation value +//	100ttttt cccccccc pppppppp pppppppp +//	  - t* is the tertiary collation value +//	  - c* is the canonical combining class +//	  - p* is the primary collation value +// +// Collation elements with a secondary value are of the form +// +//	1010cccc ccccssss ssssssss tttttttt, where +//	  - c* is the canonical combining class +//	  - s* is the secondary collation value +//	  - t* is the tertiary collation value +//	11qqqqqq qqqqqqqq qqqqqqq0 00000000 +//	  - q* quaternary value +const ( +	ceTypeMask              = 0xC0000000 +	ceTypeMaskExt           = 0xE0000000 +	ceIgnoreMask            = 0xF00FFFFF +	ceType1                 = 0x40000000 +	ceType2                 = 0x00000000 +	ceType3or4              = 0x80000000 +	ceType4                 = 0xA0000000 +	ceTypeQ                 = 0xC0000000 +	Ignore                  = ceType4 +	firstNonPrimary         = 0x80000000 +	lastSpecialPrimary      = 0xA0000000 +	secondaryMask           = 0x80000000 +	hasTertiaryMask         = 0x40000000 +	primaryValueMask        = 0x3FFFFE00 +	maxPrimaryBits          = 21 +	compactPrimaryBits      = 16 +	maxSecondaryBits        = 12 +	maxTertiaryBits         = 8 +	maxCCCBits              = 8 +	maxSecondaryCompactBits = 8 +	maxSecondaryDiffBits    = 4 +	maxTertiaryCompactBits  = 5 +	primaryShift            = 9 +	compactSecondaryShift   = 5 +	minCompactSecondary     = defaultSecondary - 4 +) + +func makeImplicitCE(primary int) Elem { +	return ceType1 | Elem(primary<<primaryShift) | defaultSecondary +} + +// MakeElem returns an Elem for the given values.  It will return an error +// if the given combination of values is invalid. 
+func MakeElem(primary, secondary, tertiary int, ccc uint8) (Elem, error) { +	if w := primary; w >= 1<<maxPrimaryBits || w < 0 { +		return 0, fmt.Errorf("makeCE: primary weight out of bounds: %x >= %x", w, 1<<maxPrimaryBits) +	} +	if w := secondary; w >= 1<<maxSecondaryBits || w < 0 { +		return 0, fmt.Errorf("makeCE: secondary weight out of bounds: %x >= %x", w, 1<<maxSecondaryBits) +	} +	if w := tertiary; w >= 1<<maxTertiaryBits || w < 0 { +		return 0, fmt.Errorf("makeCE: tertiary weight out of bounds: %x >= %x", w, 1<<maxTertiaryBits) +	} +	ce := Elem(0) +	if primary != 0 { +		if ccc != 0 { +			if primary >= 1<<compactPrimaryBits { +				return 0, fmt.Errorf("makeCE: primary weight with non-zero CCC out of bounds: %x >= %x", primary, 1<<compactPrimaryBits) +			} +			if secondary != defaultSecondary { +				return 0, fmt.Errorf("makeCE: cannot combine non-default secondary value (%x) with non-zero CCC (%x)", secondary, ccc) +			} +			ce = Elem(tertiary << (compactPrimaryBits + maxCCCBits)) +			ce |= Elem(ccc) << compactPrimaryBits +			ce |= Elem(primary) +			ce |= ceType3or4 +		} else if tertiary == defaultTertiary { +			if secondary >= 1<<maxSecondaryCompactBits { +				return 0, fmt.Errorf("makeCE: secondary weight with non-zero primary out of bounds: %x >= %x", secondary, 1<<maxSecondaryCompactBits) +			} +			ce = Elem(primary<<(maxSecondaryCompactBits+1) + secondary) +			ce |= ceType1 +		} else { +			d := secondary - defaultSecondary + maxSecondaryDiffBits +			if d >= 1<<maxSecondaryDiffBits || d < 0 { +				return 0, fmt.Errorf("makeCE: secondary weight diff out of bounds: %x < 0 || %x > %x", d, d, 1<<maxSecondaryDiffBits) +			} +			if tertiary >= 1<<maxTertiaryCompactBits { +				return 0, fmt.Errorf("makeCE: tertiary weight with non-zero primary out of bounds: %x > %x", tertiary, 1<<maxTertiaryCompactBits) +			} +			ce = Elem(primary<<maxSecondaryDiffBits + d) +			ce = ce<<maxTertiaryCompactBits + Elem(tertiary) +		} +	} else { +		ce = 
Elem(secondary<<maxTertiaryBits + tertiary) +		ce += Elem(ccc) << (maxSecondaryBits + maxTertiaryBits) +		ce |= ceType4 +	} +	return ce, nil +} + +// MakeQuaternary returns an Elem with the given quaternary value. +func MakeQuaternary(v int) Elem { +	return ceTypeQ | Elem(v<<primaryShift) +} + +// Mask sets weights for any level smaller than l to 0. +// The resulting Elem can be used to test for equality with +// other Elems to which the same mask has been applied. +func (ce Elem) Mask(l Level) uint32 { +	return 0 +} + +// CCC returns the canonical combining class associated with the underlying character, +// if applicable, or 0 otherwise. +func (ce Elem) CCC() uint8 { +	if ce&ceType3or4 != 0 { +		if ce&ceType4 == ceType3or4 { +			return uint8(ce >> 16) +		} +		return uint8(ce >> 20) +	} +	return 0 +} + +// Primary returns the primary collation weight for ce. +func (ce Elem) Primary() int { +	if ce >= firstNonPrimary { +		if ce > lastSpecialPrimary { +			return 0 +		} +		return int(uint16(ce)) +	} +	return int(ce&primaryValueMask) >> primaryShift +} + +// Secondary returns the secondary collation weight for ce. +func (ce Elem) Secondary() int { +	switch ce & ceTypeMask { +	case ceType1: +		return int(uint8(ce)) +	case ceType2: +		return minCompactSecondary + int((ce>>compactSecondaryShift)&0xF) +	case ceType3or4: +		if ce < ceType4 { +			return defaultSecondary +		} +		return int(ce>>8) & 0xFFF +	case ceTypeQ: +		return 0 +	} +	panic("should not reach here") +} + +// Tertiary returns the tertiary collation weight for ce. +func (ce Elem) Tertiary() uint8 { +	if ce&hasTertiaryMask == 0 { +		if ce&ceType3or4 == 0 { +			return uint8(ce & 0x1F) +		} +		if ce&ceType4 == ceType4 { +			return uint8(ce) +		} +		return uint8(ce>>24) & 0x1F // type 2 +	} else if ce&ceTypeMask == ceType1 { +		return defaultTertiary +	} +	// ce is a quaternary value. 
+	return 0 +} + +func (ce Elem) updateTertiary(t uint8) Elem { +	if ce&ceTypeMask == ceType1 { +		// convert to type 4 +		nce := ce & primaryValueMask +		nce |= Elem(uint8(ce)-minCompactSecondary) << compactSecondaryShift +		ce = nce +	} else if ce&ceTypeMaskExt == ceType3or4 { +		ce &= ^Elem(maxTertiary << 24) +		return ce | (Elem(t) << 24) +	} else { +		// type 2 or 4 +		ce &= ^Elem(maxTertiary) +	} +	return ce | Elem(t) +} + +// Quaternary returns the quaternary value if explicitly specified, +// 0 if ce == Ignore, or MaxQuaternary otherwise. +// Quaternary values are used only for shifted variants. +func (ce Elem) Quaternary() int { +	if ce&ceTypeMask == ceTypeQ { +		return int(ce&primaryValueMask) >> primaryShift +	} else if ce&ceIgnoreMask == Ignore { +		return 0 +	} +	return MaxQuaternary +} + +// Weight returns the collation weight for the given level. +func (ce Elem) Weight(l Level) int { +	switch l { +	case Primary: +		return ce.Primary() +	case Secondary: +		return ce.Secondary() +	case Tertiary: +		return int(ce.Tertiary()) +	case Quaternary: +		return ce.Quaternary() +	} +	return 0 // return 0 (ignore) for undefined levels. +} + +// For contractions, collation elements are of the form +// 110bbbbb bbbbbbbb iiiiiiii iiiinnnn, where +//   - n* is the size of the first node in the contraction trie. +//   - i* is the index of the first node in the contraction trie. +//   - b* is the offset into the contraction collation element table. +// +// See contract.go for details on the contraction trie. 
+const ( +	maxNBits              = 4 +	maxTrieIndexBits      = 12 +	maxContractOffsetBits = 13 +) + +func splitContractIndex(ce Elem) (index, n, offset int) { +	n = int(ce & (1<<maxNBits - 1)) +	ce >>= maxNBits +	index = int(ce & (1<<maxTrieIndexBits - 1)) +	ce >>= maxTrieIndexBits +	offset = int(ce & (1<<maxContractOffsetBits - 1)) +	return +} + +// For expansions, Elems are of the form 11100000 00000000 bbbbbbbb bbbbbbbb, +// where b* is the index into the expansion sequence table. +const maxExpandIndexBits = 16 + +func splitExpandIndex(ce Elem) (index int) { +	return int(uint16(ce)) +} + +// Some runes can be expanded using NFKD decomposition. Instead of storing the full +// sequence of collation elements, we decompose the rune and lookup the collation +// elements for each rune in the decomposition and modify the tertiary weights. +// The Elem, in this case, is of the form 11110000 00000000 wwwwwwww vvvvvvvv, where +//   - v* is the replacement tertiary weight for the first rune, +//   - w* is the replacement tertiary weight for the second rune, +// +// Tertiary weights of subsequent runes should be replaced with maxTertiary. +// See https://www.unicode.org/reports/tr10/#Compatibility_Decompositions for more details. +func splitDecompose(ce Elem) (t1, t2 uint8) { +	return uint8(ce), uint8(ce >> 8) +} + +const ( +	// These constants were taken from https://www.unicode.org/versions/Unicode6.0.0/ch12.pdf. 
+	minUnified       rune = 0x4E00 +	maxUnified            = 0x9FFF +	minCompatibility      = 0xF900 +	maxCompatibility      = 0xFAFF +	minRare               = 0x3400 +	maxRare               = 0x4DBF +) +const ( +	commonUnifiedOffset = 0x10000 +	rareUnifiedOffset   = 0x20000 // largest rune in common is U+FAFF +	otherOffset         = 0x50000 // largest rune in rare is U+2FA1D +	illegalOffset       = otherOffset + int(unicode.MaxRune) +	maxPrimary          = illegalOffset + 1 +) + +// implicitPrimary returns the primary weight for a rune +// for which there is no entry for the rune in the collation table. +// We take a different approach from the one specified in +// https://unicode.org/reports/tr10/#Implicit_Weights, +// but preserve the resulting relative ordering of the runes. +func implicitPrimary(r rune) int { +	if unicode.Is(unicode.Ideographic, r) { +		if r >= minUnified && r <= maxUnified { +			// The most common case for CJK. +			return int(r) + commonUnifiedOffset +		} +		if r >= minCompatibility && r <= maxCompatibility { +			// This will typically not hit. The DUCET explicitly specifies mappings +			// for all characters that do not decompose. +			return int(r) + commonUnifiedOffset +		} +		return int(r) + rareUnifiedOffset +	} +	return int(r) + otherOffset +} diff --git a/vendor/golang.org/x/text/internal/colltab/colltab.go b/vendor/golang.org/x/text/internal/colltab/colltab.go new file mode 100644 index 0000000..02f2247 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/colltab.go @@ -0,0 +1,105 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package colltab contains functionality related to collation tables. +// It is only to be used by the collate and search packages. 
+package colltab // import "golang.org/x/text/internal/colltab" + +import ( +	"sort" + +	"golang.org/x/text/language" +) + +// MatchLang finds the index of t in tags, using a matching algorithm used for +// collation and search. tags[0] must be language.Und, the remaining tags should +// be sorted alphabetically. +// +// Language matching for collation and search is different from the matching +// defined by language.Matcher: the (inferred) base language must be an exact +// match for the relevant fields. For example, "gsw" should not match "de". +// Also the parent relation is different, as a parent may have a different +// script. So usually the parent of zh-Hant is und, whereas for MatchLang it is +// zh. +func MatchLang(t language.Tag, tags []language.Tag) int { +	// Canonicalize the values, including collapsing macro languages. +	t, _ = language.All.Canonicalize(t) + +	base, conf := t.Base() +	// Estimate the base language, but only use high-confidence values. +	if conf < language.High { +		// The root locale supports "search" and "standard". We assume that any +		// implementation will only use one of both. +		return 0 +	} + +	// Maximize base and script and normalize the tag. +	if _, s, r := t.Raw(); (r != language.Region{}) { +		p, _ := language.Raw.Compose(base, s, r) +		// Taking the parent forces the script to be maximized. +		p = p.Parent() +		// Add back region and extensions. +		t, _ = language.Raw.Compose(p, r, t.Extensions()) +	} else { +		// Set the maximized base language. +		t, _ = language.Raw.Compose(base, s, t.Extensions()) +	} + +	// Find start index of the language tag. 
+	start := 1 + sort.Search(len(tags)-1, func(i int) bool { +		b, _, _ := tags[i+1].Raw() +		return base.String() <= b.String() +	}) +	if start < len(tags) { +		if b, _, _ := tags[start].Raw(); b != base { +			return 0 +		} +	} + +	// Besides the base language, script and region, only the collation type and +	// the custom variant defined in the 'u' extension are used to distinguish a +	// locale. +	// Strip all variants and extensions and add back the custom variant. +	tdef, _ := language.Raw.Compose(t.Raw()) +	tdef, _ = tdef.SetTypeForKey("va", t.TypeForKey("va")) + +	// First search for a specialized collation type, if present. +	try := []language.Tag{tdef} +	if co := t.TypeForKey("co"); co != "" { +		tco, _ := tdef.SetTypeForKey("co", co) +		try = []language.Tag{tco, tdef} +	} + +	for _, tx := range try { +		for ; tx != language.Und; tx = parent(tx) { +			for i, t := range tags[start:] { +				if b, _, _ := t.Raw(); b != base { +					break +				} +				if tx == t { +					return start + i +				} +			} +		} +	} +	return 0 +} + +// parent computes the structural parent. This means inheritance may change +// script. So, unlike the CLDR parent, parent(zh-Hant) == zh. +func parent(t language.Tag) language.Tag { +	if t.TypeForKey("va") != "" { +		t, _ = t.SetTypeForKey("va", "") +		return t +	} +	result := language.Und +	if b, s, r := t.Raw(); (r != language.Region{}) { +		result, _ = language.Raw.Compose(b, s, t.Extensions()) +	} else if (s != language.Script{}) { +		result, _ = language.Raw.Compose(b, t.Extensions()) +	} else if (b != language.Base{}) { +		result, _ = language.Raw.Compose(t.Extensions()) +	} +	return result +} diff --git a/vendor/golang.org/x/text/internal/colltab/contract.go b/vendor/golang.org/x/text/internal/colltab/contract.go new file mode 100644 index 0000000..25649d4 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/contract.go @@ -0,0 +1,145 @@ +// Copyright 2012 The Go Authors. All rights reserved. 
+// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import "unicode/utf8" + +// For a description of ContractTrieSet, see text/collate/build/contract.go. + +type ContractTrieSet []struct{ L, H, N, I uint8 } + +// ctScanner is used to match a trie to an input sequence. +// A contraction may match a non-contiguous sequence of bytes in an input string. +// For example, if there is a contraction for <a, combining_ring>, it should match +// the sequence <a, combining_cedilla, combining_ring>, as combining_cedilla does +// not block combining_ring. +// ctScanner does not automatically skip over non-blocking non-starters, but rather +// retains the state of the last match and leaves it up to the user to continue +// the match at the appropriate points. +type ctScanner struct { +	states ContractTrieSet +	s      []byte +	n      int +	index  int +	pindex int +	done   bool +} + +type ctScannerString struct { +	states ContractTrieSet +	s      string +	n      int +	index  int +	pindex int +	done   bool +} + +func (t ContractTrieSet) scanner(index, n int, b []byte) ctScanner { +	return ctScanner{s: b, states: t[index:], n: n} +} + +func (t ContractTrieSet) scannerString(index, n int, str string) ctScannerString { +	return ctScannerString{s: str, states: t[index:], n: n} +} + +// result returns the offset i and bytes consumed p so far.  If no suffix +// matched, i and p will be 0. +func (s *ctScanner) result() (i, p int) { +	return s.index, s.pindex +} + +func (s *ctScannerString) result() (i, p int) { +	return s.index, s.pindex +} + +const ( +	final   = 0 +	noIndex = 0xFF +) + +// scan matches the longest suffix at the current location in the input +// and returns the number of bytes consumed. 
+func (s *ctScanner) scan(p int) int { +	pr := p // the p at the rune start +	str := s.s +	states, n := s.states, s.n +	for i := 0; i < n && p < len(str); { +		e := states[i] +		c := str[p] +		// TODO: a significant number of contractions are of a form that +		// cannot match discontiguous UTF-8 in a normalized string. We could let +		// a negative value of e.n mean that we can set s.done = true and avoid +		// the need for additional matches. +		if c >= e.L { +			if e.L == c { +				p++ +				if e.I != noIndex { +					s.index = int(e.I) +					s.pindex = p +				} +				if e.N != final { +					i, states, n = 0, states[int(e.H)+n:], int(e.N) +					if p >= len(str) || utf8.RuneStart(str[p]) { +						s.states, s.n, pr = states, n, p +					} +				} else { +					s.done = true +					return p +				} +				continue +			} else if e.N == final && c <= e.H { +				p++ +				s.done = true +				s.index = int(c-e.L) + int(e.I) +				s.pindex = p +				return p +			} +		} +		i++ +	} +	return pr +} + +// scan is a verbatim copy of ctScanner.scan. +func (s *ctScannerString) scan(p int) int { +	pr := p // the p at the rune start +	str := s.s +	states, n := s.states, s.n +	for i := 0; i < n && p < len(str); { +		e := states[i] +		c := str[p] +		// TODO: a significant number of contractions are of a form that +		// cannot match discontiguous UTF-8 in a normalized string. We could let +		// a negative value of e.n mean that we can set s.done = true and avoid +		// the need for additional matches. 
+		if c >= e.L { +			if e.L == c { +				p++ +				if e.I != noIndex { +					s.index = int(e.I) +					s.pindex = p +				} +				if e.N != final { +					i, states, n = 0, states[int(e.H)+n:], int(e.N) +					if p >= len(str) || utf8.RuneStart(str[p]) { +						s.states, s.n, pr = states, n, p +					} +				} else { +					s.done = true +					return p +				} +				continue +			} else if e.N == final && c <= e.H { +				p++ +				s.done = true +				s.index = int(c-e.L) + int(e.I) +				s.pindex = p +				return p +			} +		} +		i++ +	} +	return pr +} diff --git a/vendor/golang.org/x/text/internal/colltab/iter.go b/vendor/golang.org/x/text/internal/colltab/iter.go new file mode 100644 index 0000000..c1b1ba8 --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/iter.go @@ -0,0 +1,178 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +// An Iter incrementally converts chunks of the input text to collation +// elements, while ensuring that the collation elements are in normalized order +// (that is, they are in the order as if the input text were normalized first). +type Iter struct { +	Weighter Weighter +	Elems    []Elem +	// N is the number of elements in Elems that will not be reordered on +	// subsequent iterations, N <= len(Elems). +	N int + +	bytes []byte +	str   string +	// Because the Elems buffer may contain collation elements that are needed +	// for look-ahead, we need two positions in the text (bytes or str): one for +	// the end position in the text for the current iteration and one for the +	// start of the next call to appendNext. +	pEnd  int // end position in text corresponding to N. +	pNext int // pEnd <= pNext. +} + +// Reset sets the position in the current input text to p and discards any +// results obtained so far. 
+func (i *Iter) Reset(p int) { +	i.Elems = i.Elems[:0] +	i.N = 0 +	i.pEnd = p +	i.pNext = p +} + +// Len returns the length of the input text. +func (i *Iter) Len() int { +	if i.bytes != nil { +		return len(i.bytes) +	} +	return len(i.str) +} + +// Discard removes the collation elements up to N. +func (i *Iter) Discard() { +	// TODO: change this such that only modifiers following starters will have +	// to be copied. +	i.Elems = i.Elems[:copy(i.Elems, i.Elems[i.N:])] +	i.N = 0 +} + +// End returns the end position of the input text for which Next has returned +// results. +func (i *Iter) End() int { +	return i.pEnd +} + +// SetInput resets i to input s. +func (i *Iter) SetInput(s []byte) { +	i.bytes = s +	i.str = "" +	i.Reset(0) +} + +// SetInputString resets i to input s. +func (i *Iter) SetInputString(s string) { +	i.str = s +	i.bytes = nil +	i.Reset(0) +} + +func (i *Iter) done() bool { +	return i.pNext >= len(i.str) && i.pNext >= len(i.bytes) +} + +func (i *Iter) appendNext() bool { +	if i.done() { +		return false +	} +	var sz int +	if i.bytes == nil { +		i.Elems, sz = i.Weighter.AppendNextString(i.Elems, i.str[i.pNext:]) +	} else { +		i.Elems, sz = i.Weighter.AppendNext(i.Elems, i.bytes[i.pNext:]) +	} +	if sz == 0 { +		sz = 1 +	} +	i.pNext += sz +	return true +} + +// Next appends Elems to the internal array. On each iteration, it will either +// add starters or modifiers. In the majority of cases, an Elem with a primary +// value > 0 will have a CCC of 0. The CCC values of collation elements are also +// used to detect if the input string was not normalized and to adjust the +// result accordingly. +func (i *Iter) Next() bool { +	if i.N == len(i.Elems) && !i.appendNext() { +		return false +	} + +	// Check if the current segment starts with a starter. 
+	prevCCC := i.Elems[len(i.Elems)-1].CCC() +	if prevCCC == 0 { +		i.N = len(i.Elems) +		i.pEnd = i.pNext +		return true +	} else if i.Elems[i.N].CCC() == 0 { +		// set i.N to only cover part of i.Elems for which prevCCC == 0 and +		// use rest for the next call to next. +		for i.N++; i.N < len(i.Elems) && i.Elems[i.N].CCC() == 0; i.N++ { +		} +		i.pEnd = i.pNext +		return true +	} + +	// The current (partial) segment starts with modifiers. We need to collect +	// all successive modifiers to ensure that they are normalized. +	for { +		p := len(i.Elems) +		i.pEnd = i.pNext +		if !i.appendNext() { +			break +		} + +		if ccc := i.Elems[p].CCC(); ccc == 0 || len(i.Elems)-i.N > maxCombiningCharacters { +			// Leave the starter for the next iteration. This ensures that we +			// do not return sequences of collation elements that cross two +			// segments. +			// +			// TODO: handle large number of combining characters by fully +			// normalizing the input segment before iteration. This ensures +			// results are consistent across the text repo. +			i.N = p +			return true +		} else if ccc < prevCCC { +			i.doNorm(p, ccc) // should be rare, never occurs for NFD and FCC. +		} else { +			prevCCC = ccc +		} +	} + +	done := len(i.Elems) != i.N +	i.N = len(i.Elems) +	return done +} + +// nextNoNorm is the same as next, but does not "normalize" the collation +// elements. +func (i *Iter) nextNoNorm() bool { +	// TODO: remove this function. Using this instead of next does not seem +	// to improve performance in any significant way. We retain this until +	// later for evaluation purposes. +	if i.done() { +		return false +	} +	i.appendNext() +	i.N = len(i.Elems) +	return true +} + +const maxCombiningCharacters = 30 + +// doNorm reorders the collation elements in i.Elems. +// It assumes that blocks of collation elements added with appendNext +// either start and end with the same CCC or start with CCC == 0. +// This allows for a single insertion point for the entire block. 
+// The correctness of this assumption is verified in builder.go. +func (i *Iter) doNorm(p int, ccc uint8) { +	n := len(i.Elems) +	k := p +	for p--; p > i.N && ccc < i.Elems[p-1].CCC(); p-- { +	} +	i.Elems = append(i.Elems, i.Elems[p:k]...) +	copy(i.Elems[p:], i.Elems[k:]) +	i.Elems = i.Elems[:n] +} diff --git a/vendor/golang.org/x/text/internal/colltab/numeric.go b/vendor/golang.org/x/text/internal/colltab/numeric.go new file mode 100644 index 0000000..53b819c --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/numeric.go @@ -0,0 +1,236 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( +	"unicode" +	"unicode/utf8" +) + +// NewNumericWeighter wraps w to replace individual digits to sort based on their +// numeric value. +// +// Weighter w must have a free primary weight after the primary weight for 9. +// If this is not the case, numeric value will sort at the same primary level +// as the first primary sorting after 9. +func NewNumericWeighter(w Weighter) Weighter { +	getElem := func(s string) Elem { +		elems, _ := w.AppendNextString(nil, s) +		return elems[0] +	} +	nine := getElem("9") + +	// Numbers should order before zero, but the DUCET has no room for this. +	// TODO: move before zero once we use fractional collation elements. +	ns, _ := MakeElem(nine.Primary()+1, nine.Secondary(), int(nine.Tertiary()), 0) + +	return &numericWeighter{ +		Weighter: w, + +		// We assume that w sorts digits of different kinds in order of numeric +		// value and that the tertiary weight order is preserved. +		// +		// TODO: evaluate whether it is worth basing the ranges on the Elem +		// encoding itself once the move to fractional weights is complete. 
+		zero:          getElem("0"), +		zeroSpecialLo: getElem("\uFF10"), // U+FF10 FULLWIDTH DIGIT ZERO +		zeroSpecialHi: getElem("₀"), // U+2080 SUBSCRIPT ZERO +		nine:          nine, +		nineSpecialHi: getElem("₉"), // U+2089 SUBSCRIPT NINE +		numberStart:   ns, +	} +} + +// A numericWeighter translates a stream of digits into a stream of weights +// representing the numeric value. +type numericWeighter struct { +	Weighter + +	// The Elems below all demarcate boundaries of specific ranges. With the +	// current element encoding digits are in two ranges: normal (default +	// tertiary value) and special. For most languages, digits have collation +	// elements in the normal range. +	// +	// Note: the range tests are very specific for the element encoding used by +	// this implementation. The tests in collate_test.go are designed to fail +	// if this code is not updated when an encoding has changed. + +	zero          Elem // normal digit zero +	zeroSpecialLo Elem // special digit zero, low tertiary value +	zeroSpecialHi Elem // special digit zero, high tertiary value +	nine          Elem // normal digit nine +	nineSpecialHi Elem // special digit nine +	numberStart   Elem +} + +// AppendNext calls the namesake of the underlying Weighter, but replaces single +// digits with weights representing their value. +func (nw *numericWeighter) AppendNext(buf []Elem, s []byte) (ce []Elem, n int) { +	ce, n = nw.Weighter.AppendNext(buf, s) +	nc := numberConverter{ +		elems: buf, +		w:     nw, +		b:     s, +	} +	isZero, ok := nc.checkNextDigit(ce) +	if !ok { +		return ce, n +	} +	// ce might have been grown already, so take it instead of buf. +	nc.init(ce, len(buf), isZero) +	for n < len(s) { +		ce, sz := nw.Weighter.AppendNext(nc.elems, s[n:]) +		nc.b = s +		n += sz +		if !nc.update(ce) { +			break +		} +	} +	return nc.result(), n +} + +// AppendNextString calls the namesake of the underlying Weighter, but replaces +// single digits with weights representing their value. 
+func (nw *numericWeighter) AppendNextString(buf []Elem, s string) (ce []Elem, n int) { +	ce, n = nw.Weighter.AppendNextString(buf, s) +	nc := numberConverter{ +		elems: buf, +		w:     nw, +		s:     s, +	} +	isZero, ok := nc.checkNextDigit(ce) +	if !ok { +		return ce, n +	} +	nc.init(ce, len(buf), isZero) +	for n < len(s) { +		ce, sz := nw.Weighter.AppendNextString(nc.elems, s[n:]) +		nc.s = s +		n += sz +		if !nc.update(ce) { +			break +		} +	} +	return nc.result(), n +} + +type numberConverter struct { +	w *numericWeighter + +	elems    []Elem +	nDigits  int +	lenIndex int + +	s string // set if the input was of type string +	b []byte // set if the input was of type []byte +} + +// init completes initialization of a numberConverter and prepares it for adding +// more digits. elems is assumed to have a digit starting at oldLen. +func (nc *numberConverter) init(elems []Elem, oldLen int, isZero bool) { +	// Insert a marker indicating the start of a number and a placeholder +	// for the number of digits. +	if isZero { +		elems = append(elems[:oldLen], nc.w.numberStart, 0) +	} else { +		elems = append(elems, 0, 0) +		copy(elems[oldLen+2:], elems[oldLen:]) +		elems[oldLen] = nc.w.numberStart +		elems[oldLen+1] = 0 + +		nc.nDigits = 1 +	} +	nc.elems = elems +	nc.lenIndex = oldLen + 1 +} + +// checkNextDigit reports whether bufNew adds a single digit relative to the old +// buffer. If it does, it also reports whether this digit is zero. +func (nc *numberConverter) checkNextDigit(bufNew []Elem) (isZero, ok bool) { +	if len(nc.elems) >= len(bufNew) { +		return false, false +	} +	e := bufNew[len(nc.elems)] +	if e < nc.w.zeroSpecialLo || nc.w.nine < e { +		// Not a number. +		return false, false +	} +	if e < nc.w.zero { +		if e > nc.w.nineSpecialHi { +			// Not a number. +			return false, false +		} +		if !nc.isDigit() { +			return false, false +		} +		isZero = e <= nc.w.zeroSpecialHi +	} else { +		// This is the common case if we encounter a digit. 
+		isZero = e == nc.w.zero +	} +	// Test the remaining added collation elements have a zero primary value. +	if n := len(bufNew) - len(nc.elems); n > 1 { +		for i := len(nc.elems) + 1; i < len(bufNew); i++ { +			if bufNew[i].Primary() != 0 { +				return false, false +			} +		} +		// In some rare cases, collation elements will encode runes in +		// unicode.No as a digit. For example Ethiopic digits (U+1369 - U+1371) +		// are not in Nd. Also some digits that clearly belong in unicode.No, +		// like U+0C78 TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR, have +		// collation elements indistinguishable from normal digits. +		// Unfortunately, this means we need to make this check for nearly all +		// non-Latin digits. +		// +		// TODO: check the performance impact and find something better if it is +		// an issue. +		if !nc.isDigit() { +			return false, false +		} +	} +	return isZero, true +} + +func (nc *numberConverter) isDigit() bool { +	if nc.b != nil { +		r, _ := utf8.DecodeRune(nc.b) +		return unicode.In(r, unicode.Nd) +	} +	r, _ := utf8.DecodeRuneInString(nc.s) +	return unicode.In(r, unicode.Nd) +} + +// We currently support a maximum of about 2M digits (the number of primary +// values). Such numbers will compare correctly against small numbers, but their +// comparison against other large numbers is undefined. +// +// TODO: define a proper fallback, such as comparing large numbers textually or +// actually allowing numbers of unlimited length. +// +// TODO: cap this to a lower number (like 100) and maybe allow a larger number +// in an option? +const maxDigits = 1<<maxPrimaryBits - 1 + +func (nc *numberConverter) update(elems []Elem) bool { +	isZero, ok := nc.checkNextDigit(elems) +	if nc.nDigits == 0 && isZero { +		return true +	} +	nc.elems = elems +	if !ok { +		return false +	} +	nc.nDigits++ +	return nc.nDigits < maxDigits +} + +// result fills in the length element for the digit sequence and returns the +// completed collation elements. 
+func (nc *numberConverter) result() []Elem { +	e, _ := MakeElem(nc.nDigits, defaultSecondary, defaultTertiary, 0) +	nc.elems[nc.lenIndex] = e +	return nc.elems +} diff --git a/vendor/golang.org/x/text/internal/colltab/table.go b/vendor/golang.org/x/text/internal/colltab/table.go new file mode 100644 index 0000000..e26e36d --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/table.go @@ -0,0 +1,275 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab + +import ( +	"unicode/utf8" + +	"golang.org/x/text/unicode/norm" +) + +// Table holds all collation data for a given collation ordering. +type Table struct { +	Index Trie // main trie + +	// expansion info +	ExpandElem []uint32 + +	// contraction info +	ContractTries  ContractTrieSet +	ContractElem   []uint32 +	MaxContractLen int +	VariableTop    uint32 +} + +func (t *Table) AppendNext(w []Elem, b []byte) (res []Elem, n int) { +	return t.appendNext(w, source{bytes: b}) +} + +func (t *Table) AppendNextString(w []Elem, s string) (res []Elem, n int) { +	return t.appendNext(w, source{str: s}) +} + +func (t *Table) Start(p int, b []byte) int { +	// TODO: implement +	panic("not implemented") +} + +func (t *Table) StartString(p int, s string) int { +	// TODO: implement +	panic("not implemented") +} + +func (t *Table) Domain() []string { +	// TODO: implement +	panic("not implemented") +} + +func (t *Table) Top() uint32 { +	return t.VariableTop +} + +type source struct { +	str   string +	bytes []byte +} + +func (src *source) lookup(t *Table) (ce Elem, sz int) { +	if src.bytes == nil { +		return t.Index.lookupString(src.str) +	} +	return t.Index.lookup(src.bytes) +} + +func (src *source) tail(sz int) { +	if src.bytes == nil { +		src.str = src.str[sz:] +	} else { +		src.bytes = src.bytes[sz:] +	} +} + +func (src *source) nfd(buf []byte, end int) []byte { +	if src.bytes == nil { +		return 
norm.NFD.AppendString(buf[:0], src.str[:end]) +	} +	return norm.NFD.Append(buf[:0], src.bytes[:end]...) +} + +func (src *source) rune() (r rune, sz int) { +	if src.bytes == nil { +		return utf8.DecodeRuneInString(src.str) +	} +	return utf8.DecodeRune(src.bytes) +} + +func (src *source) properties(f norm.Form) norm.Properties { +	if src.bytes == nil { +		return f.PropertiesString(src.str) +	} +	return f.Properties(src.bytes) +} + +// appendNext appends the weights corresponding to the next rune or +// contraction in s.  If a contraction is matched to a discontinuous +// sequence of runes, the weights for the interstitial runes are +// appended as well.  It returns a new slice that includes the appended +// weights and the number of bytes consumed from s. +func (t *Table) appendNext(w []Elem, src source) (res []Elem, n int) { +	ce, sz := src.lookup(t) +	tp := ce.ctype() +	if tp == ceNormal { +		if ce == 0 { +			r, _ := src.rune() +			const ( +				hangulSize  = 3 +				firstHangul = 0xAC00 +				lastHangul  = 0xD7A3 +			) +			if r >= firstHangul && r <= lastHangul { +				// TODO: performance can be considerably improved here. +				n = sz +				var buf [16]byte // Used for decomposing Hangul. +				for b := src.nfd(buf[:0], hangulSize); len(b) > 0; b = b[sz:] { +					ce, sz = t.Index.lookup(b) +					w = append(w, ce) +				} +				return w, n +			} +			ce = makeImplicitCE(implicitPrimary(r)) +		} +		w = append(w, ce) +	} else if tp == ceExpansionIndex { +		w = t.appendExpansion(w, ce) +	} else if tp == ceContractionIndex { +		n := 0 +		src.tail(sz) +		if src.bytes == nil { +			w, n = t.matchContractionString(w, ce, src.str) +		} else { +			w, n = t.matchContraction(w, ce, src.bytes) +		} +		sz += n +	} else if tp == ceDecompose { +		// Decompose using NFKD and replace tertiary weights. 
+		t1, t2 := splitDecompose(ce) +		i := len(w) +		nfkd := src.properties(norm.NFKD).Decomposition() +		for p := 0; len(nfkd) > 0; nfkd = nfkd[p:] { +			w, p = t.appendNext(w, source{bytes: nfkd}) +		} +		w[i] = w[i].updateTertiary(t1) +		if i++; i < len(w) { +			w[i] = w[i].updateTertiary(t2) +			for i++; i < len(w); i++ { +				w[i] = w[i].updateTertiary(maxTertiary) +			} +		} +	} +	return w, sz +} + +func (t *Table) appendExpansion(w []Elem, ce Elem) []Elem { +	i := splitExpandIndex(ce) +	n := int(t.ExpandElem[i]) +	i++ +	for _, ce := range t.ExpandElem[i : i+n] { +		w = append(w, Elem(ce)) +	} +	return w +} + +func (t *Table) matchContraction(w []Elem, ce Elem, suffix []byte) ([]Elem, int) { +	index, n, offset := splitContractIndex(ce) + +	scan := t.ContractTries.scanner(index, n, suffix) +	buf := [norm.MaxSegmentSize]byte{} +	bufp := 0 +	p := scan.scan(0) + +	if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { +		// By now we should have filtered most cases. +		p0 := p +		bufn := 0 +		rune := norm.NFD.Properties(suffix[p:]) +		p += rune.Size() +		if rune.LeadCCC() != 0 { +			prevCC := rune.TrailCCC() +			// A gap may only occur in the last normalization segment. +			// This also ensures that len(scan.s) < norm.MaxSegmentSize. +			if end := norm.NFD.FirstBoundary(suffix[p:]); end != -1 { +				scan.s = suffix[:p+end] +			} +			for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { +				rune = norm.NFD.Properties(suffix[p:]) +				if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { +					break +				} +				prevCC = rune.TrailCCC() +				if pp := scan.scan(p); pp != p { +					// Copy the interstitial runes for later processing. +					bufn += copy(buf[bufn:], suffix[p0:p]) +					if scan.pindex == pp { +						bufp = bufn +					} +					p, p0 = pp, pp +				} else { +					p += rune.Size() +				} +			} +		} +	} +	// Append weights for the matched contraction, which may be an expansion. 
+	i, n := scan.result() +	ce = Elem(t.ContractElem[i+offset]) +	if ce.ctype() == ceNormal { +		w = append(w, ce) +	} else { +		w = t.appendExpansion(w, ce) +	} +	// Append weights for the runes in the segment not part of the contraction. +	for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { +		w, p = t.appendNext(w, source{bytes: b}) +	} +	return w, n +} + +// TODO: unify the two implementations. This is best done after first simplifying +// the algorithm taking into account the inclusion of both NFC and NFD forms +// in the table. +func (t *Table) matchContractionString(w []Elem, ce Elem, suffix string) ([]Elem, int) { +	index, n, offset := splitContractIndex(ce) + +	scan := t.ContractTries.scannerString(index, n, suffix) +	buf := [norm.MaxSegmentSize]byte{} +	bufp := 0 +	p := scan.scan(0) + +	if !scan.done && p < len(suffix) && suffix[p] >= utf8.RuneSelf { +		// By now we should have filtered most cases. +		p0 := p +		bufn := 0 +		rune := norm.NFD.PropertiesString(suffix[p:]) +		p += rune.Size() +		if rune.LeadCCC() != 0 { +			prevCC := rune.TrailCCC() +			// A gap may only occur in the last normalization segment. +			// This also ensures that len(scan.s) < norm.MaxSegmentSize. +			if end := norm.NFD.FirstBoundaryInString(suffix[p:]); end != -1 { +				scan.s = suffix[:p+end] +			} +			for p < len(suffix) && !scan.done && suffix[p] >= utf8.RuneSelf { +				rune = norm.NFD.PropertiesString(suffix[p:]) +				if ccc := rune.LeadCCC(); ccc == 0 || prevCC >= ccc { +					break +				} +				prevCC = rune.TrailCCC() +				if pp := scan.scan(p); pp != p { +					// Copy the interstitial runes for later processing. +					bufn += copy(buf[bufn:], suffix[p0:p]) +					if scan.pindex == pp { +						bufp = bufn +					} +					p, p0 = pp, pp +				} else { +					p += rune.Size() +				} +			} +		} +	} +	// Append weights for the matched contraction, which may be an expansion. 
+	i, n := scan.result() +	ce = Elem(t.ContractElem[i+offset]) +	if ce.ctype() == ceNormal { +		w = append(w, ce) +	} else { +		w = t.appendExpansion(w, ce) +	} +	// Append weights for the runes in the segment not part of the contraction. +	for b, p := buf[:bufp], 0; len(b) > 0; b = b[p:] { +		w, p = t.appendNext(w, source{bytes: b}) +	} +	return w, n +} diff --git a/vendor/golang.org/x/text/internal/colltab/trie.go b/vendor/golang.org/x/text/internal/colltab/trie.go new file mode 100644 index 0000000..a0eaa0d --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/trie.go @@ -0,0 +1,159 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// The trie in this file is used to associate the first full character in an +// UTF-8 string to a collation element. All but the last byte in a UTF-8 byte +// sequence are used to lookup offsets in the index table to be used for the +// next byte. The last byte is used to index into a table of collation elements. +// For a full description, see go.text/collate/build/trie.go. + +package colltab + +const blockSize = 64 + +type Trie struct { +	Index0  []uint16 // index for first byte (0xC0-0xFF) +	Values0 []uint32 // index for first byte (0x00-0x7F) +	Index   []uint16 +	Values  []uint32 +} + +const ( +	t1 = 0x00 // 0000 0000 +	tx = 0x80 // 1000 0000 +	t2 = 0xC0 // 1100 0000 +	t3 = 0xE0 // 1110 0000 +	t4 = 0xF0 // 1111 0000 +	t5 = 0xF8 // 1111 1000 +	t6 = 0xFC // 1111 1100 +	te = 0xFE // 1111 1110 +) + +func (t *Trie) lookupValue(n uint16, b byte) Elem { +	return Elem(t.Values[int(n)<<6+int(b)]) +} + +// lookup returns the trie value for the first UTF-8 encoding in s and +// the width in bytes of this encoding. The size will be 0 if s does not +// hold enough bytes to complete the encoding. len(s) must be greater than 0. 
+func (t *Trie) lookup(s []byte) (v Elem, sz int) { +	c0 := s[0] +	switch { +	case c0 < tx: +		return Elem(t.Values0[c0]), 1 +	case c0 < t2: +		return 0, 1 +	case c0 < t3: +		if len(s) < 2 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		return t.lookupValue(i, c1), 2 +	case c0 < t4: +		if len(s) < 3 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		o := int(i)<<6 + int(c1) +		i = t.Index[o] +		c2 := s[2] +		if c2 < tx || t2 <= c2 { +			return 0, 2 +		} +		return t.lookupValue(i, c2), 3 +	case c0 < t5: +		if len(s) < 4 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		o := int(i)<<6 + int(c1) +		i = t.Index[o] +		c2 := s[2] +		if c2 < tx || t2 <= c2 { +			return 0, 2 +		} +		o = int(i)<<6 + int(c2) +		i = t.Index[o] +		c3 := s[3] +		if c3 < tx || t2 <= c3 { +			return 0, 3 +		} +		return t.lookupValue(i, c3), 4 +	} +	// Illegal rune +	return 0, 1 +} + +// The body of lookupString is a verbatim copy of that of lookup. 
+func (t *Trie) lookupString(s string) (v Elem, sz int) { +	c0 := s[0] +	switch { +	case c0 < tx: +		return Elem(t.Values0[c0]), 1 +	case c0 < t2: +		return 0, 1 +	case c0 < t3: +		if len(s) < 2 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		return t.lookupValue(i, c1), 2 +	case c0 < t4: +		if len(s) < 3 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		o := int(i)<<6 + int(c1) +		i = t.Index[o] +		c2 := s[2] +		if c2 < tx || t2 <= c2 { +			return 0, 2 +		} +		return t.lookupValue(i, c2), 3 +	case c0 < t5: +		if len(s) < 4 { +			return 0, 0 +		} +		i := t.Index0[c0] +		c1 := s[1] +		if c1 < tx || t2 <= c1 { +			return 0, 1 +		} +		o := int(i)<<6 + int(c1) +		i = t.Index[o] +		c2 := s[2] +		if c2 < tx || t2 <= c2 { +			return 0, 2 +		} +		o = int(i)<<6 + int(c2) +		i = t.Index[o] +		c3 := s[3] +		if c3 < tx || t2 <= c3 { +			return 0, 3 +		} +		return t.lookupValue(i, c3), 4 +	} +	// Illegal rune +	return 0, 1 +} diff --git a/vendor/golang.org/x/text/internal/colltab/weighter.go b/vendor/golang.org/x/text/internal/colltab/weighter.go new file mode 100644 index 0000000..f1ec45f --- /dev/null +++ b/vendor/golang.org/x/text/internal/colltab/weighter.go @@ -0,0 +1,31 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package colltab // import "golang.org/x/text/internal/colltab" + +// A Weighter can be used as a source for Collator and Searcher. +type Weighter interface { +	// Start finds the start of the segment that includes position p. +	Start(p int, b []byte) int + +	// StartString finds the start of the segment that includes position p. +	StartString(p int, s string) int + +	// AppendNext appends Elems to buf corresponding to the longest match +	// of a single character or contraction from the start of s. 
+	// It returns the new buf and the number of bytes consumed. +	AppendNext(buf []Elem, s []byte) (ce []Elem, n int) + +	// AppendNextString appends Elems to buf corresponding to the longest match +	// of a single character or contraction from the start of s. +	// It returns the new buf and the number of bytes consumed. +	AppendNextString(buf []Elem, s string) (ce []Elem, n int) + +	// Domain returns a slice of all single characters and contractions for which +	// collation elements are defined in this table. +	Domain() []string + +	// Top returns the highest variable primary value. +	Top() uint32 +} |