summaryrefslogtreecommitdiffhomepage
path: root/vendor/golang.org/x/text/internal/colltab/colltab.go
blob: 02f22477ec2d84b6e99ec5c94fef0d996fbdd232 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
// Copyright 2015 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package colltab contains functionality related to collation tables.
// It is only to be used by the collate and search packages.
package colltab // import "golang.org/x/text/internal/colltab"

import (
	"sort"

	"golang.org/x/text/language"
)

// MatchLang returns the index in tags of the entry that best matches t under
// the matching rules used for collation and search. It returns 0 (the index of
// the root locale) when nothing more specific matches. tags[0] must be
// language.Und and the remaining tags should be sorted alphabetically.
//
// This differs from the matching done by language.Matcher in two ways. First,
// the (inferred) base language must match exactly on the relevant fields, so
// "gsw" does not match "de". Second, the parent relation is structural and may
// therefore cross scripts: the parent of zh-Hant is usually und, but for
// MatchLang it is zh.
func MatchLang(t language.Tag, tags []language.Tag) int {
	// Bring t into canonical form; this also collapses macro languages.
	t, _ = language.All.Canonicalize(t)

	// Only trust an inferred base language when the confidence is high.
	// Otherwise fall back to the root locale, which supports both "search" and
	// "standard"; an implementation is assumed to use only one of the two.
	base, conf := t.Base()
	if conf < language.High {
		return 0
	}

	// Rebuild t around the maximized base language, keeping its extensions.
	var noRegion language.Region
	_, script, region := t.Raw()
	exts := t.Extensions()
	if region == noRegion {
		t, _ = language.Raw.Compose(base, script, exts)
	} else {
		seed, _ := language.Raw.Compose(base, script, region)
		// Going through the parent forces the script to be maximized; the
		// region and extensions are then put back on top of it.
		t, _ = language.Raw.Compose(seed.Parent(), region, exts)
	}

	// baseOf extracts the raw (non-inferred) base language of a tag.
	baseOf := func(tag language.Tag) language.Base {
		b, _, _ := tag.Raw()
		return b
	}

	// Binary-search tags[1:] for the first entry whose base language does not
	// sort before ours.
	baseName := base.String()
	start := 1 + sort.Search(len(tags)-1, func(i int) bool {
		return baseOf(tags[i+1]).String() >= baseName
	})
	if start < len(tags) && baseOf(tags[start]) != base {
		// No entry exists for this base language at all.
		return 0
	}

	// Apart from base language, script and region, a locale is distinguished
	// only by the collation type and the custom variant of the 'u' extension.
	// Drop every variant and extension, then restore the custom variant.
	b, s, r := t.Raw()
	plain, _ := language.Raw.Compose(b, s, r)
	plain, _ = plain.SetTypeForKey("va", t.TypeForKey("va"))

	// A specialized collation type, when requested, is tried before the
	// default one.
	candidates := make([]language.Tag, 0, 2)
	if co := t.TypeForKey("co"); co != "" {
		withCo, _ := plain.SetTypeForKey("co", co)
		candidates = append(candidates, withCo)
	}
	candidates = append(candidates, plain)

	for _, candidate := range candidates {
		// Walk up the structural parent chain until the root is reached.
		for cur := candidate; cur != language.Und; cur = parent(cur) {
			for offset, have := range tags[start:] {
				if baseOf(have) != base {
					// Left the run of entries sharing our base language.
					break
				}
				if cur == have {
					return start + offset
				}
			}
		}
	}
	return 0
}

// parent returns the structural parent of t: the tag obtained by dropping its
// most specific remaining component while keeping its extensions. Because the
// relation is purely structural, inheritance may change script; unlike the
// CLDR parent, parent(zh-Hant) == zh.
//
// Components are removed in this order: the "va" custom variant, then the
// region, then the script, then the base language. A tag with none of these
// set yields language.Und.
func parent(t language.Tag) language.Tag {
	// A custom variant is the most specific part, so it is removed first.
	if t.TypeForKey("va") != "" {
		stripped, _ := t.SetTypeForKey("va", "")
		return stripped
	}

	// Zero values used to detect which components are absent.
	var (
		noBase   language.Base
		noScript language.Script
		noRegion language.Region
	)

	base, script, region := t.Raw()
	switch {
	case region != noRegion:
		// Drop the region, keep base and script.
		up, _ := language.Raw.Compose(base, script, t.Extensions())
		return up
	case script != noScript:
		// Drop the script, keep the base.
		up, _ := language.Raw.Compose(base, t.Extensions())
		return up
	case base != noBase:
		// Drop the base; only the extensions remain.
		up, _ := language.Raw.Compose(t.Extensions())
		return up
	}
	return language.Und
}