diff options
Diffstat (limited to 'vendor/golang.org/x/text/runes')
| -rw-r--r-- | vendor/golang.org/x/text/runes/cond.go | 187 | ||||
| -rw-r--r-- | vendor/golang.org/x/text/runes/runes.go | 355 | 
2 files changed, 542 insertions, 0 deletions
| diff --git a/vendor/golang.org/x/text/runes/cond.go b/vendor/golang.org/x/text/runes/cond.go new file mode 100644 index 0000000..df7aa02 --- /dev/null +++ b/vendor/golang.org/x/text/runes/cond.go @@ -0,0 +1,187 @@ +// Copyright 2015 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runes + +import ( +	"unicode/utf8" + +	"golang.org/x/text/transform" +) + +// Note: below we pass invalid UTF-8 to the tIn and tNotIn transformers as is. +// This is done for various reasons: +// - To retain the semantics of the Nop transformer: if input is passed to a Nop +//   one would expect it to be unchanged. +// - It would be very expensive to pass a converted RuneError to a transformer: +//   a transformer might need more source bytes after RuneError, meaning that +//   the only way to pass it safely is to create a new buffer and manage the +//   intermingling of RuneErrors and normal input. +// - Many transformers leave ill-formed UTF-8 as is, so this is not +//   inconsistent. Generally ill-formed UTF-8 is only replaced if it is a +//   logical consequence of the operation (as for Map) or if it otherwise would +//   pose security concerns (as for Remove). +// - An alternative would be to return an error on ill-formed UTF-8, but this +//   would be inconsistent with other operations. + +// If returns a transformer that applies tIn to consecutive runes for which +// s.Contains(r) and tNotIn to consecutive runes for which !s.Contains(r). Reset +// is called on tIn and tNotIn at the start of each run. A Nop transformer will +// substitute a nil value passed to tIn or tNotIn. Invalid UTF-8 is translated +// to RuneError to determine which transformer to apply, but is passed as is to +// the respective transformer. 
+func If(s Set, tIn, tNotIn transform.Transformer) Transformer { +	if tIn == nil && tNotIn == nil { +		return Transformer{transform.Nop} +	} +	if tIn == nil { +		tIn = transform.Nop +	} +	if tNotIn == nil { +		tNotIn = transform.Nop +	} +	// Transformers without a Span method of their own are wrapped in dummySpan. +	sIn, ok := tIn.(transform.SpanningTransformer) +	if !ok { +		sIn = dummySpan{tIn} +	} +	sNotIn, ok := tNotIn.(transform.SpanningTransformer) +	if !ok { +		sNotIn = dummySpan{tNotIn} +	} + +	a := &cond{ +		tIn:    sIn, +		tNotIn: sNotIn, +		f:      s.Contains, +	} +	a.Reset() +	return Transformer{a} +} + +// dummySpan wraps a Transformer that does not implement +// transform.SpanningTransformer so that it can be stored in a cond. +type dummySpan struct{ transform.Transformer } + +// Span always returns 0 and transform.ErrEndOfSpan. +func (d dummySpan) Span(src []byte, atEOF bool) (n int, err error) { +	return 0, transform.ErrEndOfSpan +} + +// cond is the transformer built by If. It switches between tIn and tNotIn +// depending on whether f reports true for the rune being examined. +type cond struct { +	tIn, tNotIn transform.SpanningTransformer +	f           func(rune) bool +	check       func(rune) bool               // current check to perform +	t           transform.SpanningTransformer // current transformer to use +} + +// Reset implements transform.Transformer. +func (t *cond) Reset() { +	t.check = t.is +	t.t = t.tIn +	t.t.Reset() // notIn will be reset on first usage. +} + +// is is the check installed while tIn is current. It returns true if f(r) is +// true. Otherwise it installs isNot as the check, makes tNotIn the current +// transformer, resets tNotIn and returns false. +func (t *cond) is(r rune) bool { +	if t.f(r) { +		return true +	} +	t.check = t.isNot +	t.t = t.tNotIn +	t.tNotIn.Reset() +	return false +} + +// isNot is the check installed while tNotIn is current. It returns true if +// f(r) is false. Otherwise it installs is as the check, makes tIn the current +// transformer, resets tIn and returns false. +func (t *cond) isNot(r rune) bool { +	if !t.f(r) { +		return true +	} +	t.check = t.is +	t.t = t.tIn +	t.tIn.Reset() +	return false +} + +// This implementation of Span doesn't help all too much, but it needs to be +// there to satisfy this package's Transformer interface. +// TODO: there is certainly room for improvement, though. For example, if +// t.t == transform.Nop (which will be a common occurrence) it will save a bundle +// to special-case that loop. +func (t *cond) Span(src []byte, atEOF bool) (n int, err error) { +	p := 0 +	for n < len(src) && err == nil { +		// Don't process too much at a time as the Spanner that will be +		// called on this block may terminate early. 
+		const maxChunk = 4096 +		max := len(src) +		if v := n + maxChunk; v < max { +			max = v +		} +		atEnd := false +		size := 0 +		// Remember the transformer for this run: a failing t.check replaces t.t. +		current := t.t +		for ; p < max; p += size { +			r := rune(src[p]) +			if r < utf8.RuneSelf { +				size = 1 +			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 { +				if !atEOF && !utf8.FullRune(src[p:]) { +					err = transform.ErrShortSrc +					break +				} +			} +			if !t.check(r) { +				// The next rune will be the start of a new run. +				atEnd = true +				break +			} +		} +		n2, err2 := current.Span(src[n:p], atEnd || (atEOF && p == len(src))) +		n += n2 +		if err2 != nil { +			return n, err2 +		} +		// At this point either err != nil or t.check will pass for the rune at p. +		p = n + size +	} +	return n, err +} + +// Transform implements transform.Transformer. It scans src for runs of runes +// that pass the current check and hands each run to the transformer that was +// current at the start of the run. +func (t *cond) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { +	p := 0 +	for nSrc < len(src) && err == nil { +		// Don't process too much at a time, as the work might be wasted if the +		// destination buffer isn't large enough to hold the result or a +		// transform returns an error early. +		const maxChunk = 4096 +		max := len(src) +		if n := nSrc + maxChunk; n < len(src) { +			max = n +		} +		atEnd := false +		size := 0 +		// Remember the transformer for this run: a failing t.check replaces t.t. +		current := t.t +		for ; p < max; p += size { +			r := rune(src[p]) +			if r < utf8.RuneSelf { +				size = 1 +			} else if r, size = utf8.DecodeRune(src[p:]); size == 1 { +				if !atEOF && !utf8.FullRune(src[p:]) { +					err = transform.ErrShortSrc +					break +				} +			} +			if !t.check(r) { +				// The next rune will be the start of a new run. +				atEnd = true +				break +			} +		} +		nDst2, nSrc2, err2 := current.Transform(dst[nDst:], src[nSrc:p], atEnd || (atEOF && p == len(src))) +		nDst += nDst2 +		nSrc += nSrc2 +		if err2 != nil { +			return nDst, nSrc, err2 +		} +		// At this point either err != nil or t.check will pass for the rune at p. 
+		p = nSrc + size +	} +	return nDst, nSrc, err +} diff --git a/vendor/golang.org/x/text/runes/runes.go b/vendor/golang.org/x/text/runes/runes.go new file mode 100644 index 0000000..930e87f --- /dev/null +++ b/vendor/golang.org/x/text/runes/runes.go @@ -0,0 +1,355 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package runes provides transforms for UTF-8 encoded text. +package runes // import "golang.org/x/text/runes" + +import ( +	"unicode" +	"unicode/utf8" + +	"golang.org/x/text/transform" +) + +// A Set is a collection of runes. +type Set interface { +	// Contains returns true if r is contained in the set. +	Contains(r rune) bool +} + +// setFunc adapts a func(rune) bool to the Set interface. +type setFunc func(rune) bool + +// Contains returns s(r). +func (s setFunc) Contains(r rune) bool { +	return s(r) +} + +// Note: using funcs here instead of wrapping types results in cleaner +// documentation and a smaller API. + +// In creates a Set with a Contains method that returns true for all runes in +// the given RangeTable. +func In(rt *unicode.RangeTable) Set { +	return setFunc(func(r rune) bool { return unicode.Is(rt, r) }) +} + +// NotIn creates a Set with a Contains method that returns true for all runes not +// in the given RangeTable. +func NotIn(rt *unicode.RangeTable) Set { +	return setFunc(func(r rune) bool { return !unicode.Is(rt, r) }) +} + +// Predicate creates a Set with a Contains method that returns f(r). +func Predicate(f func(rune) bool) Set { +	return setFunc(f) +} + +// Transformer implements the transform.Transformer interface. 
+type Transformer struct { +	t transform.SpanningTransformer +} + +func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { +	return t.t.Transform(dst, src, atEOF) +} + +func (t Transformer) Span(b []byte, atEOF bool) (n int, err error) { +	return t.t.Span(b, atEOF) +} + +func (t Transformer) Reset() { t.t.Reset() } + +// Bytes returns a new byte slice with the result of converting b using t.  It +// calls Reset on t. It returns nil if any error was found. This can only happen +// if an error-producing Transformer is passed to If. +func (t Transformer) Bytes(b []byte) []byte { +	b, _, err := transform.Bytes(t, b) +	if err != nil { +		return nil +	} +	return b +} + +// String returns a string with the result of converting s using t. It calls +// Reset on t. It returns the empty string if any error was found. This can only +// happen if an error-producing Transformer is passed to If. +func (t Transformer) String(s string) string { +	s, _, err := transform.String(t, s) +	if err != nil { +		return "" +	} +	return s +} + +// TODO: +// - Copy: copying strings and bytes in whole-rune units. +// - Validation (maybe) +// - Well-formed-ness (maybe) + +const runeErrorString = string(utf8.RuneError) + +// Remove returns a Transformer that removes runes r for which s.Contains(r). +// Illegal input bytes are replaced by RuneError before being passed to f. +func Remove(s Set) Transformer { +	if f, ok := s.(setFunc); ok { +		// This little trick cuts the running time of BenchmarkRemove for sets +		// created by Predicate roughly in half. +		// TODO: special-case RangeTables as well. +		return Transformer{remove(f)} +	} +	return Transformer{remove(s.Contains)} +} + +// TODO: remove transform.RemoveFunc. + +type remove func(r rune) bool + +func (remove) Reset() {} + +// Span implements transform.Spanner. 
+func (t remove) Span(src []byte, atEOF bool) (n int, err error) { +	for r, size := rune(0), 0; n < len(src); { +		if r = rune(src[n]); r < utf8.RuneSelf { +			size = 1 +		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 { +			// Invalid rune. +			if !atEOF && !utf8.FullRune(src[n:]) { +				err = transform.ErrShortSrc +			} else { +				err = transform.ErrEndOfSpan +			} +			break +		} +		if t(r) { +			err = transform.ErrEndOfSpan +			break +		} +		n += size +	} +	return +} + +// Transform implements transform.Transformer. +func (t remove) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { +	for r, size := rune(0), 0; nSrc < len(src); { +		if r = rune(src[nSrc]); r < utf8.RuneSelf { +			size = 1 +		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { +			// Invalid rune. +			if !atEOF && !utf8.FullRune(src[nSrc:]) { +				err = transform.ErrShortSrc +				break +			} +			// We replace illegal bytes with RuneError. Not doing so might +			// otherwise turn a sequence of invalid UTF-8 into valid UTF-8. +			// The resulting byte sequence may subsequently contain runes +			// for which t(r) is true that were passed unnoticed. +			if !t(utf8.RuneError) { +				if nDst+3 > len(dst) { +					err = transform.ErrShortDst +					break +				} +				dst[nDst+0] = runeErrorString[0] +				dst[nDst+1] = runeErrorString[1] +				dst[nDst+2] = runeErrorString[2] +				nDst += 3 +			} +			nSrc++ +			continue +		} +		if t(r) { +			nSrc += size +			continue +		} +		if nDst+size > len(dst) { +			err = transform.ErrShortDst +			break +		} +		for i := 0; i < size; i++ { +			dst[nDst] = src[nSrc] +			nDst++ +			nSrc++ +		} +	} +	return +} + +// Map returns a Transformer that maps the runes in the input using the given +// mapping. Illegal bytes in the input are converted to utf8.RuneError before +// being passed to the mapping func. 
+func Map(mapping func(rune) rune) Transformer { +	return Transformer{mapper(mapping)} +} + +// mapper is the transformer behind Map. Calling it yields the replacement for +// a rune. +type mapper func(rune) rune + +// Reset implements transform.Transformer. It does nothing. +func (mapper) Reset() {} + +// Span implements transform.Spanner. +func (t mapper) Span(src []byte, atEOF bool) (n int, err error) { +	for r, size := rune(0), 0; n < len(src); n += size { +		if r = rune(src[n]); r < utf8.RuneSelf { +			size = 1 +		} else if r, size = utf8.DecodeRune(src[n:]); size == 1 { +			// Invalid rune. +			if !atEOF && !utf8.FullRune(src[n:]) { +				err = transform.ErrShortSrc +			} else { +				err = transform.ErrEndOfSpan +			} +			break +		} +		// The span ends at the first rune that the mapping would change. +		if t(r) != r { +			err = transform.ErrEndOfSpan +			break +		} +	} +	return n, err +} + +// Transform implements transform.Transformer. +func (t mapper) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { +	var replacement rune +	var b [utf8.UTFMax]byte + +	for r, size := rune(0), 0; nSrc < len(src); { +		if r = rune(src[nSrc]); r < utf8.RuneSelf { +			// ASCII input whose replacement is below utf8.RuneSelf: write one byte. +			if replacement = t(r); replacement < utf8.RuneSelf { +				if nDst == len(dst) { +					err = transform.ErrShortDst +					break +				} +				dst[nDst] = byte(replacement) +				nDst++ +				nSrc++ +				continue +			} +			// Otherwise fall through to the encoding step at the end of the loop. +			size = 1 +		} else if r, size = utf8.DecodeRune(src[nSrc:]); size == 1 { +			// Invalid rune. 
+			if !atEOF && !utf8.FullRune(src[nSrc:]) { +				err = transform.ErrShortSrc +				break +			} + +			if replacement = t(utf8.RuneError); replacement == utf8.RuneError { +				if nDst+3 > len(dst) { +					err = transform.ErrShortDst +					break +				} +				dst[nDst+0] = runeErrorString[0] +				dst[nDst+1] = runeErrorString[1] +				dst[nDst+2] = runeErrorString[2] +				nDst += 3 +				nSrc++ +				continue +			} +		} else if replacement = t(r); replacement == r { +			// Unchanged multi-byte rune: copy its bytes as is. +			if nDst+size > len(dst) { +				err = transform.ErrShortDst +				break +			} +			for i := 0; i < size; i++ { +				dst[nDst] = src[nSrc] +				nDst++ +				nSrc++ +			} +			continue +		} + +		// Reached only when the replacement has to be encoded instead of copied. +		n := utf8.EncodeRune(b[:], replacement) + +		if nDst+n > len(dst) { +			err = transform.ErrShortDst +			break +		} +		for i := 0; i < n; i++ { +			dst[nDst] = b[i] +			nDst++ +		} +		nSrc += size +	} +	return +} + +// ReplaceIllFormed returns a transformer that replaces all input bytes that are +// not part of a well-formed UTF-8 code sequence with utf8.RuneError. +func ReplaceIllFormed() Transformer { +	return Transformer{&replaceIllFormed{}} +} + +// replaceIllFormed is the transformer behind ReplaceIllFormed. +type replaceIllFormed struct{ transform.NopResetter } + +// Span advances over ASCII bytes and valid runes. It stops with +// transform.ErrShortSrc or transform.ErrEndOfSpan at the first byte that does +// not decode to a valid rune. +func (t replaceIllFormed) Span(src []byte, atEOF bool) (n int, err error) { +	for n < len(src) { +		// ASCII fast path. +		if src[n] < utf8.RuneSelf { +			n++ +			continue +		} + +		r, size := utf8.DecodeRune(src[n:]) + +		// Look for a valid non-ASCII rune. +		if r != utf8.RuneError || size != 1 { +			n += size +			continue +		} + +		// Look for short source data. +		if !atEOF && !utf8.FullRune(src[n:]) { +			err = transform.ErrShortSrc +			break +		} + +		// We have an invalid rune. +		err = transform.ErrEndOfSpan +		break +	} +	return n, err +} + +// Transform copies ASCII bytes and valid runes from src to dst and writes the +// encoding of utf8.RuneError for each byte that does not decode to a valid rune. +func (t replaceIllFormed) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) { +	for nSrc < len(src) { +		// ASCII fast path. 
+		if r := src[nSrc]; r < utf8.RuneSelf { +			if nDst == len(dst) { +				err = transform.ErrShortDst +				break +			} +			dst[nDst] = r +			nDst++ +			nSrc++ +			continue +		} + +		// Look for a valid non-ASCII rune. +		if _, size := utf8.DecodeRune(src[nSrc:]); size != 1 { +			if size != copy(dst[nDst:], src[nSrc:nSrc+size]) { +				err = transform.ErrShortDst +				break +			} +			nDst += size +			nSrc += size +			continue +		} + +		// Look for short source data. +		if !atEOF && !utf8.FullRune(src[nSrc:]) { +			err = transform.ErrShortSrc +			break +		} + +		// We have an invalid rune. +		if nDst+3 > len(dst) { +			err = transform.ErrShortDst +			break +		} +		dst[nDst+0] = runeErrorString[0] +		dst[nDst+1] = runeErrorString[1] +		dst[nDst+2] = runeErrorString[2] +		nDst += 3 +		nSrc++ +	} +	return nDst, nSrc, err +} |