all repos

rss-tools @ master

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/vendor/golang.org/x/text/language/parse.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
we're vendoring now, 7 days ago
1
// Copyright 2013 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package language
6
7
import (
8
	"errors"
9
	"sort"
10
	"strconv"
11
	"strings"
12
13
	"golang.org/x/text/internal/language"
14
)
15
16
// ValueError is returned by any of the parsing functions when the
// input is well-formed but the respective subtag is not recognized
// as a valid value.
type ValueError interface {
	error

	// Subtag returns the subtag for which the error occurred.
	Subtag() string
}
25
26
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
27
// failed it returns an error and any part of the tag that could be parsed.
28
// If parsing succeeded but an unknown value was found, it returns
29
// ValueError. The Tag returned in this case is just stripped of the unknown
30
// value. All other values are preserved. It accepts tags in the BCP 47 format
31
// and extensions to this standard defined in
32
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
33
// The resulting tag is canonicalized using the default canonicalization type.
34
func Parse(s string) (t Tag, err error) {
35
	return Default.Parse(s)
36
}
37
38
// Parse parses the given BCP 47 string and returns a valid Tag. If parsing
39
// failed it returns an error and any part of the tag that could be parsed.
40
// If parsing succeeded but an unknown value was found, it returns
41
// ValueError. The Tag returned in this case is just stripped of the unknown
42
// value. All other values are preserved. It accepts tags in the BCP 47 format
43
// and extensions to this standard defined in
44
// https://www.unicode.org/reports/tr35/#Unicode_Language_and_Locale_Identifiers.
45
// The resulting tag is canonicalized using the canonicalization type c.
46
func (c CanonType) Parse(s string) (t Tag, err error) {
47
	defer func() {
48
		if recover() != nil {
49
			t = Tag{}
50
			err = language.ErrSyntax
51
		}
52
	}()
53
54
	tt, err := language.Parse(s)
55
	if err != nil {
56
		return makeTag(tt), err
57
	}
58
	tt, changed := canonicalize(c, tt)
59
	if changed {
60
		tt.RemakeString()
61
	}
62
	return makeTag(tt), nil
63
}
64
65
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
66
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
67
// Base, Script or Region or slice of type Variant or Extension is passed more
68
// than once, the latter will overwrite the former. Variants and Extensions are
69
// accumulated, but if two extensions of the same type are passed, the latter
70
// will replace the former. For -u extensions, though, the key-type pairs are
71
// added, where later values overwrite older ones. A Tag overwrites all former
72
// values and typically only makes sense as the first argument. The resulting
73
// tag is returned after canonicalizing using the Default CanonType. If one or
74
// more errors are encountered, one of the errors is returned.
75
func Compose(part ...interface{}) (t Tag, err error) {
76
	return Default.Compose(part...)
77
}
78
79
// Compose creates a Tag from individual parts, which may be of type Tag, Base,
80
// Script, Region, Variant, []Variant, Extension, []Extension or error. If a
81
// Base, Script or Region or slice of type Variant or Extension is passed more
82
// than once, the latter will overwrite the former. Variants and Extensions are
83
// accumulated, but if two extensions of the same type are passed, the latter
84
// will replace the former. For -u extensions, though, the key-type pairs are
85
// added, where later values overwrite older ones. A Tag overwrites all former
86
// values and typically only makes sense as the first argument. The resulting
87
// tag is returned after canonicalizing using CanonType c. If one or more errors
88
// are encountered, one of the errors is returned.
89
func (c CanonType) Compose(part ...interface{}) (t Tag, err error) {
90
	defer func() {
91
		if recover() != nil {
92
			t = Tag{}
93
			err = language.ErrSyntax
94
		}
95
	}()
96
97
	var b language.Builder
98
	if err = update(&b, part...); err != nil {
99
		return und, err
100
	}
101
	b.Tag, _ = canonicalize(c, b.Tag)
102
	return makeTag(b.Make()), err
103
}
104
105
// errInvalidArgument is the error update records when it is given a Variant
// or Extension whose underlying string is empty.
var errInvalidArgument = errors.New("invalid Extension or Variant")
106
107
func update(b *language.Builder, part ...interface{}) (err error) {
108
	for _, x := range part {
109
		switch v := x.(type) {
110
		case Tag:
111
			b.SetTag(v.tag())
112
		case Base:
113
			b.Tag.LangID = v.langID
114
		case Script:
115
			b.Tag.ScriptID = v.scriptID
116
		case Region:
117
			b.Tag.RegionID = v.regionID
118
		case Variant:
119
			if v.variant == "" {
120
				err = errInvalidArgument
121
				break
122
			}
123
			b.AddVariant(v.variant)
124
		case Extension:
125
			if v.s == "" {
126
				err = errInvalidArgument
127
				break
128
			}
129
			b.SetExt(v.s)
130
		case []Variant:
131
			b.ClearVariants()
132
			for _, v := range v {
133
				b.AddVariant(v.variant)
134
			}
135
		case []Extension:
136
			b.ClearExtensions()
137
			for _, e := range v {
138
				b.SetExt(e.s)
139
			}
140
		// TODO: support parsing of raw strings based on morphology or just extensions?
141
		case error:
142
			if v != nil {
143
				err = v
144
			}
145
		}
146
	}
147
	return
148
}
149
150
// Errors returned by ParseAcceptLanguage.
var (
	// errInvalidWeight reports a quality weight that could not be parsed.
	errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
	// errTagListTooLarge reports input that exceeds the accepted size limit.
	errTagListTooLarge = errors.New("tag list exceeds max length")
)
152
153
// ParseAcceptLanguage parses the contents of an Accept-Language header as
154
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
155
// a list of corresponding quality weights. It is more permissive than RFC 2616
156
// and may return non-nil slices even if the input is not valid.
157
// The Tags will be sorted by highest weight first and then by first occurrence.
158
// Tags with a weight of zero will be dropped. An error will be returned if the
159
// input could not be parsed.
160
func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
161
	defer func() {
162
		if recover() != nil {
163
			tag = nil
164
			q = nil
165
			err = language.ErrSyntax
166
		}
167
	}()
168
169
	if strings.Count(s, "-") > 1000 {
170
		return nil, nil, errTagListTooLarge
171
	}
172
173
	var entry string
174
	for s != "" {
175
		if entry, s = split(s, ','); entry == "" {
176
			continue
177
		}
178
179
		entry, weight := split(entry, ';')
180
181
		// Scan the language.
182
		t, err := Parse(entry)
183
		if err != nil {
184
			id, ok := acceptFallback[entry]
185
			if !ok {
186
				return nil, nil, err
187
			}
188
			t = makeTag(language.Tag{LangID: id})
189
		}
190
191
		// Scan the optional weight.
192
		w := 1.0
193
		if weight != "" {
194
			weight = consume(weight, 'q')
195
			weight = consume(weight, '=')
196
			// consume returns the empty string when a token could not be
197
			// consumed, resulting in an error for ParseFloat.
198
			if w, err = strconv.ParseFloat(weight, 32); err != nil {
199
				return nil, nil, errInvalidWeight
200
			}
201
			// Drop tags with a quality weight of 0.
202
			if w <= 0 {
203
				continue
204
			}
205
		}
206
207
		tag = append(tag, t)
208
		q = append(q, float32(w))
209
	}
210
	sort.Stable(&tagSort{tag, q})
211
	return tag, q, nil
212
}
213
214
// consume removes a leading token c from s and returns the result or the empty
// string if there is no such token. Whitespace surrounding the remainder is
// trimmed, so an empty result is also returned when nothing but whitespace
// follows the token.
func consume(s string, c byte) string {
	if s == "" || s[0] != c {
		return ""
	}
	return strings.TrimSpace(s[1:])
}
222
223
// split cuts s at the first occurrence of the byte c and returns the text
// before it as head and the text after it as tail, both with surrounding
// whitespace trimmed. If c does not occur in s, head is the trimmed s and tail
// is empty.
func split(s string, c byte) (head, tail string) {
	if i := strings.IndexByte(s, c); i >= 0 {
		return strings.TrimSpace(s[:i]), strings.TrimSpace(s[i+1:])
	}
	return strings.TrimSpace(s), ""
}
229
230
// Add hack mapping to deal with a small number of cases that occur
231
// in Accept-Language (with reasonable frequency).
232
var acceptFallback = map[string]language.Language{
233
	"english": _en,
234
	"deutsch": _de,
235
	"italian": _it,
236
	"french":  _fr,
237
	"*":       _mul, // defined in the spec to match all languages.
238
}
239
240
type tagSort struct {
241
	tag []Tag
242
	q   []float32
243
}
244
245
func (s *tagSort) Len() int {
246
	return len(s.q)
247
}
248
249
func (s *tagSort) Less(i, j int) bool {
250
	return s.q[i] > s.q[j]
251
}
252
253
func (s *tagSort) Swap(i, j int) {
254
	s.tag[i], s.tag[j] = s.tag[j], s.tag[i]
255
	s.q[i], s.q[j] = s.q[j], s.q[i]
256
}