all repos

rss-tools @ 58cc1bd492af31e6c9acf65e690474c8c1edf164

Get RSS feeds from sources that (I need and that) don't provide one

rss-tools/vendor/github.com/andybalholm/cascadia/pseudo_classes.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
we're vendoring now, 7 days ago
1
package cascadia
2
3
import (
4
	"bytes"
5
	"fmt"
6
	"regexp"
7
	"strings"
8
9
	"golang.org/x/net/html"
10
	"golang.org/x/net/html/atom"
11
)
12
13
// This file implements the pseudo-class selectors,
14
// which share the implementation of PseudoElement() and Specificity()
15
16
type abstractPseudoClass struct{}
17
18
func (s abstractPseudoClass) Specificity() Specificity {
19
	return Specificity{0, 1, 0}
20
}
21
22
func (c abstractPseudoClass) PseudoElement() string {
23
	return ""
24
}
25
26
type relativePseudoClassSelector struct {
27
	name  string // one of "not", "has", "haschild"
28
	match SelectorGroup
29
}
30
31
func (s relativePseudoClassSelector) Match(n *html.Node) bool {
32
	if n.Type != html.ElementNode {
33
		return false
34
	}
35
	switch s.name {
36
	case "not":
37
		// matches elements that do not match a.
38
		return !s.match.Match(n)
39
	case "has":
40
		//  matches elements with any descendant that matches a.
41
		return hasDescendantMatch(n, s.match)
42
	case "haschild":
43
		// matches elements with a child that matches a.
44
		return hasChildMatch(n, s.match)
45
	default:
46
		panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name))
47
	}
48
}
49
50
// hasChildMatch returns whether n has any child that matches a.
51
func hasChildMatch(n *html.Node, a Matcher) bool {
52
	for c := n.FirstChild; c != nil; c = c.NextSibling {
53
		if a.Match(c) {
54
			return true
55
		}
56
	}
57
	return false
58
}
59
60
// hasDescendantMatch performs a depth-first search of n's descendants,
61
// testing whether any of them match a. It returns true as soon as a match is
62
// found, or false if no match is found.
63
func hasDescendantMatch(n *html.Node, a Matcher) bool {
64
	for c := n.FirstChild; c != nil; c = c.NextSibling {
65
		if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) {
66
			return true
67
		}
68
	}
69
	return false
70
}
71
72
// Specificity returns the specificity of the most specific selectors
73
// in the pseudo-class arguments.
74
// See https://www.w3.org/TR/selectors/#specificity-rules
75
func (s relativePseudoClassSelector) Specificity() Specificity {
76
	var max Specificity
77
	for _, sel := range s.match {
78
		newSpe := sel.Specificity()
79
		if max.Less(newSpe) {
80
			max = newSpe
81
		}
82
	}
83
	return max
84
}
85
86
// PseudoElement always returns the empty string.
func (relativePseudoClassSelector) PseudoElement() string {
	return ""
}
89
90
type containsPseudoClassSelector struct {
91
	abstractPseudoClass
92
	value string
93
	own   bool
94
}
95
96
func (s containsPseudoClassSelector) Match(n *html.Node) bool {
97
	var text string
98
	if s.own {
99
		// matches nodes that directly contain the given text
100
		text = strings.ToLower(nodeOwnText(n))
101
	} else {
102
		// matches nodes that contain the given text.
103
		text = strings.ToLower(nodeText(n))
104
	}
105
	return strings.Contains(text, s.value)
106
}
107
108
type regexpPseudoClassSelector struct {
109
	abstractPseudoClass
110
	regexp *regexp.Regexp
111
	own    bool
112
}
113
114
func (s regexpPseudoClassSelector) Match(n *html.Node) bool {
115
	var text string
116
	if s.own {
117
		// matches nodes whose text directly matches the specified regular expression
118
		text = nodeOwnText(n)
119
	} else {
120
		// matches nodes whose text matches the specified regular expression
121
		text = nodeText(n)
122
	}
123
	return s.regexp.MatchString(text)
124
}
125
126
// writeNodeText writes the text contained in n and its descendants to b.
127
func writeNodeText(n *html.Node, b *bytes.Buffer) {
128
	switch n.Type {
129
	case html.TextNode:
130
		b.WriteString(n.Data)
131
	case html.ElementNode:
132
		for c := n.FirstChild; c != nil; c = c.NextSibling {
133
			writeNodeText(c, b)
134
		}
135
	}
136
}
137
138
// nodeText returns the text contained in n and its descendants.
139
func nodeText(n *html.Node) string {
140
	var b bytes.Buffer
141
	writeNodeText(n, &b)
142
	return b.String()
143
}
144
145
// nodeOwnText returns the contents of the text nodes that are direct
146
// children of n.
147
func nodeOwnText(n *html.Node) string {
148
	var b bytes.Buffer
149
	for c := n.FirstChild; c != nil; c = c.NextSibling {
150
		if c.Type == html.TextNode {
151
			b.WriteString(c.Data)
152
		}
153
	}
154
	return b.String()
155
}
156
157
type nthPseudoClassSelector struct {
158
	abstractPseudoClass
159
	a, b         int
160
	last, ofType bool
161
}
162
163
func (s nthPseudoClassSelector) Match(n *html.Node) bool {
164
	if s.a == 0 {
165
		if s.last {
166
			return simpleNthLastChildMatch(s.b, s.ofType, n)
167
		} else {
168
			return simpleNthChildMatch(s.b, s.ofType, n)
169
		}
170
	}
171
	return nthChildMatch(s.a, s.b, s.last, s.ofType, n)
172
}
173
174
// nthChildMatch implements :nth-child(an+b).
175
// If last is true, implements :nth-last-child instead.
176
// If ofType is true, implements :nth-of-type instead.
177
func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool {
178
	if n.Type != html.ElementNode {
179
		return false
180
	}
181
182
	parent := n.Parent
183
	if parent == nil {
184
		return false
185
	}
186
187
	i := -1
188
	count := 0
189
	for c := parent.FirstChild; c != nil; c = c.NextSibling {
190
		if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) {
191
			continue
192
		}
193
		count++
194
		if c == n {
195
			i = count
196
			if !last {
197
				break
198
			}
199
		}
200
	}
201
202
	if i == -1 {
203
		// This shouldn't happen, since n should always be one of its parent's children.
204
		return false
205
	}
206
207
	if last {
208
		i = count - i + 1
209
	}
210
211
	i -= b
212
	if a == 0 {
213
		return i == 0
214
	}
215
216
	return i%a == 0 && i/a >= 0
217
}
218
219
// simpleNthChildMatch implements :nth-child(b).
220
// If ofType is true, implements :nth-of-type instead.
221
func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool {
222
	if n.Type != html.ElementNode {
223
		return false
224
	}
225
226
	parent := n.Parent
227
	if parent == nil {
228
		return false
229
	}
230
231
	count := 0
232
	for c := parent.FirstChild; c != nil; c = c.NextSibling {
233
		if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
234
			continue
235
		}
236
		count++
237
		if c == n {
238
			return count == b
239
		}
240
		if count >= b {
241
			return false
242
		}
243
	}
244
	return false
245
}
246
247
// simpleNthLastChildMatch implements :nth-last-child(b).
248
// If ofType is true, implements :nth-last-of-type instead.
249
func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool {
250
	if n.Type != html.ElementNode {
251
		return false
252
	}
253
254
	parent := n.Parent
255
	if parent == nil {
256
		return false
257
	}
258
259
	count := 0
260
	for c := parent.LastChild; c != nil; c = c.PrevSibling {
261
		if c.Type != html.ElementNode || (ofType && c.Data != n.Data) {
262
			continue
263
		}
264
		count++
265
		if c == n {
266
			return count == b
267
		}
268
		if count >= b {
269
			return false
270
		}
271
	}
272
	return false
273
}
274
275
type onlyChildPseudoClassSelector struct {
276
	abstractPseudoClass
277
	ofType bool
278
}
279
280
// Match implements :only-child.
281
// If `ofType` is true, it implements :only-of-type instead.
282
func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool {
283
	if n.Type != html.ElementNode {
284
		return false
285
	}
286
287
	parent := n.Parent
288
	if parent == nil {
289
		return false
290
	}
291
292
	count := 0
293
	for c := parent.FirstChild; c != nil; c = c.NextSibling {
294
		if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) {
295
			continue
296
		}
297
		count++
298
		if count > 1 {
299
			return false
300
		}
301
	}
302
303
	return count == 1
304
}
305
306
type inputPseudoClassSelector struct {
307
	abstractPseudoClass
308
}
309
310
// Matches input, select, textarea and button elements.
311
func (s inputPseudoClassSelector) Match(n *html.Node) bool {
312
	return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button")
313
}
314
315
type emptyElementPseudoClassSelector struct {
316
	abstractPseudoClass
317
}
318
319
// Matches empty elements.
320
func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool {
321
	if n.Type != html.ElementNode {
322
		return false
323
	}
324
325
	for c := n.FirstChild; c != nil; c = c.NextSibling {
326
		switch c.Type {
327
		case html.ElementNode:
328
			return false
329
		case html.TextNode:
330
			if strings.TrimSpace(nodeText(c)) == "" {
331
				continue
332
			} else {
333
				return false
334
			}
335
		}
336
	}
337
338
	return true
339
}
340
341
type rootPseudoClassSelector struct {
342
	abstractPseudoClass
343
}
344
345
// Match implements :root
346
func (s rootPseudoClassSelector) Match(n *html.Node) bool {
347
	if n.Type != html.ElementNode {
348
		return false
349
	}
350
	if n.Parent == nil {
351
		return false
352
	}
353
	return n.Parent.Type == html.DocumentNode
354
}
355
356
// hasAttr reports whether matchAttribute finds attribute attr on n when
// every attribute value is accepted.
func hasAttr(n *html.Node, attr string) bool {
	acceptAny := func(string) bool { return true }
	return matchAttribute(n, attr, acceptAny)
}
359
360
type linkPseudoClassSelector struct {
361
	abstractPseudoClass
362
}
363
364
// Match implements :link
365
func (s linkPseudoClassSelector) Match(n *html.Node) bool {
366
	return (n.DataAtom == atom.A || n.DataAtom == atom.Area || n.DataAtom == atom.Link) && hasAttr(n, "href")
367
}
368
369
type langPseudoClassSelector struct {
370
	abstractPseudoClass
371
	lang string
372
}
373
374
func (s langPseudoClassSelector) Match(n *html.Node) bool {
375
	own := matchAttribute(n, "lang", func(val string) bool {
376
		return val == s.lang || strings.HasPrefix(val, s.lang+"-")
377
	})
378
	if n.Parent == nil {
379
		return own
380
	}
381
	return own || s.Match(n.Parent)
382
}
383
384
type enabledPseudoClassSelector struct {
385
	abstractPseudoClass
386
}
387
388
func (s enabledPseudoClassSelector) Match(n *html.Node) bool {
389
	if n.Type != html.ElementNode {
390
		return false
391
	}
392
	switch n.DataAtom {
393
	case atom.A, atom.Area, atom.Link:
394
		return hasAttr(n, "href")
395
	case atom.Optgroup, atom.Menuitem, atom.Fieldset:
396
		return !hasAttr(n, "disabled")
397
	case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
398
		return !hasAttr(n, "disabled") && !inDisabledFieldset(n)
399
	}
400
	return false
401
}
402
403
type disabledPseudoClassSelector struct {
404
	abstractPseudoClass
405
}
406
407
func (s disabledPseudoClassSelector) Match(n *html.Node) bool {
408
	if n.Type != html.ElementNode {
409
		return false
410
	}
411
	switch n.DataAtom {
412
	case atom.Optgroup, atom.Menuitem, atom.Fieldset:
413
		return hasAttr(n, "disabled")
414
	case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option:
415
		return hasAttr(n, "disabled") || inDisabledFieldset(n)
416
	}
417
	return false
418
}
419
420
// hasLegendInPreviousSiblings reports whether any sibling that precedes n
// is a legend element.
func hasLegendInPreviousSiblings(n *html.Node) bool {
	sib := n.PrevSibling
	for sib != nil && sib.DataAtom != atom.Legend {
		sib = sib.PrevSibling
	}
	return sib != nil
}
428
429
// inDisabledFieldset reports whether n is disabled by an enclosing fieldset.
//
// It walks from n up through its ancestors. At each step, if the parent is
// a fieldset with a disabled attribute, the current node counts as disabled
// unless it is a legend element with no legend among its previous siblings
// (that is, the fieldset's first legend child).
func inDisabledFieldset(n *html.Node) bool {
	for cur := n; cur.Parent != nil; cur = cur.Parent {
		parent := cur.Parent
		if parent.DataAtom != atom.Fieldset || !hasAttr(parent, "disabled") {
			continue
		}
		if cur.DataAtom != atom.Legend || hasLegendInPreviousSiblings(cur) {
			return true
		}
	}
	return false
}
439
440
type checkedPseudoClassSelector struct {
441
	abstractPseudoClass
442
}
443
444
func (s checkedPseudoClassSelector) Match(n *html.Node) bool {
445
	if n.Type != html.ElementNode {
446
		return false
447
	}
448
	switch n.DataAtom {
449
	case atom.Input, atom.Menuitem:
450
		return hasAttr(n, "checked") && matchAttribute(n, "type", func(val string) bool {
451
			t := toLowerASCII(val)
452
			return t == "checkbox" || t == "radio"
453
		})
454
	case atom.Option:
455
		return hasAttr(n, "selected")
456
	}
457
	return false
458
}