all repos

rss-tools @ a5ac527

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/vendor/github.com/andybalholm/cascadia/selector.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
we're vendoring now, 7 days ago
1
package cascadia
2
3
import (
4
	"fmt"
5
	"regexp"
6
	"strings"
7
8
	"golang.org/x/net/html"
9
)
10
11
// Matcher is the interface for basic selector functionality.
12
// Match returns whether a selector matches n.
13
type Matcher interface {
14
	Match(n *html.Node) bool
15
}
16
17
// Sel is the interface for all the functionality provided by selectors.
18
type Sel interface {
19
	Matcher
20
	Specificity() Specificity
21
22
	// Returns a CSS input compiling to this selector.
23
	String() string
24
25
	// Returns a pseudo-element, or an empty string.
26
	PseudoElement() string
27
}
28
29
// Parse parses a selector. Use `ParseWithPseudoElement`
30
// if you need support for pseudo-elements.
31
func Parse(sel string) (Sel, error) {
32
	p := &parser{s: sel}
33
	compiled, err := p.parseSelector()
34
	if err != nil {
35
		return nil, err
36
	}
37
38
	if p.i < len(sel) {
39
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
40
	}
41
42
	return compiled, nil
43
}
44
45
// ParseWithPseudoElement parses a single selector,
46
// with support for pseudo-element.
47
func ParseWithPseudoElement(sel string) (Sel, error) {
48
	p := &parser{s: sel, acceptPseudoElements: true}
49
	compiled, err := p.parseSelector()
50
	if err != nil {
51
		return nil, err
52
	}
53
54
	if p.i < len(sel) {
55
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
56
	}
57
58
	return compiled, nil
59
}
60
61
// ParseGroup parses a selector, or a group of selectors separated by commas.
62
// Use `ParseGroupWithPseudoElements`
63
// if you need support for pseudo-elements.
64
func ParseGroup(sel string) (SelectorGroup, error) {
65
	p := &parser{s: sel}
66
	compiled, err := p.parseSelectorGroup()
67
	if err != nil {
68
		return nil, err
69
	}
70
71
	if p.i < len(sel) {
72
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
73
	}
74
75
	return compiled, nil
76
}
77
78
// ParseGroupWithPseudoElements parses a selector, or a group of selectors separated by commas.
79
// It supports pseudo-elements.
80
func ParseGroupWithPseudoElements(sel string) (SelectorGroup, error) {
81
	p := &parser{s: sel, acceptPseudoElements: true}
82
	compiled, err := p.parseSelectorGroup()
83
	if err != nil {
84
		return nil, err
85
	}
86
87
	if p.i < len(sel) {
88
		return nil, fmt.Errorf("parsing %q: %d bytes left over", sel, len(sel)-p.i)
89
	}
90
91
	return compiled, nil
92
}
93
94
// A Selector is a function which tells whether a node matches or not.
95
//
96
// This type is maintained for compatibility; I recommend using the newer and
97
// more idiomatic interfaces Sel and Matcher.
98
type Selector func(*html.Node) bool
99
100
// Compile parses a selector and returns, if successful, a Selector object
101
// that can be used to match against html.Node objects.
102
func Compile(sel string) (Selector, error) {
103
	compiled, err := ParseGroup(sel)
104
	if err != nil {
105
		return nil, err
106
	}
107
108
	return Selector(compiled.Match), nil
109
}
110
111
// MustCompile is like Compile, but panics instead of returning an error.
112
func MustCompile(sel string) Selector {
113
	compiled, err := Compile(sel)
114
	if err != nil {
115
		panic(err)
116
	}
117
	return compiled
118
}
119
120
// MatchAll returns a slice of the nodes that match the selector,
121
// from n and its children.
122
func (s Selector) MatchAll(n *html.Node) []*html.Node {
123
	return s.matchAllInto(n, nil)
124
}
125
126
// matchAllInto appends to storage every node of the subtree rooted at n
// (n included) for which s returns true, and returns the grown slice.
// A node is visited before its children, and children are visited in
// sibling order.
func (s Selector) matchAllInto(n *html.Node, storage []*html.Node) []*html.Node {
	if s(n) {
		storage = append(storage, n)
	}

	child := n.FirstChild
	for child != nil {
		storage = s.matchAllInto(child, storage)
		child = child.NextSibling
	}

	return storage
}
137
138
// queryInto appends to storage every descendant of n that m matches and
// returns the grown slice. n itself is never tested. Each child is
// tested before its own descendants are visited.
func queryInto(n *html.Node, m Matcher, storage []*html.Node) []*html.Node {
	child := n.FirstChild
	for child != nil {
		if m.Match(child) {
			storage = append(storage, child)
		}
		storage = queryInto(child, m, storage)
		child = child.NextSibling
	}

	return storage
}
148
149
// QueryAll returns a slice of all the nodes that match m, from the descendants
150
// of n.
151
func QueryAll(n *html.Node, m Matcher) []*html.Node {
152
	return queryInto(n, m, nil)
153
}
154
155
// Match returns true if the node matches the selector.
156
func (s Selector) Match(n *html.Node) bool {
157
	return s(n)
158
}
159
160
// MatchFirst returns the first node that matches s, from n and its children.
161
func (s Selector) MatchFirst(n *html.Node) *html.Node {
162
	if s.Match(n) {
163
		return n
164
	}
165
166
	for c := n.FirstChild; c != nil; c = c.NextSibling {
167
		m := s.MatchFirst(c)
168
		if m != nil {
169
			return m
170
		}
171
	}
172
	return nil
173
}
174
175
// Query returns the first node that matches m, from the descendants of n.
176
// If none matches, it returns nil.
177
func Query(n *html.Node, m Matcher) *html.Node {
178
	for c := n.FirstChild; c != nil; c = c.NextSibling {
179
		if m.Match(c) {
180
			return c
181
		}
182
		if matched := Query(c, m); matched != nil {
183
			return matched
184
		}
185
	}
186
187
	return nil
188
}
189
190
// Filter returns the nodes in nodes that match the selector.
191
func (s Selector) Filter(nodes []*html.Node) (result []*html.Node) {
192
	for _, n := range nodes {
193
		if s(n) {
194
			result = append(result, n)
195
		}
196
	}
197
	return result
198
}
199
200
// Filter returns the nodes that match m.
201
func Filter(nodes []*html.Node, m Matcher) (result []*html.Node) {
202
	for _, n := range nodes {
203
		if m.Match(n) {
204
			result = append(result, n)
205
		}
206
	}
207
	return result
208
}
209
210
type tagSelector struct {
211
	tag string
212
}
213
214
// Matches elements with a given tag name.
215
func (t tagSelector) Match(n *html.Node) bool {
216
	return n.Type == html.ElementNode && n.Data == t.tag
217
}
218
219
func (c tagSelector) Specificity() Specificity {
220
	return Specificity{0, 0, 1}
221
}
222
223
func (c tagSelector) PseudoElement() string {
224
	return ""
225
}
226
227
type classSelector struct {
228
	class string
229
}
230
231
// Matches elements by class attribute.
232
func (t classSelector) Match(n *html.Node) bool {
233
	return matchAttribute(n, "class", func(s string) bool {
234
		return matchInclude(t.class, s, false)
235
	})
236
}
237
238
func (c classSelector) Specificity() Specificity {
239
	return Specificity{0, 1, 0}
240
}
241
242
func (c classSelector) PseudoElement() string {
243
	return ""
244
}
245
246
type idSelector struct {
247
	id string
248
}
249
250
// Matches elements by id attribute.
251
func (t idSelector) Match(n *html.Node) bool {
252
	return matchAttribute(n, "id", func(s string) bool {
253
		return s == t.id
254
	})
255
}
256
257
func (c idSelector) Specificity() Specificity {
258
	return Specificity{1, 0, 0}
259
}
260
261
func (c idSelector) PseudoElement() string {
262
	return ""
263
}
264
265
// attrSelector selects elements by attribute. See its Match method for
// the supported operations.
type attrSelector struct {
	// key is the attribute name to look up.
	key string
	// val is the value the attribute is compared against.
	val string
	// operation is the comparison operator ("" means "attribute present").
	operation string
	// regexp is the expression used by the "#=" operation.
	regexp *regexp.Regexp
	// insensitive requests case-insensitive comparison of the value.
	insensitive bool
}
270
271
// Matches elements by attribute value.
272
func (t attrSelector) Match(n *html.Node) bool {
273
	switch t.operation {
274
	case "":
275
		return matchAttribute(n, t.key, func(string) bool { return true })
276
	case "=":
277
		return matchAttribute(n, t.key, func(s string) bool { return matchInsensitiveValue(s, t.val, t.insensitive) })
278
	case "!=":
279
		return attributeNotEqualMatch(t.key, t.val, n, t.insensitive)
280
	case "~=":
281
		// matches elements where the attribute named key is a whitespace-separated list that includes val.
282
		return matchAttribute(n, t.key, func(s string) bool { return matchInclude(t.val, s, t.insensitive) })
283
	case "|=":
284
		return attributeDashMatch(t.key, t.val, n, t.insensitive)
285
	case "^=":
286
		return attributePrefixMatch(t.key, t.val, n, t.insensitive)
287
	case "$=":
288
		return attributeSuffixMatch(t.key, t.val, n, t.insensitive)
289
	case "*=":
290
		return attributeSubstringMatch(t.key, t.val, n, t.insensitive)
291
	case "#=":
292
		return attributeRegexMatch(t.key, t.regexp, n)
293
	default:
294
		panic(fmt.Sprintf("unsuported operation : %s", t.operation))
295
	}
296
}
297
298
// matchInsensitiveValue reports whether userAttr and realAttr are equal.
// When ignoreCase is true the comparison uses strings.EqualFold;
// otherwise the two strings must be identical.
//
// userAttr is the value set by the user to match elements; realAttr is
// the attribute value found in the parsed document.
func matchInsensitiveValue(userAttr string, realAttr string, ignoreCase bool) bool {
	if !ignoreCase {
		return userAttr == realAttr
	}
	return strings.EqualFold(userAttr, realAttr)
}
308
309
// matches elements where the attribute named key satisifes the function f.
310
func matchAttribute(n *html.Node, key string, f func(string) bool) bool {
311
	if n.Type != html.ElementNode {
312
		return false
313
	}
314
	for _, a := range n.Attr {
315
		if a.Key == key && f(a.Val) {
316
			return true
317
		}
318
	}
319
	return false
320
}
321
322
// attributeNotEqualMatch matches elements where
323
// the attribute named key does not have the value val.
324
func attributeNotEqualMatch(key, val string, n *html.Node, ignoreCase bool) bool {
325
	if n.Type != html.ElementNode {
326
		return false
327
	}
328
	for _, a := range n.Attr {
329
		if a.Key == key && matchInsensitiveValue(a.Val, val, ignoreCase) {
330
			return false
331
		}
332
	}
333
	return true
334
}
335
336
// returns true if s is a whitespace-separated list that includes val.
337
func matchInclude(val string, s string, ignoreCase bool) bool {
338
	for s != "" {
339
		i := strings.IndexAny(s, " \t\r\n\f")
340
		if i == -1 {
341
			return matchInsensitiveValue(s, val, ignoreCase)
342
		}
343
		if matchInsensitiveValue(s[:i], val, ignoreCase) {
344
			return true
345
		}
346
		s = s[i+1:]
347
	}
348
	return false
349
}
350
351
//  matches elements where the attribute named key equals val or starts with val plus a hyphen.
352
func attributeDashMatch(key, val string, n *html.Node, ignoreCase bool) bool {
353
	return matchAttribute(n, key,
354
		func(s string) bool {
355
			if matchInsensitiveValue(s, val, ignoreCase) {
356
				return true
357
			}
358
			if len(s) <= len(val) {
359
				return false
360
			}
361
			if matchInsensitiveValue(s[:len(val)], val, ignoreCase) && s[len(val)] == '-' {
362
				return true
363
			}
364
			return false
365
		})
366
}
367
368
// attributePrefixMatch returns a Selector that matches elements where
369
// the attribute named key starts with val.
370
func attributePrefixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
371
	return matchAttribute(n, key,
372
		func(s string) bool {
373
			if strings.TrimSpace(s) == "" {
374
				return false
375
			}
376
			if ignoreCase {
377
				return strings.HasPrefix(strings.ToLower(s), strings.ToLower(val))
378
			}
379
			return strings.HasPrefix(s, val)
380
		})
381
}
382
383
// attributeSuffixMatch matches elements where
384
// the attribute named key ends with val.
385
func attributeSuffixMatch(key, val string, n *html.Node, ignoreCase bool) bool {
386
	return matchAttribute(n, key,
387
		func(s string) bool {
388
			if strings.TrimSpace(s) == "" {
389
				return false
390
			}
391
			if ignoreCase {
392
				return strings.HasSuffix(strings.ToLower(s), strings.ToLower(val))
393
			}
394
			return strings.HasSuffix(s, val)
395
		})
396
}
397
398
// attributeSubstringMatch matches nodes where
399
// the attribute named key contains val.
400
func attributeSubstringMatch(key, val string, n *html.Node, ignoreCase bool) bool {
401
	return matchAttribute(n, key,
402
		func(s string) bool {
403
			if strings.TrimSpace(s) == "" {
404
				return false
405
			}
406
			if ignoreCase {
407
				return strings.Contains(strings.ToLower(s), strings.ToLower(val))
408
			}
409
			return strings.Contains(s, val)
410
		})
411
}
412
413
// attributeRegexMatch  matches nodes where
414
// the attribute named key matches the regular expression rx
415
func attributeRegexMatch(key string, rx *regexp.Regexp, n *html.Node) bool {
416
	return matchAttribute(n, key,
417
		func(s string) bool {
418
			return rx.MatchString(s)
419
		})
420
}
421
422
// Specificity returns the constant Specificity{0, 1, 0}, the same value
// a class selector reports.
func (t attrSelector) Specificity() Specificity {
	return Specificity{0, 1, 0}
}
425
426
// PseudoElement always returns the empty string: an attribute selector
// carries no pseudo-element.
func (t attrSelector) PseudoElement() string {
	return ""
}
429
430
// see pseudo_classes.go for pseudo classes selectors
431
432
// on a static context, some selectors can't match anything
433
type neverMatchSelector struct {
434
	value string
435
}
436
437
func (s neverMatchSelector) Match(n *html.Node) bool {
438
	return false
439
}
440
441
func (s neverMatchSelector) Specificity() Specificity {
442
	return Specificity{0, 0, 0}
443
}
444
445
func (c neverMatchSelector) PseudoElement() string {
446
	return ""
447
}
448
449
type compoundSelector struct {
450
	selectors     []Sel
451
	pseudoElement string
452
}
453
454
// Matches elements if each sub-selectors matches.
455
func (t compoundSelector) Match(n *html.Node) bool {
456
	if len(t.selectors) == 0 {
457
		return n.Type == html.ElementNode
458
	}
459
460
	for _, sel := range t.selectors {
461
		if !sel.Match(n) {
462
			return false
463
		}
464
	}
465
	return true
466
}
467
468
func (s compoundSelector) Specificity() Specificity {
469
	var out Specificity
470
	for _, sel := range s.selectors {
471
		out = out.Add(sel.Specificity())
472
	}
473
	if s.pseudoElement != "" {
474
		// https://drafts.csswg.org/selectors-3/#specificity
475
		out = out.Add(Specificity{0, 0, 1})
476
	}
477
	return out
478
}
479
480
func (c compoundSelector) PseudoElement() string {
481
	return c.pseudoElement
482
}
483
484
type combinedSelector struct {
485
	first      Sel
486
	combinator byte
487
	second     Sel
488
}
489
490
func (t combinedSelector) Match(n *html.Node) bool {
491
	if t.first == nil {
492
		return false // maybe we should panic
493
	}
494
	switch t.combinator {
495
	case 0:
496
		return t.first.Match(n)
497
	case ' ':
498
		return descendantMatch(t.first, t.second, n)
499
	case '>':
500
		return childMatch(t.first, t.second, n)
501
	case '+':
502
		return siblingMatch(t.first, t.second, true, n)
503
	case '~':
504
		return siblingMatch(t.first, t.second, false, n)
505
	default:
506
		panic("unknown combinator")
507
	}
508
}
509
510
// matches an element if it matches d and has an ancestor that matches a.
511
func descendantMatch(a, d Matcher, n *html.Node) bool {
512
	if !d.Match(n) {
513
		return false
514
	}
515
516
	for p := n.Parent; p != nil; p = p.Parent {
517
		if a.Match(p) {
518
			return true
519
		}
520
	}
521
522
	return false
523
}
524
525
// matches an element if it matches d and its parent matches a.
526
func childMatch(a, d Matcher, n *html.Node) bool {
527
	return d.Match(n) && n.Parent != nil && a.Match(n.Parent)
528
}
529
530
// matches an element if it matches s2 and is preceded by an element that matches s1.
531
// If adjacent is true, the sibling must be immediately before the element.
532
func siblingMatch(s1, s2 Matcher, adjacent bool, n *html.Node) bool {
533
	if !s2.Match(n) {
534
		return false
535
	}
536
537
	if adjacent {
538
		for n = n.PrevSibling; n != nil; n = n.PrevSibling {
539
			if n.Type == html.TextNode || n.Type == html.CommentNode {
540
				continue
541
			}
542
			return s1.Match(n)
543
		}
544
		return false
545
	}
546
547
	// Walk backwards looking for element that matches s1
548
	for c := n.PrevSibling; c != nil; c = c.PrevSibling {
549
		if s1.Match(c) {
550
			return true
551
		}
552
	}
553
554
	return false
555
}
556
557
// Specificity returns the specificity of first, with the specificity of
// second added when second is set. first is used without a nil check
// here.
func (t combinedSelector) Specificity() Specificity {
	total := t.first.Specificity()
	if t.second == nil {
		return total
	}
	return total.Add(t.second.Specificity())
}
564
565
// on combinedSelector, a pseudo-element only makes sens on the last
566
// selector, although others increase specificity.
567
func (c combinedSelector) PseudoElement() string {
568
	if c.second == nil {
569
		return ""
570
	}
571
	return c.second.PseudoElement()
572
}
573
574
// A SelectorGroup is a list of selectors, which matches if any of the
575
// individual selectors matches.
576
type SelectorGroup []Sel
577
578
// Match returns true if the node matches one of the single selectors.
579
func (s SelectorGroup) Match(n *html.Node) bool {
580
	for _, sel := range s {
581
		if sel.Match(n) {
582
			return true
583
		}
584
	}
585
	return false
586
}