rss-tools/vendor/github.com/andybalholm/cascadia/pseudo_classes.go (view raw)
| 1 | package cascadia |
| 2 | |
| 3 | import ( |
| 4 | "bytes" |
| 5 | "fmt" |
| 6 | "regexp" |
| 7 | "strings" |
| 8 | |
| 9 | "golang.org/x/net/html" |
| 10 | "golang.org/x/net/html/atom" |
| 11 | ) |
| 12 | |
| 13 | // This file implements the pseudo-class selectors, |
| 14 | // most of which share one implementation of PseudoElement() and Specificity(). |
| 15 | |
| 16 | type abstractPseudoClass struct{} |
| 17 | |
| 18 | func (s abstractPseudoClass) Specificity() Specificity { |
| 19 | return Specificity{0, 1, 0} |
| 20 | } |
| 21 | |
| 22 | func (c abstractPseudoClass) PseudoElement() string { |
| 23 | return "" |
| 24 | } |
| 25 | |
| 26 | type relativePseudoClassSelector struct { |
| 27 | name string // one of "not", "has", "haschild" |
| 28 | match SelectorGroup |
| 29 | } |
| 30 | |
| 31 | func (s relativePseudoClassSelector) Match(n *html.Node) bool { |
| 32 | if n.Type != html.ElementNode { |
| 33 | return false |
| 34 | } |
| 35 | switch s.name { |
| 36 | case "not": |
| 37 | // matches elements that do not match a. |
| 38 | return !s.match.Match(n) |
| 39 | case "has": |
| 40 | // matches elements with any descendant that matches a. |
| 41 | return hasDescendantMatch(n, s.match) |
| 42 | case "haschild": |
| 43 | // matches elements with a child that matches a. |
| 44 | return hasChildMatch(n, s.match) |
| 45 | default: |
| 46 | panic(fmt.Sprintf("unsupported relative pseudo class selector : %s", s.name)) |
| 47 | } |
| 48 | } |
| 49 | |
| 50 | // hasChildMatch returns whether n has any child that matches a. |
| 51 | func hasChildMatch(n *html.Node, a Matcher) bool { |
| 52 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 53 | if a.Match(c) { |
| 54 | return true |
| 55 | } |
| 56 | } |
| 57 | return false |
| 58 | } |
| 59 | |
| 60 | // hasDescendantMatch performs a depth-first search of n's descendants, |
| 61 | // testing whether any of them match a. It returns true as soon as a match is |
| 62 | // found, or false if no match is found. |
| 63 | func hasDescendantMatch(n *html.Node, a Matcher) bool { |
| 64 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 65 | if a.Match(c) || (c.Type == html.ElementNode && hasDescendantMatch(c, a)) { |
| 66 | return true |
| 67 | } |
| 68 | } |
| 69 | return false |
| 70 | } |
| 71 | |
| 72 | // Specificity returns the specificity of the most specific selectors |
| 73 | // in the pseudo-class arguments. |
| 74 | // See https://www.w3.org/TR/selectors/#specificity-rules |
| 75 | func (s relativePseudoClassSelector) Specificity() Specificity { |
| 76 | var max Specificity |
| 77 | for _, sel := range s.match { |
| 78 | newSpe := sel.Specificity() |
| 79 | if max.Less(newSpe) { |
| 80 | max = newSpe |
| 81 | } |
| 82 | } |
| 83 | return max |
| 84 | } |
| 85 | |
| 86 | func (c relativePseudoClassSelector) PseudoElement() string { |
| 87 | return "" |
| 88 | } |
| 89 | |
| 90 | type containsPseudoClassSelector struct { |
| 91 | abstractPseudoClass |
| 92 | value string |
| 93 | own bool |
| 94 | } |
| 95 | |
| 96 | func (s containsPseudoClassSelector) Match(n *html.Node) bool { |
| 97 | var text string |
| 98 | if s.own { |
| 99 | // matches nodes that directly contain the given text |
| 100 | text = strings.ToLower(nodeOwnText(n)) |
| 101 | } else { |
| 102 | // matches nodes that contain the given text. |
| 103 | text = strings.ToLower(nodeText(n)) |
| 104 | } |
| 105 | return strings.Contains(text, s.value) |
| 106 | } |
| 107 | |
| 108 | type regexpPseudoClassSelector struct { |
| 109 | abstractPseudoClass |
| 110 | regexp *regexp.Regexp |
| 111 | own bool |
| 112 | } |
| 113 | |
| 114 | func (s regexpPseudoClassSelector) Match(n *html.Node) bool { |
| 115 | var text string |
| 116 | if s.own { |
| 117 | // matches nodes whose text directly matches the specified regular expression |
| 118 | text = nodeOwnText(n) |
| 119 | } else { |
| 120 | // matches nodes whose text matches the specified regular expression |
| 121 | text = nodeText(n) |
| 122 | } |
| 123 | return s.regexp.MatchString(text) |
| 124 | } |
| 125 | |
| 126 | // writeNodeText writes the text contained in n and its descendants to b. |
| 127 | func writeNodeText(n *html.Node, b *bytes.Buffer) { |
| 128 | switch n.Type { |
| 129 | case html.TextNode: |
| 130 | b.WriteString(n.Data) |
| 131 | case html.ElementNode: |
| 132 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 133 | writeNodeText(c, b) |
| 134 | } |
| 135 | } |
| 136 | } |
| 137 | |
| 138 | // nodeText returns the text contained in n and its descendants. |
| 139 | func nodeText(n *html.Node) string { |
| 140 | var b bytes.Buffer |
| 141 | writeNodeText(n, &b) |
| 142 | return b.String() |
| 143 | } |
| 144 | |
| 145 | // nodeOwnText returns the contents of the text nodes that are direct |
| 146 | // children of n. |
| 147 | func nodeOwnText(n *html.Node) string { |
| 148 | var b bytes.Buffer |
| 149 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 150 | if c.Type == html.TextNode { |
| 151 | b.WriteString(c.Data) |
| 152 | } |
| 153 | } |
| 154 | return b.String() |
| 155 | } |
| 156 | |
| 157 | type nthPseudoClassSelector struct { |
| 158 | abstractPseudoClass |
| 159 | a, b int |
| 160 | last, ofType bool |
| 161 | } |
| 162 | |
| 163 | func (s nthPseudoClassSelector) Match(n *html.Node) bool { |
| 164 | if s.a == 0 { |
| 165 | if s.last { |
| 166 | return simpleNthLastChildMatch(s.b, s.ofType, n) |
| 167 | } else { |
| 168 | return simpleNthChildMatch(s.b, s.ofType, n) |
| 169 | } |
| 170 | } |
| 171 | return nthChildMatch(s.a, s.b, s.last, s.ofType, n) |
| 172 | } |
| 173 | |
| 174 | // nthChildMatch implements :nth-child(an+b). |
| 175 | // If last is true, implements :nth-last-child instead. |
| 176 | // If ofType is true, implements :nth-of-type instead. |
| 177 | func nthChildMatch(a, b int, last, ofType bool, n *html.Node) bool { |
| 178 | if n.Type != html.ElementNode { |
| 179 | return false |
| 180 | } |
| 181 | |
| 182 | parent := n.Parent |
| 183 | if parent == nil { |
| 184 | return false |
| 185 | } |
| 186 | |
| 187 | i := -1 |
| 188 | count := 0 |
| 189 | for c := parent.FirstChild; c != nil; c = c.NextSibling { |
| 190 | if (c.Type != html.ElementNode) || (ofType && c.Data != n.Data) { |
| 191 | continue |
| 192 | } |
| 193 | count++ |
| 194 | if c == n { |
| 195 | i = count |
| 196 | if !last { |
| 197 | break |
| 198 | } |
| 199 | } |
| 200 | } |
| 201 | |
| 202 | if i == -1 { |
| 203 | // This shouldn't happen, since n should always be one of its parent's children. |
| 204 | return false |
| 205 | } |
| 206 | |
| 207 | if last { |
| 208 | i = count - i + 1 |
| 209 | } |
| 210 | |
| 211 | i -= b |
| 212 | if a == 0 { |
| 213 | return i == 0 |
| 214 | } |
| 215 | |
| 216 | return i%a == 0 && i/a >= 0 |
| 217 | } |
| 218 | |
| 219 | // simpleNthChildMatch implements :nth-child(b). |
| 220 | // If ofType is true, implements :nth-of-type instead. |
| 221 | func simpleNthChildMatch(b int, ofType bool, n *html.Node) bool { |
| 222 | if n.Type != html.ElementNode { |
| 223 | return false |
| 224 | } |
| 225 | |
| 226 | parent := n.Parent |
| 227 | if parent == nil { |
| 228 | return false |
| 229 | } |
| 230 | |
| 231 | count := 0 |
| 232 | for c := parent.FirstChild; c != nil; c = c.NextSibling { |
| 233 | if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { |
| 234 | continue |
| 235 | } |
| 236 | count++ |
| 237 | if c == n { |
| 238 | return count == b |
| 239 | } |
| 240 | if count >= b { |
| 241 | return false |
| 242 | } |
| 243 | } |
| 244 | return false |
| 245 | } |
| 246 | |
| 247 | // simpleNthLastChildMatch implements :nth-last-child(b). |
| 248 | // If ofType is true, implements :nth-last-of-type instead. |
| 249 | func simpleNthLastChildMatch(b int, ofType bool, n *html.Node) bool { |
| 250 | if n.Type != html.ElementNode { |
| 251 | return false |
| 252 | } |
| 253 | |
| 254 | parent := n.Parent |
| 255 | if parent == nil { |
| 256 | return false |
| 257 | } |
| 258 | |
| 259 | count := 0 |
| 260 | for c := parent.LastChild; c != nil; c = c.PrevSibling { |
| 261 | if c.Type != html.ElementNode || (ofType && c.Data != n.Data) { |
| 262 | continue |
| 263 | } |
| 264 | count++ |
| 265 | if c == n { |
| 266 | return count == b |
| 267 | } |
| 268 | if count >= b { |
| 269 | return false |
| 270 | } |
| 271 | } |
| 272 | return false |
| 273 | } |
| 274 | |
| 275 | type onlyChildPseudoClassSelector struct { |
| 276 | abstractPseudoClass |
| 277 | ofType bool |
| 278 | } |
| 279 | |
| 280 | // Match implements :only-child. |
| 281 | // If `ofType` is true, it implements :only-of-type instead. |
| 282 | func (s onlyChildPseudoClassSelector) Match(n *html.Node) bool { |
| 283 | if n.Type != html.ElementNode { |
| 284 | return false |
| 285 | } |
| 286 | |
| 287 | parent := n.Parent |
| 288 | if parent == nil { |
| 289 | return false |
| 290 | } |
| 291 | |
| 292 | count := 0 |
| 293 | for c := parent.FirstChild; c != nil; c = c.NextSibling { |
| 294 | if (c.Type != html.ElementNode) || (s.ofType && c.Data != n.Data) { |
| 295 | continue |
| 296 | } |
| 297 | count++ |
| 298 | if count > 1 { |
| 299 | return false |
| 300 | } |
| 301 | } |
| 302 | |
| 303 | return count == 1 |
| 304 | } |
| 305 | |
| 306 | type inputPseudoClassSelector struct { |
| 307 | abstractPseudoClass |
| 308 | } |
| 309 | |
| 310 | // Matches input, select, textarea and button elements. |
| 311 | func (s inputPseudoClassSelector) Match(n *html.Node) bool { |
| 312 | return n.Type == html.ElementNode && (n.Data == "input" || n.Data == "select" || n.Data == "textarea" || n.Data == "button") |
| 313 | } |
| 314 | |
| 315 | type emptyElementPseudoClassSelector struct { |
| 316 | abstractPseudoClass |
| 317 | } |
| 318 | |
| 319 | // Matches empty elements. |
| 320 | func (s emptyElementPseudoClassSelector) Match(n *html.Node) bool { |
| 321 | if n.Type != html.ElementNode { |
| 322 | return false |
| 323 | } |
| 324 | |
| 325 | for c := n.FirstChild; c != nil; c = c.NextSibling { |
| 326 | switch c.Type { |
| 327 | case html.ElementNode: |
| 328 | return false |
| 329 | case html.TextNode: |
| 330 | if strings.TrimSpace(nodeText(c)) == "" { |
| 331 | continue |
| 332 | } else { |
| 333 | return false |
| 334 | } |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | return true |
| 339 | } |
| 340 | |
| 341 | type rootPseudoClassSelector struct { |
| 342 | abstractPseudoClass |
| 343 | } |
| 344 | |
| 345 | // Match implements :root |
| 346 | func (s rootPseudoClassSelector) Match(n *html.Node) bool { |
| 347 | if n.Type != html.ElementNode { |
| 348 | return false |
| 349 | } |
| 350 | if n.Parent == nil { |
| 351 | return false |
| 352 | } |
| 353 | return n.Parent.Type == html.DocumentNode |
| 354 | } |
| 355 | |
| 356 | func hasAttr(n *html.Node, attr string) bool { |
| 357 | return matchAttribute(n, attr, func(string) bool { return true }) |
| 358 | } |
| 359 | |
| 360 | type linkPseudoClassSelector struct { |
| 361 | abstractPseudoClass |
| 362 | } |
| 363 | |
| 364 | // Match implements :link |
| 365 | func (s linkPseudoClassSelector) Match(n *html.Node) bool { |
| 366 | return (n.DataAtom == atom.A || n.DataAtom == atom.Area || n.DataAtom == atom.Link) && hasAttr(n, "href") |
| 367 | } |
| 368 | |
| 369 | type langPseudoClassSelector struct { |
| 370 | abstractPseudoClass |
| 371 | lang string |
| 372 | } |
| 373 | |
| 374 | func (s langPseudoClassSelector) Match(n *html.Node) bool { |
| 375 | own := matchAttribute(n, "lang", func(val string) bool { |
| 376 | return val == s.lang || strings.HasPrefix(val, s.lang+"-") |
| 377 | }) |
| 378 | if n.Parent == nil { |
| 379 | return own |
| 380 | } |
| 381 | return own || s.Match(n.Parent) |
| 382 | } |
| 383 | |
| 384 | type enabledPseudoClassSelector struct { |
| 385 | abstractPseudoClass |
| 386 | } |
| 387 | |
| 388 | func (s enabledPseudoClassSelector) Match(n *html.Node) bool { |
| 389 | if n.Type != html.ElementNode { |
| 390 | return false |
| 391 | } |
| 392 | switch n.DataAtom { |
| 393 | case atom.A, atom.Area, atom.Link: |
| 394 | return hasAttr(n, "href") |
| 395 | case atom.Optgroup, atom.Menuitem, atom.Fieldset: |
| 396 | return !hasAttr(n, "disabled") |
| 397 | case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option: |
| 398 | return !hasAttr(n, "disabled") && !inDisabledFieldset(n) |
| 399 | } |
| 400 | return false |
| 401 | } |
| 402 | |
| 403 | type disabledPseudoClassSelector struct { |
| 404 | abstractPseudoClass |
| 405 | } |
| 406 | |
| 407 | func (s disabledPseudoClassSelector) Match(n *html.Node) bool { |
| 408 | if n.Type != html.ElementNode { |
| 409 | return false |
| 410 | } |
| 411 | switch n.DataAtom { |
| 412 | case atom.Optgroup, atom.Menuitem, atom.Fieldset: |
| 413 | return hasAttr(n, "disabled") |
| 414 | case atom.Button, atom.Input, atom.Select, atom.Textarea, atom.Option: |
| 415 | return hasAttr(n, "disabled") || inDisabledFieldset(n) |
| 416 | } |
| 417 | return false |
| 418 | } |
| 419 | |
| 420 | func hasLegendInPreviousSiblings(n *html.Node) bool { |
| 421 | for s := n.PrevSibling; s != nil; s = s.PrevSibling { |
| 422 | if s.DataAtom == atom.Legend { |
| 423 | return true |
| 424 | } |
| 425 | } |
| 426 | return false |
| 427 | } |
| 428 | |
| 429 | func inDisabledFieldset(n *html.Node) bool { |
| 430 | if n.Parent == nil { |
| 431 | return false |
| 432 | } |
| 433 | if n.Parent.DataAtom == atom.Fieldset && hasAttr(n.Parent, "disabled") && |
| 434 | (n.DataAtom != atom.Legend || hasLegendInPreviousSiblings(n)) { |
| 435 | return true |
| 436 | } |
| 437 | return inDisabledFieldset(n.Parent) |
| 438 | } |
| 439 | |
| 440 | type checkedPseudoClassSelector struct { |
| 441 | abstractPseudoClass |
| 442 | } |
| 443 | |
| 444 | func (s checkedPseudoClassSelector) Match(n *html.Node) bool { |
| 445 | if n.Type != html.ElementNode { |
| 446 | return false |
| 447 | } |
| 448 | switch n.DataAtom { |
| 449 | case atom.Input, atom.Menuitem: |
| 450 | return hasAttr(n, "checked") && matchAttribute(n, "type", func(val string) bool { |
| 451 | t := toLowerASCII(val) |
| 452 | return t == "checkbox" || t == "radio" |
| 453 | }) |
| 454 | case atom.Option: |
| 455 | return hasAttr(n, "selected") |
| 456 | } |
| 457 | return false |
| 458 | } |