all repos

rss-tools @ a5ac52722b131734c74504b6e6f4d9900536cac7

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/vendor/golang.org/x/net/html/render.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
we're vendoring now, 7 days ago
1
// Copyright 2011 The Go Authors. All rights reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
package html
6
7
import (
8
	"bufio"
9
	"errors"
10
	"fmt"
11
	"io"
12
	"strings"
13
)
14
15
// writer is the sink the rendering functions write to: a byte stream that
// can also accept a single byte or a whole string directly.
type writer interface {
	io.Writer
	io.ByteWriter
	io.StringWriter
}
20
21
// Render renders the parse tree n to the given writer.
22
//
23
// Rendering is done on a 'best effort' basis: calling Parse on the output of
24
// Render will always result in something similar to the original tree, but it
25
// is not necessarily an exact clone unless the original tree was 'well-formed'.
26
// 'Well-formed' is not easily specified; the HTML5 specification is
27
// complicated.
28
//
29
// Calling Parse on arbitrary input typically results in a 'well-formed' parse
30
// tree. However, it is possible for Parse to yield a 'badly-formed' parse tree.
31
// For example, in a 'well-formed' parse tree, no <a> element is a child of
32
// another <a> element: parsing "<a><a>" results in two sibling elements.
33
// Similarly, in a 'well-formed' parse tree, no <a> element is a child of a
34
// <table> element: parsing "<p><table><a>" results in a <p> with two sibling
35
// children; the <a> is reparented to the <table>'s parent. However, calling
36
// Parse on "<a><table><a>" does not return an error, but the result has an <a>
37
// element with an <a> child, and is therefore not 'well-formed'.
38
//
39
// Programmatically constructed trees are typically also 'well-formed', but it
40
// is possible to construct a tree that looks innocuous but, when rendered and
41
// re-parsed, results in a different tree. A simple example is that a solitary
42
// text node would become a tree containing <html>, <head> and <body> elements.
43
// Another example is that the programmatic equivalent of "a<head>b</head>c"
44
// becomes "<html><head><head/><body>abc</body></html>".
45
func Render(w io.Writer, n *Node) error {
46
	if x, ok := w.(writer); ok {
47
		return render(x, n)
48
	}
49
	buf := bufio.NewWriter(w)
50
	if err := render(buf, n); err != nil {
51
		return err
52
	}
53
	return buf.Flush()
54
}
55
56
// plaintextAbort is the sentinel render1 returns once a <plaintext> element
// has been written out. It signals that no further end tags may be emitted;
// render translates it back into a nil error.
var plaintextAbort = errors.New("html: internal error (plaintext abort)")
59
60
// render serializes n to w via render1. The plaintextAbort sentinel is an
// internal "stop emitting end tags" signal rather than a failure, so it is
// reported to the caller as success; every other error is passed through.
func render(w writer, n *Node) error {
	if err := render1(w, n); err != nil && err != plaintextAbort {
		return err
	}
	return nil
}
67
68
func render1(w writer, n *Node) error {
69
	// Render non-element nodes; these are the easy cases.
70
	switch n.Type {
71
	case ErrorNode:
72
		return errors.New("html: cannot render an ErrorNode node")
73
	case TextNode:
74
		return escape(w, n.Data)
75
	case DocumentNode:
76
		for c := n.FirstChild; c != nil; c = c.NextSibling {
77
			if err := render1(w, c); err != nil {
78
				return err
79
			}
80
		}
81
		return nil
82
	case ElementNode:
83
		// No-op.
84
	case CommentNode:
85
		if _, err := w.WriteString("<!--"); err != nil {
86
			return err
87
		}
88
		if err := escapeComment(w, n.Data); err != nil {
89
			return err
90
		}
91
		if _, err := w.WriteString("-->"); err != nil {
92
			return err
93
		}
94
		return nil
95
	case DoctypeNode:
96
		if _, err := w.WriteString("<!DOCTYPE "); err != nil {
97
			return err
98
		}
99
		if err := escape(w, n.Data); err != nil {
100
			return err
101
		}
102
		if n.Attr != nil {
103
			var p, s string
104
			for _, a := range n.Attr {
105
				switch a.Key {
106
				case "public":
107
					p = a.Val
108
				case "system":
109
					s = a.Val
110
				}
111
			}
112
			if p != "" {
113
				if _, err := w.WriteString(" PUBLIC "); err != nil {
114
					return err
115
				}
116
				if err := writeQuoted(w, p); err != nil {
117
					return err
118
				}
119
				if s != "" {
120
					if err := w.WriteByte(' '); err != nil {
121
						return err
122
					}
123
					if err := writeQuoted(w, s); err != nil {
124
						return err
125
					}
126
				}
127
			} else if s != "" {
128
				if _, err := w.WriteString(" SYSTEM "); err != nil {
129
					return err
130
				}
131
				if err := writeQuoted(w, s); err != nil {
132
					return err
133
				}
134
			}
135
		}
136
		return w.WriteByte('>')
137
	case RawNode:
138
		_, err := w.WriteString(n.Data)
139
		return err
140
	default:
141
		return errors.New("html: unknown node type")
142
	}
143
144
	// Render the <xxx> opening tag.
145
	if err := w.WriteByte('<'); err != nil {
146
		return err
147
	}
148
	if _, err := w.WriteString(n.Data); err != nil {
149
		return err
150
	}
151
	for _, a := range n.Attr {
152
		if err := w.WriteByte(' '); err != nil {
153
			return err
154
		}
155
		if a.Namespace != "" {
156
			if _, err := w.WriteString(a.Namespace); err != nil {
157
				return err
158
			}
159
			if err := w.WriteByte(':'); err != nil {
160
				return err
161
			}
162
		}
163
		if _, err := w.WriteString(a.Key); err != nil {
164
			return err
165
		}
166
		if _, err := w.WriteString(`="`); err != nil {
167
			return err
168
		}
169
		if err := escape(w, a.Val); err != nil {
170
			return err
171
		}
172
		if err := w.WriteByte('"'); err != nil {
173
			return err
174
		}
175
	}
176
	if voidElements[n.Data] {
177
		if n.FirstChild != nil {
178
			return fmt.Errorf("html: void element <%s> has child nodes", n.Data)
179
		}
180
		_, err := w.WriteString("/>")
181
		return err
182
	}
183
	if err := w.WriteByte('>'); err != nil {
184
		return err
185
	}
186
187
	// Add initial newline where there is danger of a newline being ignored.
188
	if c := n.FirstChild; c != nil && c.Type == TextNode && strings.HasPrefix(c.Data, "\n") {
189
		switch n.Data {
190
		case "pre", "listing", "textarea":
191
			if err := w.WriteByte('\n'); err != nil {
192
				return err
193
			}
194
		}
195
	}
196
197
	// Render any child nodes
198
	if childTextNodesAreLiteral(n) {
199
		for c := n.FirstChild; c != nil; c = c.NextSibling {
200
			if c.Type == TextNode {
201
				if _, err := w.WriteString(c.Data); err != nil {
202
					return err
203
				}
204
			} else {
205
				if err := render1(w, c); err != nil {
206
					return err
207
				}
208
			}
209
		}
210
		if n.Data == "plaintext" {
211
			// Don't render anything else. <plaintext> must be the
212
			// last element in the file, with no closing tag.
213
			return plaintextAbort
214
		}
215
	} else {
216
		for c := n.FirstChild; c != nil; c = c.NextSibling {
217
			if err := render1(w, c); err != nil {
218
				return err
219
			}
220
		}
221
	}
222
223
	// Render the </xxx> closing tag.
224
	if _, err := w.WriteString("</"); err != nil {
225
		return err
226
	}
227
	if _, err := w.WriteString(n.Data); err != nil {
228
		return err
229
	}
230
	return w.WriteByte('>')
231
}
232
233
func childTextNodesAreLiteral(n *Node) bool {
234
	// Per WHATWG HTML 13.3, if the parent of the current node is a style,
235
	// script, xmp, iframe, noembed, noframes, or plaintext element, and the
236
	// current node is a text node, append the value of the node's data
237
	// literally. The specification is not explicit about it, but we only
238
	// enforce this if we are in the HTML namespace (i.e. when the namespace is
239
	// "").
240
	// NOTE: we also always include noscript elements, although the
241
	// specification states that they should only be rendered as such if
242
	// scripting is enabled for the node (which is not something we track).
243
	if n.Namespace != "" {
244
		return false
245
	}
246
	switch n.Data {
247
	case "iframe", "noembed", "noframes", "noscript", "plaintext", "script", "style", "xmp":
248
		return true
249
	default:
250
		return false
251
	}
252
}
253
254
// writeQuoted writes s to w surrounded by quotes. Normally it will use double
255
// quotes, but if s contains a double quote, it will use single quotes.
256
// It is used for writing the identifiers in a doctype declaration.
257
// In valid HTML, they can't contain both types of quotes.
258
func writeQuoted(w writer, s string) error {
259
	var q byte = '"'
260
	if strings.Contains(s, `"`) {
261
		q = '\''
262
	}
263
	if err := w.WriteByte(q); err != nil {
264
		return err
265
	}
266
	if _, err := w.WriteString(s); err != nil {
267
		return err
268
	}
269
	if err := w.WriteByte(q); err != nil {
270
		return err
271
	}
272
	return nil
273
}
274
275
// voidElements is the set of void elements listed in Section 12.1.2,
// "Elements". Void elements are those that can't have any contents.
var voidElements = map[string]bool{
	"area": true, "base": true, "br": true, "col": true, "embed": true,
	"hr": true, "img": true, "input": true, "link": true, "meta": true,
	"param": true, "source": true, "track": true, "wbr": true,

	// "keygen" has been removed from the spec, but is kept here for
	// backwards compatibility.
	"keygen": true,
}