all repos

rss-tools @ 50b546d

Get RSS feeds from sources that (I need and) don't provide one.

rss-tools/app/atom.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
fix moviefeed, 29 days ago
1
package app
2
3
import (
4
	"bytes"
5
	"crypto/sha1"
6
	"encoding/xml"
7
	"fmt"
8
	"io"
9
	"net/http"
10
	"strings"
11
	"time"
12
)
13
14
// XML namespaces and defaults shared by the Atom types in this file.
const (
	// atomNamespace is the xmlns value NewFeed puts on the root <feed> element.
	atomNamespace = "http://www.w3.org/2005/Atom"
	// xhtmlNamespace is the xmlns value of the <div> that wraps xhtml content.
	xhtmlNamespace = "http://www.w3.org/1999/xhtml"
	// defaultAuthor is the author name NewFeed assigns until WithAuthor
	// replaces it.
	defaultAuthor = "rss-tools"
)
19
20
type AtomFeed struct {
21
	XMLName  xml.Name     `xml:"feed"`
22
	XMLNS    string       `xml:"xmlns,attr"`
23
	Title    string       `xml:"title"`
24
	ID       string       `xml:"id"`
25
	Updated  string       `xml:"updated"`
26
	Authors  []AtomPerson `xml:"author,omitempty"`
27
	Subtitle string       `xml:"subtitle,omitempty"`
28
	Entries  []AtomEntry  `xml:"entry"`
29
}
30
31
type AtomEntry struct {
32
	Title   string      `xml:"title"`
33
	ID      string      `xml:"id"`
34
	Updated string      `xml:"updated"`
35
	Links   []AtomLink  `xml:"link,omitempty"`
36
	Content AtomContent `xml:"content"`
37
}
38
39
// AtomContent is the <content> element of an entry.
//
// Encoding goes through the MarshalXML method defined on this type, which
// decides how Value is written based on Type.
type AtomContent struct {
	XMLName xml.Name `xml:"content"`
	// Type is the value of the type attribute ("text", "html" or "xhtml").
	Type string `xml:"type,attr,omitempty"`
	// Value is the body of the element.
	Value string `xml:",chardata"`
}
44
45
func (c AtomContent) MarshalXML(e *xml.Encoder, start xml.StartElement) error {
46
	contentType := c.Type
47
	if contentType == "" {
48
		contentType = "text"
49
	}
50
51
	start.Name = xml.Name{Local: "content"}
52
	start.Attr = append(start.Attr, xml.Attr{
53
		Name:  xml.Name{Local: "type"},
54
		Value: contentType,
55
	})
56
57
	if err := e.EncodeToken(start); err != nil {
58
		return err
59
	}
60
61
	if contentType == "xhtml" {
62
		if err := validateXHTMLFragment(c.Value); err != nil {
63
			return err
64
		}
65
66
		if err := e.Encode(xhtmlDiv{
67
			XMLNS: xhtmlNamespace,
68
			Inner: c.Value,
69
		}); err != nil {
70
			return err
71
		}
72
	} else {
73
		if err := e.EncodeToken(xml.CharData([]byte(c.Value))); err != nil {
74
			return err
75
		}
76
	}
77
78
	if err := e.EncodeToken(start.End()); err != nil {
79
		return err
80
	}
81
	return e.Flush()
82
}
83
84
// xhtmlDiv is the <div> wrapper written around xhtml entry content.
type xhtmlDiv struct {
	XMLName xml.Name `xml:"div"`
	// XMLNS is written as the xmlns attribute of the div.
	XMLNS string `xml:"xmlns,attr"`
	// Inner is written verbatim (innerxml), without escaping.
	Inner string `xml:",innerxml"`
}
89
90
func validateXHTMLFragment(fragment string) error {
91
	wrapped := fmt.Sprintf(`<div xmlns="%s">%s</div>`, xhtmlNamespace, fragment)
92
	dec := xml.NewDecoder(strings.NewReader(wrapped))
93
	for {
94
		_, err := dec.Token()
95
		if err == io.EOF {
96
			return nil
97
		}
98
		if err != nil {
99
			return fmt.Errorf("invalid xhtml content: %w", err)
100
		}
101
	}
102
}
103
104
// AtomPerson describes a person by name; AtomFeed uses it for <author>
// elements.
type AtomPerson struct {
	Name string `xml:"name"`
}
107
108
// AtomLink is a <link> element. Rel, Type and Length are left out of the
// output when empty; the href attribute is always written.
type AtomLink struct {
	Rel    string `xml:"rel,attr,omitempty"`
	Type   string `xml:"type,attr,omitempty"`
	Length string `xml:"length,attr,omitempty"`
	Href   string `xml:"href,attr"`
}
114
115
type FeedEntry struct {
116
	Title       string
117
	ID          string
118
	Links       []FeedLink
119
	Content     string
120
	ContentType string // "text", "html", or "xhtml"; defaults to "text"
121
	Updated     time.Time
122
}
123
124
// FeedLink is the tag-free input form of a link. It has the same fields as
// AtomLink, which lets FeedBuilder.Add convert it directly.
type FeedLink struct {
	Rel    string
	Type   string
	Length string
	Href   string
}
130
131
// FeedBuilder accumulates an AtomFeed through chained method calls and then
// serializes it.
type FeedBuilder struct {
	// f is the feed under construction.
	f AtomFeed
}
132
133
func NewFeed(title, id string) *FeedBuilder {
134
	return &FeedBuilder{f: AtomFeed{
135
		XMLNS:   atomNamespace,
136
		Title:   title,
137
		ID:      id,
138
		Updated: time.Now().Format(time.RFC3339),
139
		Authors: []AtomPerson{{Name: defaultAuthor}},
140
	}}
141
}
142
143
// WithSubtitle sets the feed subtitle and returns the builder for chaining.
// An empty subtitle is left out of the serialized feed.
func (f *FeedBuilder) WithSubtitle(subtitle string) *FeedBuilder {
	f.f.Subtitle = subtitle
	return f
}
147
148
// WithAuthor replaces the feed's author list with a single author and
// returns the builder for chaining.
//
// Surrounding whitespace is trimmed from name; if nothing remains, the
// existing authors are kept.
func (f *FeedBuilder) WithAuthor(name string) *FeedBuilder {
	if trimmed := strings.TrimSpace(name); trimmed != "" {
		f.f.Authors = []AtomPerson{{Name: trimmed}}
	}
	return f
}
156
157
// WithUpdated sets the feed's updated timestamp, formatted as RFC 3339, and
// returns the builder for chaining. A zero time leaves the current value
// untouched.
func (f *FeedBuilder) WithUpdated(updated time.Time) *FeedBuilder {
	if updated.IsZero() {
		return f
	}
	f.f.Updated = updated.Format(time.RFC3339)
	return f
}
163
164
func (f *FeedBuilder) Add(entry FeedEntry) *FeedBuilder {
165
	if entry.Updated.IsZero() {
166
		entry.Updated = time.Now()
167
	}
168
	if entry.ID == "" {
169
		hash := sha1.Sum(fmt.Appendf(nil, "%s|%s|%s", entry.Title, entry.Content, entry.Updated.Format(time.RFC3339Nano)))
170
		entry.ID = fmt.Sprintf("urn:sha1:%x", hash)
171
	}
172
173
	contentType := entry.ContentType
174
	if contentType == "" {
175
		contentType = "text"
176
	}
177
178
	links := make([]AtomLink, 0, len(entry.Links))
179
	for _, link := range entry.Links {
180
		if link.Href == "" {
181
			continue
182
		}
183
		links = append(links, AtomLink(link))
184
	}
185
186
	f.f.Entries = append(f.f.Entries, AtomEntry{
187
		Title:   entry.Title,
188
		ID:      entry.ID,
189
		Updated: entry.Updated.Format(time.RFC3339),
190
		Links:   links,
191
		Content: AtomContent{
192
			Type:  contentType,
193
			Value: entry.Content,
194
		},
195
	})
196
197
	feedUpdated, err := time.Parse(time.RFC3339, f.f.Updated)
198
	if err != nil || entry.Updated.After(feedUpdated) {
199
		f.f.Updated = entry.Updated.Format(time.RFC3339)
200
	}
201
	return f
202
}
203
204
// SetUpdated sets the feed's updated timestamp and returns the builder for
// chaining. It behaves exactly like WithUpdated: a zero time is ignored,
// any other value is stored in RFC 3339 form.
func (f *FeedBuilder) SetUpdated(updated time.Time) *FeedBuilder {
	return f.WithUpdated(updated)
}
210
211
// WriteTo encodes the feed as XML, indented by two spaces per level, and
// writes it to w. It returns the encoder's error, if any.
//
// Note that the signature returns only an error, so FeedBuilder does not
// satisfy io.WriterTo.
func (f *FeedBuilder) WriteTo(w io.Writer) error {
	enc := xml.NewEncoder(w)
	enc.Indent("", "  ")
	return enc.Encode(f.f)
}
216
217
// Bytes returns the serialized feed as produced by WriteTo. On failure it
// returns a nil slice together with the error.
func (f *FeedBuilder) Bytes() ([]byte, error) {
	var out bytes.Buffer
	if err := f.WriteTo(&out); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
224
225
// Render writes the feed to w as an HTTP response body with the Atom
// content type.
//
// The feed is encoded into memory first. If encoding fails, the error is
// returned before the header is set or any byte is written, so the caller
// can still send an error response. Otherwise it returns the error from
// writing the body, if any.
func (f *FeedBuilder) Render(w http.ResponseWriter) error {
	body, err := f.Bytes()
	if err != nil {
		return err
	}
	w.Header().Set("Content-Type", "application/atom+xml; charset=utf-8")
	_, err = w.Write(body)
	return err
}