all repos

rss-tools @ 8c0c68e023cfb8e513f5d9d072ab47ac61b5bb65

Get RSS feeds from sources that (I need and) don't provide one.

rss-tools/sources/ztoe/ztoe.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
refactor atom feed builder, 14 days ago
1
// Package ztoe serves an Atom feed of power outages scraped from https://ztoe.com.ua/unhooking-search.php
2
package ztoe
3
4
import (
5
	"context"
6
	"errors"
7
	"fmt"
8
	"io"
9
	"net/http"
10
	"regexp"
11
	"strings"
12
	"time"
13
14
	"github.com/PuerkitoBio/goquery"
15
	"golang.org/x/net/html/charset"
16
17
	"olexsmir.xyz/rss-tools/app"
18
	"olexsmir.xyz/rss-tools/app/atom"
19
)
20
21
// ztoe is the source that backs the "GET /ztoe/{group}/{subgroup}" route.
type ztoe struct {
	// get performs an HTTP GET of url under ctx. Register sets it to the
	// application's Get, so the fetch can be swapped out (e.g. for a stub).
	get func(ctx context.Context, url string) (*http.Response, error)
}
24
25
// sourceURL is the schedule page that Register hands to the handler to fetch and parse.
const sourceURL = "https://ztoe.com.ua/unhooking-search.php"
26
27
// Register mounts the ZTOE feed on a at "GET /ztoe/{group}/{subgroup}",
// fetching the schedule page through a.Get, and logs that the source is
// available. It currently always returns nil.
func Register(a *app.App) error {
	z := &ztoe{get: a.Get}
	a.Route("GET /ztoe/{group}/{subgroup}", z.handler(sourceURL))
	a.Logger.Info("ztoe source registered")
	return nil
}
33
34
func (z *ztoe) handler(scheduleURL string) http.HandlerFunc {
35
	return func(w http.ResponseWriter, r *http.Request) {
36
		group := r.PathValue("group")
37
		subgroup := r.PathValue("subgroup")
38
39
		schedule, err := z.fetchSchedule(r.Context(), scheduleURL)
40
		if err != nil {
41
			http.Error(w, "failed to fetch schedule", http.StatusBadGateway)
42
			return
43
		}
44
45
		row, ok := schedule.Rows[group+"."+subgroup]
46
		if !ok {
47
			http.Error(w, "group/subgroup not found", http.StatusNotFound)
48
			return
49
		}
50
51
		slots := make([]slot, 0, len(schedule.TimeSlots))
52
		for i, t := range schedule.TimeSlots {
53
			slots = append(slots, slot{Range: t, Outage: i < len(row) && row[i]})
54
		}
55
56
		feed := atom.NewFeed(
57
			fmt.Sprintf("ZTOE power outages for %s.%s", group, subgroup),
58
			fmt.Sprintf("ztoe-%s-%s", group, subgroup))
59
60
		for _, interval := range buildOutageIntervals(slots) {
61
			feed.Add(&atom.Entry{
62
				Title:   fmt.Sprintf("Power outage %s-%s", interval.Start, interval.End),
63
				ID:      fmt.Sprintf("ztoe-%s-%s-%s-%s-%s", group, subgroup, schedule.Date, strings.ReplaceAll(interval.Start, ":", ""), strings.ReplaceAll(interval.End, ":", "")),
64
				Content: atom.NewText(fmt.Sprintf("Date: %s\nGroup: %s.%s\nTime: %s-%s", schedule.Date, group, subgroup, interval.Start, interval.End), ""),
65
				Updated: atom.Time(intervalTime(schedule.Date, interval.Start)),
66
			})
67
		}
68
		if err := feed.Render(w); err != nil {
69
			http.Error(w, "failed to render feed", http.StatusInternalServerError)
70
			return
71
		}
72
	}
73
}
74
75
// Patterns used to recognise the interesting cells of the schedule table.
var (
	// timeSlotRe matches a cell whose entire text is a slot label such as
	// "00:00-00:30".
	timeSlotRe = regexp.MustCompile(`^\d{2}:\d{2}-\d{2}:\d{2}$`)

	// dateRe finds a DD.MM.YYYY-shaped date anywhere inside a cell's text.
	dateRe = regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`)

	// subgroupRe matches a cell whose entire text is a "<digits>.<digits>"
	// key such as "1.2".
	subgroupRe = regexp.MustCompile(`^\d+\.\d+$`)

	// bgColorRe captures, case-insensitively, the value of a "background" or
	// "background-color" declaration in an inline style, up to the next ';'.
	bgColorRe = regexp.MustCompile(`(?i)background(?:-color)?\s*:\s*([^;]+)`)
)
81
82
// slot is one column of the schedule for a single subgroup: the time range
// label and whether an outage is marked for it.
// The json tags are not used by any code visible in this file.
type slot struct {
	Range  string `json:"range"`  // label in "HH:MM-HH:MM" form
	Outage bool   `json:"outage"` // true when the cell is marked as an outage
}
86
87
// outageInterval is a contiguous outage, produced by merging adjacent outage
// slots. Start and End are the "HH:MM" halves of the first and last slot.
// The json tags are not used by any code visible in this file.
type outageInterval struct {
	Start string `json:"start"`
	End   string `json:"end"`
}
91
92
// parsedSchedule is the result of scraping the schedule table.
type parsedSchedule struct {
	// Date is the first DD.MM.YYYY-shaped string found in the table, or ""
	// when none was found.
	Date string
	// TimeSlots holds the distinct "HH:MM-HH:MM" labels in document order.
	TimeSlots []string
	// Rows maps a "<group>.<subgroup>" key to one flag per time slot;
	// true marks an outage.
	Rows map[string][]bool
}
97
98
// fetchSchedule downloads scheduleURL with z.get and parses the response body
// into a parsedSchedule. It fails when the request errors, when the response
// status is not 200 OK, or when the page cannot be parsed.
func (z *ztoe) fetchSchedule(ctx context.Context, scheduleURL string) (*parsedSchedule, error) {
	res, err := z.get(ctx, scheduleURL)
	if err != nil {
		// Wrap so the caller can tell which URL failed; %w keeps the
		// original error reachable through errors.Is / errors.As.
		return nil, fmt.Errorf("fetching %s: %w", scheduleURL, err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", res.Status)
	}

	// The Content-Type header is passed along so the parser can pick the
	// right character encoding.
	return parseSchedule(res.Body, res.Header.Get("Content-Type"))
}
111
112
func parseSchedule(r io.Reader, contentType string) (*parsedSchedule, error) {
113
	if contentType == "" {
114
		contentType = "text/html"
115
	}
116
	decoded, err := charset.NewReader(r, contentType)
117
	if err != nil {
118
		return nil, err
119
	}
120
121
	doc, err := goquery.NewDocumentFromReader(decoded)
122
	if err != nil {
123
		return nil, err
124
	}
125
126
	table := findScheduleTable(doc)
127
	if table == nil {
128
		return nil, errors.New("failed to locate schedule table")
129
	}
130
131
	timeSlots := extractTimeSlots(table)
132
	if len(timeSlots) == 0 {
133
		return nil, errors.New("failed to parse schedule time slots")
134
	}
135
136
	rows := extractRows(table, len(timeSlots))
137
	if len(rows) == 0 {
138
		return nil, errors.New("failed to parse schedule rows")
139
	}
140
141
	return &parsedSchedule{
142
		Date:      extractDate(table),
143
		TimeSlots: timeSlots,
144
		Rows:      rows,
145
	}, nil
146
}
147
148
// findScheduleTable returns the first <table> in doc that contains at least
// 48 distinct time-slot labels, or nil when no table qualifies.
func findScheduleTable(doc *goquery.Document) *goquery.Selection {
	// Presumably one label per half hour of a day; confirm against the page.
	const minTimeSlots = 48

	var found *goquery.Selection
	doc.Find("table").EachWithBreak(func(_ int, table *goquery.Selection) bool {
		if len(extractTimeSlots(table)) < minTimeSlots {
			return true // keep looking
		}
		found = table
		return false
	})
	return found
}
159
160
// extractDate returns the first DD.MM.YYYY-shaped string found in any td/th
// cell of table, scanning in document order, or "" when there is none.
func extractDate(table *goquery.Selection) string {
	date := ""
	table.Find("td,th").EachWithBreak(func(_ int, cell *goquery.Selection) bool {
		date = dateRe.FindString(normalizeWhitespace(cell.Text()))
		return date == "" // continue until a cell yields a match
	})
	return date
}
172
173
// extractTimeSlots returns the distinct time-slot labels ("HH:MM-HH:MM")
// found in table's td/th cells, in document order. A label that appears more
// than once is reported only the first time. The result is empty (non-nil)
// when the table has no such cells.
func extractTimeSlots(table *goquery.Selection) []string {
	slots := make([]string, 0, 48)
	seen := make(map[string]struct{}, 48)

	table.Find("td,th").Each(func(_ int, cell *goquery.Selection) {
		label := normalizeWhitespace(cell.Text())
		if !timeSlotRe.MatchString(label) {
			return
		}
		if _, dup := seen[label]; dup {
			return
		}
		seen[label] = struct{}{}
		slots = append(slots, label)
	})
	return slots
}
189
190
// extractRows builds the per-subgroup outage flags from table.
//
// For every <tr> it looks for the first direct <td> child whose text is a
// "<digits>.<digits>" key. The cells after it are then read left to right:
// a cell without a style attribute, or without a background declaration in
// it, is skipped; any other cell contributes one flag, true when its
// background is not white (see isOutageColor). Reading stops after slotCount
// flags. Only rows that yield exactly slotCount flags are kept; if a key
// occurs in several rows, the last one wins.
func extractRows(table *goquery.Selection, slotCount int) map[string][]bool {
	rows := make(map[string][]bool)

	table.Find("tr").Each(func(_ int, tr *goquery.Selection) {
		tds := tr.ChildrenFiltered("td")
		if tds.Length() == 0 {
			return
		}

		// Locate the cell that names the subgroup.
		subgroup := ""
		subgroupIdx := -1
		tds.EachWithBreak(func(i int, td *goquery.Selection) bool {
			text := normalizeWhitespace(td.Text())
			if !subgroupRe.MatchString(text) {
				return true
			}
			subgroup = text
			subgroupIdx = i
			return false
		})
		if subgroup == "" {
			return
		}

		// Collect one flag per coloured cell to the right of the key.
		flags := make([]bool, 0, slotCount)
		for i := subgroupIdx + 1; i < tds.Length() && len(flags) < slotCount; i++ {
			style, ok := tds.Eq(i).Attr("style")
			if !ok {
				continue
			}
			color, ok := extractBackgroundColor(style)
			if !ok {
				continue
			}
			flags = append(flags, isOutageColor(color))
		}

		// A short row cannot be aligned with the time slots; drop it.
		if len(flags) == slotCount {
			rows[subgroup] = flags
		}
	})
	return rows
}
233
234
func extractBackgroundColor(style string) (string, bool) {
235
	match := bgColorRe.FindStringSubmatch(style)
236
	if len(match) < 2 {
237
		return "", false
238
	}
239
	color := strings.ToLower(strings.TrimSpace(match[1]))
240
	return strings.ReplaceAll(color, " ", ""), true
241
}
242
243
// isOutageColor reports whether a normalised background colour (lower-case,
// no spaces, as produced by extractBackgroundColor) marks an outage. An
// empty value and every recognised spelling of white mean "no outage"; any
// other colour means "outage".
func isOutageColor(color string) bool {
	switch color {
	case "", "white", "#fff", "#ffffff":
		return false
	}
	// Covers rgb(255,255,255) and rgba(255,255,255,<alpha>) in one check.
	return !strings.Contains(color, "255,255,255")
}
251
252
// buildOutageIntervals merges consecutive outage slots into intervals.
//
// Slots are read in order. An outage slot extends the open interval when its
// start equals that interval's end; otherwise it closes the open interval
// (if any) and starts a new one. A non-outage slot closes the open interval.
// Slots whose Range has no "-" separator are ignored. The result is never
// nil, even when there are no outages.
func buildOutageIntervals(slots []slot) []outageInterval {
	intervals := make([]outageInterval, 0)
	var current outageInterval
	active := false

	for _, s := range slots {
		start, end, ok := strings.Cut(s.Range, "-")
		if !ok {
			continue
		}

		switch {
		case !s.Outage:
			if active {
				intervals = append(intervals, current)
				active = false
			}
		case active && current.End == start:
			// Adjacent to the open interval: just push its end forward.
			current.End = end
		default:
			// First outage slot, or a gap between labels: start afresh.
			if active {
				intervals = append(intervals, current)
			}
			current = outageInterval{Start: start, End: end}
			active = true
		}
	}

	if active {
		intervals = append(intervals, current)
	}
	return intervals
}
288
289
// normalizeWhitespace trims s and collapses every run of whitespace inside it
// to a single space. strings.Fields already discards leading and trailing
// whitespace, so no separate trim is needed.
func normalizeWhitespace(s string) string {
	return strings.Join(strings.Fields(s), " ")
}
292
293
// intervalTime combines a "DD.MM.YYYY" date and an "HH:MM" clock time into a
// time.Time in the process's local time zone.
//
// Fallbacks: when date cannot be parsed the current time is returned; when
// only hhmm cannot be parsed, midnight of that date is returned.
//
// NOTE(review): the schedule is presumably published in Ukrainian local
// time, while this uses time.Local; confirm the server's zone matches.
func intervalTime(date, hhmm string) time.Time {
	day, err := time.ParseInLocation("02.01.2006", date, time.Local)
	if err != nil {
		return time.Now()
	}

	clock, err := time.Parse("15:04", hhmm)
	if err != nil {
		return day
	}

	year, month, dom := day.Date()
	return time.Date(year, month, dom, clock.Hour(), clock.Minute(), 0, 0, day.Location())
}