all repos

rss-tools @ 63e22ef23f74cc61b571d71b3c938c26bbdc105f

Get RSS feeds from sources that (I need and) don't provide one.

rss-tools/sources/ztoe/ztoe.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
add moviefeed source, 1 month ago
1
// source for https://ztoe.com.ua/unhooking-search.php
2
package ztoe
3
4
import (
5
	"context"
6
	"errors"
7
	"fmt"
8
	"io"
9
	"net/http"
10
	"regexp"
11
	"strings"
12
	"time"
13
14
	"github.com/PuerkitoBio/goquery"
15
	"golang.org/x/net/html/charset"
16
17
	"olexsmir.xyz/rss-tools/app"
18
)
19
20
// ztoe is the feed source backing the /ztoe routes.
type ztoe struct {
	// get issues the HTTP GET for the given URL under the given context.
	// Register sets it to the application's Get method.
	get func(context.Context, string) (*http.Response, error)
}
23
24
// sourceURL is the ZTOE page the schedule is downloaded from; Register hands
// it to the handler, which fetches it on every request.
const sourceURL = "https://ztoe.com.ua/unhooking-search.php"
25
26
// Register mounts the ZTOE outage feed on the application's router under
// "GET /ztoe/{group}/{subgroup}" and logs that the source is available.
// It uses a.Get as the HTTP fetcher and always returns nil.
func Register(a *app.App) error {
	source := &ztoe{get: a.Get}
	a.Route("GET /ztoe/{group}/{subgroup}", source.handler(sourceURL))
	a.Logger.Info("ztoe source registered")
	return nil
}
32
33
func (z *ztoe) handler(scheduleURL string) http.HandlerFunc {
34
	return func(w http.ResponseWriter, r *http.Request) {
35
		group := r.PathValue("group")
36
		subgroup := r.PathValue("subgroup")
37
38
		schedule, err := z.fetchSchedule(r.Context(), scheduleURL)
39
		if err != nil {
40
			http.Error(w, "failed to fetch schedule", http.StatusBadGateway)
41
			return
42
		}
43
44
		row, ok := schedule.Rows[group+"."+subgroup]
45
		if !ok {
46
			http.Error(w, "group/subgroup not found", http.StatusNotFound)
47
			return
48
		}
49
50
		slots := make([]slot, 0, len(schedule.TimeSlots))
51
		for i, t := range schedule.TimeSlots {
52
			slots = append(slots, slot{Range: t, Outage: i < len(row) && row[i]})
53
		}
54
55
		feed := app.NewFeed(
56
			fmt.Sprintf("ZTOE power outages for %s.%s", group, subgroup),
57
			fmt.Sprintf("ztoe-%s-%s", group, subgroup))
58
59
		for _, interval := range buildOutageIntervals(slots) {
60
			feed.Add(app.FeedEntry{
61
				Title:   fmt.Sprintf("Power outage %s-%s", interval.Start, interval.End),
62
				ID:      fmt.Sprintf("ztoe-%s-%s-%s-%s-%s", group, subgroup, schedule.Date, strings.ReplaceAll(interval.Start, ":", ""), strings.ReplaceAll(interval.End, ":", "")),
63
				Content: fmt.Sprintf("Date: %s\nGroup: %s.%s\nTime: %s-%s", schedule.Date, group, subgroup, interval.Start, interval.End),
64
				Updated: intervalTime(schedule.Date, interval.Start),
65
			})
66
		}
67
		if err := feed.Render(w); err != nil {
68
			http.Error(w, "failed to render feed", http.StatusInternalServerError)
69
			return
70
		}
71
	}
72
}
73
74
// Patterns used while scraping the schedule table.
var (
	// subgroupRe matches a cell that consists solely of a "<digits>.<digits>"
	// label.
	subgroupRe = regexp.MustCompile(`^\d+\.\d+$`)
	// timeSlotRe matches a cell that consists solely of an "HH:MM-HH:MM" range.
	timeSlotRe = regexp.MustCompile(`^\d{2}:\d{2}-\d{2}:\d{2}$`)
	// dateRe finds a "DD.MM.YYYY"-shaped date anywhere inside a string.
	dateRe = regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`)
	// bgColorRe captures the value of a background / background-color
	// declaration in an inline style attribute (case-insensitive, up to the
	// next semicolon).
	bgColorRe = regexp.MustCompile(`(?i)background(?:-color)?\s*:\s*([^;]+)`)
)
80
81
// slot is one column of the schedule for a single subgroup.
type slot struct {
	// Range is the slot's time range label, e.g. "08:00-08:30".
	Range string `json:"range"`
	// Outage reports whether the slot is marked as an outage.
	Outage bool `json:"outage"`
}
85
86
// outageInterval is a run of consecutive outage slots merged into one span.
type outageInterval struct {
	// Start is the start time of the first slot in the run.
	Start string `json:"start"`
	// End is the end time of the last slot in the run.
	End string `json:"end"`
}
90
91
// parsedSchedule is the result of scraping the schedule table.
type parsedSchedule struct {
	// Date is the first DD.MM.YYYY date found in the table, or "" if none.
	Date string
	// TimeSlots lists the distinct "HH:MM-HH:MM" labels in document order.
	TimeSlots []string
	// Rows maps a subgroup label such as "1.2" to one outage flag per entry
	// in TimeSlots.
	Rows map[string][]bool
}
96
97
// fetchSchedule downloads scheduleURL through z.get and parses the response
// body as an outage schedule.
//
// It returns an error when the request fails, when the response status is
// not 200 OK, or when the body cannot be parsed. Request and parse errors are
// wrapped with %w and annotated with the URL, so the cause stays inspectable
// via errors.Is / errors.As while the message says which step failed.
func (z *ztoe) fetchSchedule(ctx context.Context, scheduleURL string) (*parsedSchedule, error) {
	res, err := z.get(ctx, scheduleURL)
	if err != nil {
		return nil, fmt.Errorf("fetching %s: %w", scheduleURL, err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", res.Status)
	}

	schedule, err := parseSchedule(res.Body, res.Header.Get("Content-Type"))
	if err != nil {
		return nil, fmt.Errorf("parsing schedule from %s: %w", scheduleURL, err)
	}
	return schedule, nil
}
110
111
func parseSchedule(r io.Reader, contentType string) (*parsedSchedule, error) {
112
	if contentType == "" {
113
		contentType = "text/html"
114
	}
115
	decoded, err := charset.NewReader(r, contentType)
116
	if err != nil {
117
		return nil, err
118
	}
119
120
	doc, err := goquery.NewDocumentFromReader(decoded)
121
	if err != nil {
122
		return nil, err
123
	}
124
125
	table := findScheduleTable(doc)
126
	if table == nil {
127
		return nil, errors.New("failed to locate schedule table")
128
	}
129
130
	timeSlots := extractTimeSlots(table)
131
	if len(timeSlots) == 0 {
132
		return nil, errors.New("failed to parse schedule time slots")
133
	}
134
135
	rows := extractRows(table, len(timeSlots))
136
	if len(rows) == 0 {
137
		return nil, errors.New("failed to parse schedule rows")
138
	}
139
140
	return &parsedSchedule{
141
		Date:      extractDate(table),
142
		TimeSlots: timeSlots,
143
		Rows:      rows,
144
	}, nil
145
}
146
147
// findScheduleTable returns the first <table> in doc, in document order, that
// contains at least 48 distinct time-slot labels, or nil when there is none.
// (48 presumably corresponds to one label per half hour of a day.)
func findScheduleTable(doc *goquery.Document) *goquery.Selection {
	tables := doc.Find("table")
	for i := 0; i < tables.Length(); i++ {
		candidate := tables.Eq(i)
		if len(extractTimeSlots(candidate)) >= 48 {
			return candidate
		}
	}
	return nil
}
158
159
// extractDate returns the first DD.MM.YYYY-shaped date found in any td/th
// cell of table, scanning in document order. It returns "" when no cell
// contains one.
func extractDate(table *goquery.Selection) string {
	var found string
	table.Find("td,th").EachWithBreak(func(_ int, cell *goquery.Selection) bool {
		found = dateRe.FindString(normalizeWhitespace(cell.Text()))
		// Keep walking only while nothing has matched yet.
		return found == ""
	})
	return found
}
171
172
// extractTimeSlots collects the distinct "HH:MM-HH:MM" labels that appear as
// the whole (whitespace-normalised) text of a td/th cell in table. Labels are
// returned in first-seen document order; repeats are dropped.
func extractTimeSlots(table *goquery.Selection) []string {
	var (
		ordered = make([]string, 0, 48)
		known   = make(map[string]struct{}, 48)
	)
	table.Find("td,th").Each(func(_ int, cell *goquery.Selection) {
		label := normalizeWhitespace(cell.Text())
		if _, dup := known[label]; dup || !timeSlotRe.MatchString(label) {
			return
		}
		known[label] = struct{}{}
		ordered = append(ordered, label)
	})
	return ordered
}
188
189
// extractRows scans every <tr> of table and builds a map from subgroup label
// (a cell whose text looks like "1.2") to its per-slot outage flags.
//
// Within a row, the cells after the label cell are read left to right. Cells
// without a style attribute or without a background declaration are skipped;
// every other cell contributes one flag, until slotCount flags have been
// collected. A row is kept only when it produced exactly slotCount flags.
func extractRows(table *goquery.Selection, slotCount int) map[string][]bool {
	rows := make(map[string][]bool)
	table.Find("tr").Each(func(_ int, tr *goquery.Selection) {
		cells := tr.ChildrenFiltered("td")
		cellCount := cells.Length()
		if cellCount == 0 {
			return
		}

		// Locate the first cell that holds a subgroup label.
		label, labelIdx := "", -1
		for i := 0; i < cellCount; i++ {
			text := normalizeWhitespace(cells.Eq(i).Text())
			if subgroupRe.MatchString(text) {
				label, labelIdx = text, i
				break
			}
		}
		if labelIdx < 0 {
			return
		}

		flags := make([]bool, 0, slotCount)
		for i := labelIdx + 1; i < cellCount && len(flags) < slotCount; i++ {
			style, hasStyle := cells.Eq(i).Attr("style")
			if !hasStyle {
				continue
			}
			if color, hasColor := extractBackgroundColor(style); hasColor {
				flags = append(flags, isOutageColor(color))
			}
		}

		// Incomplete rows are discarded rather than stored partially.
		if len(flags) != slotCount {
			return
		}
		rows[label] = flags
	})
	return rows
}
232
233
func extractBackgroundColor(style string) (string, bool) {
234
	match := bgColorRe.FindStringSubmatch(style)
235
	if len(match) < 2 {
236
		return "", false
237
	}
238
	color := strings.ToLower(strings.TrimSpace(match[1]))
239
	return strings.ReplaceAll(color, " ", ""), true
240
}
241
242
// isOutageColor reports whether a normalised background colour marks an
// outage. The empty string, "white", "#fff", "#ffffff" and any value that
// contains "255,255,255" (e.g. rgb/rgba white) count as "no outage"; every
// other value counts as an outage. The comparison is case-sensitive, so
// callers are expected to pass a lower-cased, space-free value.
func isOutageColor(color string) bool {
	if color == "" {
		return false
	}
	for _, white := range [...]string{"white", "#fff", "#ffffff"} {
		if color == white {
			return false
		}
	}
	// Covers rgb(255,255,255) and rgba(255,255,255,<alpha>).
	return !strings.Contains(color, "255,255,255")
}
250
251
// buildOutageIntervals merges consecutive outage slots into intervals.
//
// Slots are processed in order. An outage slot whose start equals the end of
// the interval currently being built extends it; any other outage slot starts
// a new interval. A non-outage slot closes the current interval. Slots whose
// Range has no "-" separator are ignored entirely and do not close anything.
// The result is never nil.
func buildOutageIntervals(slots []slot) []outageInterval {
	merged := make([]outageInterval, 0)

	var (
		run   outageInterval
		inRun bool
	)
	// flush emits the interval under construction, if there is one.
	flush := func() {
		if inRun {
			merged = append(merged, run)
			inRun = false
		}
	}

	for _, s := range slots {
		from, to, ok := strings.Cut(s.Range, "-")
		switch {
		case !ok:
			// Malformed range: skip without touching the current run.
		case !s.Outage:
			flush()
		case inRun && run.End == from:
			run.End = to
		default:
			// First outage slot, or one that is not adjacent to the run.
			flush()
			run, inRun = outageInterval{Start: from, End: to}, true
		}
	}
	flush()

	return merged
}
287
288
// normalizeWhitespace trims s and collapses every run of whitespace inside
// it into a single space.
func normalizeWhitespace(s string) string {
	// Fields already discards leading and trailing whitespace.
	words := strings.Fields(s)
	return strings.Join(words, " ")
}
291
292
// intervalTime combines a "DD.MM.YYYY" date and an "HH:MM" clock time into a
// time.Time in the process's local time zone.
//
// If date cannot be parsed, the current time is returned. If only hhmm cannot
// be parsed, midnight at the start of that date is returned.
func intervalTime(date, hhmm string) time.Time {
	midnight, err := time.ParseInLocation("02.01.2006", date, time.Local)
	if err != nil {
		return time.Now()
	}

	timeOfDay, err := time.Parse("15:04", hhmm)
	if err != nil {
		return midnight
	}

	year, month, dayOfMonth := midnight.Date()
	return time.Date(
		year, month, dayOfMonth,
		timeOfDay.Hour(), timeOfDay.Minute(), 0, 0,
		midnight.Location(),
	)
}