all repos

rss-tools @ c37eeed

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/sources/ztoe/ztoe.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
add source for ztoe, 1 month ago
1
// source for https://ztoe.com.ua/unhooking-search.php
2
package ztoe
3
4
import (
5
	"context"
6
	"errors"
7
	"fmt"
8
	"io"
9
	"net/http"
10
	"regexp"
11
	"strings"
12
	"time"
13
14
	"github.com/PuerkitoBio/goquery"
15
	"golang.org/x/net/html/charset"
16
17
	"olexsmir.xyz/rss-tools/app"
18
)
19
20
// ztoe is the source that scrapes the ZTOE outage schedule page and serves it
// as a feed.
type ztoe struct {
	// get downloads url using ctx. Register wires it to the application's
	// Get method.
	get func(ctx context.Context, url string) (*http.Response, error)
}
23
24
// sourceURL is the schedule page that Register passes to the handler.
const (
	sourceURL = "https://ztoe.com.ua/unhooking-search.php"
)
25
26
// Register mounts the ZTOE feed endpoint on a under
// "GET /ztoe/{group}/{subgroup}". The handler downloads pages through a.Get
// and reads the schedule from sourceURL. Register always returns nil.
func Register(a *app.App) error {
	src := &ztoe{get: a.Get}
	a.Route("GET /ztoe/{group}/{subgroup}", src.handler(sourceURL))
	return nil
}
31
32
func (z *ztoe) handler(scheduleURL string) http.HandlerFunc {
33
	return func(w http.ResponseWriter, r *http.Request) {
34
		group := r.PathValue("group")
35
		subgroup := r.PathValue("subgroup")
36
37
		schedule, err := z.fetchSchedule(r.Context(), scheduleURL)
38
		if err != nil {
39
			http.Error(w, "failed to fetch schedule", http.StatusBadGateway)
40
			return
41
		}
42
43
		row, ok := schedule.Rows[group+"."+subgroup]
44
		if !ok {
45
			http.Error(w, "group/subgroup not found", http.StatusNotFound)
46
			return
47
		}
48
49
		slots := make([]slot, 0, len(schedule.TimeSlots))
50
		for i, t := range schedule.TimeSlots {
51
			slots = append(slots, slot{Range: t, Outage: i < len(row) && row[i]})
52
		}
53
54
		feed := app.NewFeed(
55
			fmt.Sprintf("ZTOE power outages for %s.%s", group, subgroup),
56
			fmt.Sprintf("ztoe-%s-%s", group, subgroup))
57
58
		for _, interval := range buildOutageIntervals(slots) {
59
			feed.Add(
60
				fmt.Sprintf("Power outage %s-%s", interval.Start, interval.End),
61
				fmt.Sprintf("ztoe-%s-%s-%s-%s-%s",
62
					group, subgroup, schedule.Date,
63
					strings.ReplaceAll(interval.Start, ":", ""),
64
					strings.ReplaceAll(interval.End, ":", ""),
65
				),
66
				fmt.Sprintf("Date: %s\nGroup: %s.%s\nTime: %s-%s",
67
					schedule.Date, group, subgroup, interval.Start, interval.End),
68
				intervalTime(schedule.Date, interval.Start),
69
			)
70
		}
71
		if err := feed.Render(w); err != nil {
72
			http.Error(w, "failed to render feed", http.StatusInternalServerError)
73
			return
74
		}
75
	}
76
}
77
78
// Patterns used while scraping the schedule table.
var (
	// timeSlotRe matches a whole string of the form "dd:dd-dd:dd",
	// e.g. "00:00-00:30".
	timeSlotRe = regexp.MustCompile(`^\d{2}:\d{2}-\d{2}:\d{2}$`)
	// dateRe finds a "dd.dd.dddd" date anywhere in a string.
	dateRe = regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`)
	// subgroupRe matches a whole string of the form "<digits>.<digits>",
	// e.g. "1.2".
	subgroupRe = regexp.MustCompile(`^\d+\.\d+$`)
	// bgColorRe captures the value of a "background" or "background-color"
	// declaration (case-insensitive) up to the next ";".
	bgColorRe = regexp.MustCompile(`(?i)background(?:-color)?\s*:\s*([^;]+)`)
)
84
85
// slot is one time slot of the schedule together with its outage flag.
type slot struct {
	// Range is the slot label in "start-end" form, e.g. "00:00-00:30".
	Range string `json:"range"`
	// Outage reports whether the slot is marked as an outage.
	Outage bool `json:"outage"`
}
89
90
// outageInterval is a run of adjacent outage slots merged into one span.
type outageInterval struct {
	// Start is the start of the first merged slot, as it appears in the
	// slot label.
	Start string `json:"start"`
	// End is the end of the last merged slot, as it appears in the slot label.
	End string `json:"end"`
}
94
95
// parsedSchedule is the data scraped from the schedule table.
type parsedSchedule struct {
	// Date is the first "dd.dd.dddd" date found in the table, or "" when the
	// table contains none.
	Date string
	// TimeSlots lists the distinct slot labels in the order they appear.
	TimeSlots []string
	// Rows maps a subgroup label such as "1.2" to one outage flag per entry
	// of TimeSlots.
	Rows map[string][]bool
}
100
101
// fetchSchedule downloads scheduleURL through z.get and parses the response
// body into a parsedSchedule.
//
// It returns an error when the request fails, when the response status is not
// 200 OK, or when the page cannot be parsed. Request and parse errors are
// wrapped with %w so the failing step is visible in the message while the
// underlying error stays reachable through errors.Is / errors.As.
func (z *ztoe) fetchSchedule(ctx context.Context, scheduleURL string) (*parsedSchedule, error) {
	res, err := z.get(ctx, scheduleURL)
	if err != nil {
		return nil, fmt.Errorf("fetching schedule page: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", res.Status)
	}

	// The Content-Type header is forwarded so the parser can pick the right
	// character set for the page.
	schedule, err := parseSchedule(res.Body, res.Header.Get("Content-Type"))
	if err != nil {
		return nil, fmt.Errorf("parsing schedule page: %w", err)
	}
	return schedule, nil
}
114
115
func parseSchedule(r io.Reader, contentType string) (*parsedSchedule, error) {
116
	if contentType == "" {
117
		contentType = "text/html"
118
	}
119
	decoded, err := charset.NewReader(r, contentType)
120
	if err != nil {
121
		return nil, err
122
	}
123
124
	doc, err := goquery.NewDocumentFromReader(decoded)
125
	if err != nil {
126
		return nil, err
127
	}
128
129
	table := findScheduleTable(doc)
130
	if table == nil {
131
		return nil, errors.New("failed to locate schedule table")
132
	}
133
134
	timeSlots := extractTimeSlots(table)
135
	if len(timeSlots) == 0 {
136
		return nil, errors.New("failed to parse schedule time slots")
137
	}
138
139
	rows := extractRows(table, len(timeSlots))
140
	if len(rows) == 0 {
141
		return nil, errors.New("failed to parse schedule rows")
142
	}
143
144
	return &parsedSchedule{
145
		Date:      extractDate(table),
146
		TimeSlots: timeSlots,
147
		Rows:      rows,
148
	}, nil
149
}
150
151
// findScheduleTable returns the first <table> in doc that contains at least
// 48 distinct time-slot labels, or nil when no table qualifies.
func findScheduleTable(doc *goquery.Document) *goquery.Selection {
	var match *goquery.Selection
	doc.Find("table").EachWithBreak(func(_ int, candidate *goquery.Selection) bool {
		if len(extractTimeSlots(candidate)) < 48 {
			return true // not the schedule table; keep looking
		}
		match = candidate
		return false
	})
	return match
}
162
163
// extractDate returns the first "dd.dd.dddd" date found in any <td> or <th>
// cell of table, or "" when no cell contains one.
func extractDate(table *goquery.Selection) string {
	var found string
	table.Find("td,th").EachWithBreak(func(_ int, cell *goquery.Selection) bool {
		found = dateRe.FindString(normalizeWhitespace(cell.Text()))
		// Continue only while nothing has been found yet.
		return found == ""
	})
	return found
}
175
176
// extractTimeSlots collects the text of every <td>/<th> cell in table that
// looks like a time slot ("dd:dd-dd:dd"). Each distinct label is returned
// once, in order of first appearance.
func extractTimeSlots(table *goquery.Selection) []string {
	var (
		ordered = make([]string, 0, 48)
		known   = make(map[string]struct{}, 48)
	)
	table.Find("td,th").Each(func(_ int, cell *goquery.Selection) {
		label := normalizeWhitespace(cell.Text())
		if _, dup := known[label]; dup || !timeSlotRe.MatchString(label) {
			return
		}
		known[label] = struct{}{}
		ordered = append(ordered, label)
	})
	return ordered
}
192
193
// extractRows scans every <tr> of table and returns the outage flags per
// subgroup label.
//
// A row is used when one of its direct <td> children holds a subgroup label
// ("<digits>.<digits>"). The cells after that label are read in order; a cell
// contributes a flag only when it has a style attribute with a background
// colour. Rows that do not yield exactly slotCount flags are dropped.
func extractRows(table *goquery.Selection, slotCount int) map[string][]bool {
	result := make(map[string][]bool)
	table.Find("tr").Each(func(_ int, tr *goquery.Selection) {
		cells := tr.ChildrenFiltered("td")
		total := cells.Length()
		if total == 0 {
			return
		}

		// Locate the first cell that carries the subgroup label.
		label, labelIdx := "", -1
		cells.EachWithBreak(func(i int, td *goquery.Selection) bool {
			if text := normalizeWhitespace(td.Text()); subgroupRe.MatchString(text) {
				label, labelIdx = text, i
				return false
			}
			return true
		})
		if label == "" {
			return
		}

		flags := make([]bool, 0, slotCount)
		for i := labelIdx + 1; i < total && len(flags) < slotCount; i++ {
			style, hasStyle := cells.Eq(i).Attr("style")
			if !hasStyle {
				continue
			}
			if color, hasColor := extractBackgroundColor(style); hasColor {
				flags = append(flags, isOutageColor(color))
			}
		}

		if len(flags) == slotCount {
			result[label] = flags
		}
	})
	return result
}
236
237
func extractBackgroundColor(style string) (string, bool) {
238
	match := bgColorRe.FindStringSubmatch(style)
239
	if len(match) < 2 {
240
		return "", false
241
	}
242
	color := strings.ToLower(strings.TrimSpace(match[1]))
243
	return strings.ReplaceAll(color, " ", ""), true
244
}
245
246
// isOutageColor reports whether a normalized background colour marks an
// outage. The empty string and the listed spellings of white mean "no
// outage", as does any value containing "255,255,255"; every other value
// counts as an outage.
func isOutageColor(color string) bool {
	for _, white := range [...]string{
		"", "white", "#fff", "#ffffff", "rgb(255,255,255)", "rgba(255,255,255,1)",
	} {
		if color == white {
			return false
		}
	}
	return !strings.Contains(color, "255,255,255")
}
254
255
// buildOutageIntervals merges consecutive outage slots into intervals.
//
// Slots are read in order. An outage slot extends the interval that is
// currently open when its start equals that interval's end; otherwise it
// opens a new interval. A non-outage slot closes the open interval. Slots
// whose Range has no "-" are ignored and leave the state untouched. The
// result is never nil.
func buildOutageIntervals(slots []slot) []outageInterval {
	merged := []outageInterval{}
	open := false // true while the last element of merged may still grow

	for _, s := range slots {
		from, to, ok := strings.Cut(s.Range, "-")
		switch {
		case !ok:
			// Malformed label: skip without touching the open interval.
		case !s.Outage:
			open = false
		case open && merged[len(merged)-1].End == from:
			merged[len(merged)-1].End = to
		default:
			merged = append(merged, outageInterval{Start: from, End: to})
			open = true
		}
	}
	return merged
}
291
292
// normalizeWhitespace trims s and collapses every run of whitespace inside it
// to a single space.
func normalizeWhitespace(s string) string {
	// strings.Fields already drops leading and trailing whitespace.
	words := strings.Fields(s)
	return strings.Join(words, " ")
}
295
296
// intervalTime combines a "DD.MM.YYYY" date and an "HH:MM" clock value into a
// time in the local time zone.
//
// When date cannot be parsed the current time is returned; when only hhmm
// cannot be parsed the result is midnight of the given date.
func intervalTime(date, hhmm string) time.Time {
	midnight, dateErr := time.ParseInLocation("02.01.2006", date, time.Local)
	if dateErr != nil {
		return time.Now()
	}

	tod, clockErr := time.Parse("15:04", hhmm)
	if clockErr != nil {
		return midnight
	}

	year, month, dayOfMonth := midnight.Date()
	return time.Date(
		year, month, dayOfMonth,
		tod.Hour(), tod.Minute(), 0, 0,
		midnight.Location(),
	)
}