all repos

rss-tools @ 01ec2af

Get RSS feeds from sources that (I need and) don't provide one.

rss-tools/sources/ztoe/ztoe.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
update feed api again, 1 month ago
1
// source for https://ztoe.com.ua/unhooking-search.php
2
package ztoe
3
4
import (
5
	"context"
6
	"errors"
7
	"fmt"
8
	"io"
9
	"net/http"
10
	"regexp"
11
	"strings"
12
	"time"
13
14
	"github.com/PuerkitoBio/goquery"
15
	"golang.org/x/net/html/charset"
16
17
	"olexsmir.xyz/rss-tools/app"
18
)
19
20
// ztoe builds an outage feed from the ZTOE schedule page.
//
// get performs the outbound HTTP request for the page; Register sets it to
// app.App.Get.
type ztoe struct {
	get func(ctx context.Context, url string) (*http.Response, error)
}
23
24
// sourceURL is the ZTOE page that Register hands to the handler as the
// schedule source.
const sourceURL = "https://ztoe.com.ua/unhooking-search.php"
25
26
// Register mounts the ZTOE outage feed on a under the route
// "GET /ztoe/{group}/{subgroup}", using a.Get for outbound requests.
//
// It currently always returns nil.
func Register(a *app.App) error {
	z := &ztoe{get: a.Get}
	a.Route("GET /ztoe/{group}/{subgroup}", z.handler(sourceURL))
	return nil
}
31
32
func (z *ztoe) handler(scheduleURL string) http.HandlerFunc {
33
	return func(w http.ResponseWriter, r *http.Request) {
34
		group := r.PathValue("group")
35
		subgroup := r.PathValue("subgroup")
36
37
		schedule, err := z.fetchSchedule(r.Context(), scheduleURL)
38
		if err != nil {
39
			http.Error(w, "failed to fetch schedule", http.StatusBadGateway)
40
			return
41
		}
42
43
		row, ok := schedule.Rows[group+"."+subgroup]
44
		if !ok {
45
			http.Error(w, "group/subgroup not found", http.StatusNotFound)
46
			return
47
		}
48
49
		slots := make([]slot, 0, len(schedule.TimeSlots))
50
		for i, t := range schedule.TimeSlots {
51
			slots = append(slots, slot{Range: t, Outage: i < len(row) && row[i]})
52
		}
53
54
		feed := app.NewFeed(
55
			fmt.Sprintf("ZTOE power outages for %s.%s", group, subgroup),
56
			fmt.Sprintf("ztoe-%s-%s", group, subgroup))
57
58
		for _, interval := range buildOutageIntervals(slots) {
59
			feed.Add(app.FeedEntry{
60
				Title:   fmt.Sprintf("Power outage %s-%s", interval.Start, interval.End),
61
				ID:      fmt.Sprintf("ztoe-%s-%s-%s-%s-%s", group, subgroup, schedule.Date, strings.ReplaceAll(interval.Start, ":", ""), strings.ReplaceAll(interval.End, ":", "")),
62
				Content: fmt.Sprintf("Date: %s\nGroup: %s.%s\nTime: %s-%s", schedule.Date, group, subgroup, interval.Start, interval.End),
63
				Updated: intervalTime(schedule.Date, interval.Start),
64
			})
65
		}
66
		if err := feed.Render(w); err != nil {
67
			http.Error(w, "failed to render feed", http.StatusInternalServerError)
68
			return
69
		}
70
	}
71
}
72
73
// Patterns used to pick the schedule page apart.
var (
	// timeSlotRe matches text that is exactly one "HH:MM-HH:MM" range.
	timeSlotRe = regexp.MustCompile(`^\d{2}:\d{2}-\d{2}:\d{2}$`)

	// dateRe finds a "DD.MM.YYYY" date anywhere in the text.
	dateRe = regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`)

	// subgroupRe matches text that is exactly a "group.subgroup" label such
	// as "1.2".
	subgroupRe = regexp.MustCompile(`^\d+\.\d+$`)

	// bgColorRe captures the value of a "background" or "background-color"
	// declaration in an inline style, case-insensitively, up to the next ';'.
	bgColorRe = regexp.MustCompile(`(?i)background(?:-color)?\s*:\s*([^;]+)`)
)
79
80
// slot pairs one time range from the schedule header with whether the
// selected row has an outage during it.
type slot struct {
	// Range is the "HH:MM-HH:MM" text of the time slot.
	Range string `json:"range"`
	// Outage reports whether power is scheduled to be off in this slot.
	Outage bool `json:"outage"`
}
84
85
// outageInterval is one continuous outage, obtained by merging adjacent
// outage slots.
type outageInterval struct {
	// Start is the "HH:MM" start of the first merged slot.
	Start string `json:"start"`
	// End is the "HH:MM" end of the last merged slot.
	End string `json:"end"`
}
89
90
// parsedSchedule is the outage table extracted from the schedule page.
type parsedSchedule struct {
	// Date is the first "DD.MM.YYYY" date found in the table, or "" when the
	// table contains none.
	Date string
	// TimeSlots lists the distinct "HH:MM-HH:MM" ranges in document order.
	TimeSlots []string
	// Rows maps a "group.subgroup" label to one flag per entry of TimeSlots;
	// true marks an outage.
	Rows map[string][]bool
}
95
96
// fetchSchedule downloads the page at scheduleURL through z.get and parses it
// into a schedule.
//
// It returns an error when the request fails, when the response status is not
// 200 OK, or when the page cannot be parsed. Request and parse errors are
// wrapped, so errors.Is / errors.As still reach the underlying cause. The
// response body is always closed before returning.
func (z *ztoe) fetchSchedule(ctx context.Context, scheduleURL string) (*parsedSchedule, error) {
	res, err := z.get(ctx, scheduleURL)
	if err != nil {
		return nil, fmt.Errorf("requesting schedule: %w", err)
	}
	defer res.Body.Close()

	if res.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("unexpected status: %s", res.Status)
	}

	// The Content-Type header is passed on so the parser can pick the charset.
	schedule, err := parseSchedule(res.Body, res.Header.Get("Content-Type"))
	if err != nil {
		return nil, fmt.Errorf("parsing schedule: %w", err)
	}
	return schedule, nil
}
109
110
func parseSchedule(r io.Reader, contentType string) (*parsedSchedule, error) {
111
	if contentType == "" {
112
		contentType = "text/html"
113
	}
114
	decoded, err := charset.NewReader(r, contentType)
115
	if err != nil {
116
		return nil, err
117
	}
118
119
	doc, err := goquery.NewDocumentFromReader(decoded)
120
	if err != nil {
121
		return nil, err
122
	}
123
124
	table := findScheduleTable(doc)
125
	if table == nil {
126
		return nil, errors.New("failed to locate schedule table")
127
	}
128
129
	timeSlots := extractTimeSlots(table)
130
	if len(timeSlots) == 0 {
131
		return nil, errors.New("failed to parse schedule time slots")
132
	}
133
134
	rows := extractRows(table, len(timeSlots))
135
	if len(rows) == 0 {
136
		return nil, errors.New("failed to parse schedule rows")
137
	}
138
139
	return &parsedSchedule{
140
		Date:      extractDate(table),
141
		TimeSlots: timeSlots,
142
		Rows:      rows,
143
	}, nil
144
}
145
146
// findScheduleTable returns the first <table> in doc that lists enough
// distinct time ranges to be the schedule, or nil when no table qualifies.
func findScheduleTable(doc *goquery.Document) *goquery.Selection {
	// Minimum number of distinct "HH:MM-HH:MM" cells a table must contain.
	// NOTE(review): 48 is presumably one slot per half hour of a day —
	// confirm against the page.
	const minSlots = 48

	var found *goquery.Selection
	doc.Find("table").EachWithBreak(func(_ int, table *goquery.Selection) bool {
		if len(extractTimeSlots(table)) < minSlots {
			return true // not the schedule; keep looking
		}
		found = table
		return false // first match wins
	})
	return found
}
157
158
// extractDate returns the first "DD.MM.YYYY" date found in any <td> or <th>
// of table, scanning in document order. It returns "" when no cell contains
// a date.
func extractDate(table *goquery.Selection) string {
	var date string
	table.Find("td,th").EachWithBreak(func(_ int, cell *goquery.Selection) bool {
		date = dateRe.FindString(normalizeWhitespace(cell.Text()))
		// Returning false stops the scan, so only continue while no date
		// has been found yet.
		return date == ""
	})
	return date
}
170
171
// extractTimeSlots collects the distinct "HH:MM-HH:MM" ranges found in the
// <td> and <th> cells of table, in document order. A cell counts only when
// its whitespace-normalized text is exactly one range; repeated ranges are
// reported once.
func extractTimeSlots(table *goquery.Selection) []string {
	// Capacity hint only; more or fewer slots are handled fine.
	const sizeHint = 48

	slots := make([]string, 0, sizeHint)
	seen := make(map[string]struct{}, sizeHint)
	table.Find("td,th").Each(func(_ int, cell *goquery.Selection) {
		text := normalizeWhitespace(cell.Text())
		if !timeSlotRe.MatchString(text) {
			return
		}
		if _, dup := seen[text]; dup {
			return
		}
		seen[text] = struct{}{}
		slots = append(slots, text)
	})
	return slots
}
187
188
// extractRows reads the per-subgroup outage flags out of table.
//
// In every <tr>, the first direct <td> child whose text is a "group.subgroup"
// label names the row. The cells after it are read left to right: each cell
// that has an inline style with a background colour contributes one flag
// (true when the colour marks an outage), and cells without one are skipped.
// Reading stops after slotCount flags.
//
// Rows without a label, or yielding fewer than slotCount flags, are dropped.
// The result maps the label to exactly slotCount flags.
func extractRows(table *goquery.Selection, slotCount int) map[string][]bool {
	rows := make(map[string][]bool)
	table.Find("tr").Each(func(_ int, tr *goquery.Selection) {
		tds := tr.ChildrenFiltered("td")
		cellCount := tds.Length()
		if cellCount == 0 {
			return
		}

		// Locate the label cell; everything after it belongs to the slots.
		subgroup, subgroupIdx := "", -1
		tds.EachWithBreak(func(i int, td *goquery.Selection) bool {
			text := normalizeWhitespace(td.Text())
			if !subgroupRe.MatchString(text) {
				return true
			}
			subgroup, subgroupIdx = text, i
			return false
		})
		if subgroup == "" {
			return
		}

		slots := make([]bool, 0, slotCount)
		for i := subgroupIdx + 1; i < cellCount && len(slots) < slotCount; i++ {
			style, ok := tds.Eq(i).Attr("style")
			if !ok {
				continue
			}
			color, ok := extractBackgroundColor(style)
			if !ok {
				continue
			}
			slots = append(slots, isOutageColor(color))
		}

		// A short row would be misaligned with the header, so discard it.
		if len(slots) == slotCount {
			rows[subgroup] = slots
		}
	})
	return rows
}
231
232
func extractBackgroundColor(style string) (string, bool) {
233
	match := bgColorRe.FindStringSubmatch(style)
234
	if len(match) < 2 {
235
		return "", false
236
	}
237
	color := strings.ToLower(strings.TrimSpace(match[1]))
238
	return strings.ReplaceAll(color, " ", ""), true
239
}
240
241
// isOutageColor reports whether a normalized background colour marks an
// outage cell.
//
// color is expected lower-cased and without spaces, as produced by
// extractBackgroundColor. An empty value, the common spellings of white, and
// any value containing "255,255,255" count as "no outage"; every other colour
// counts as an outage.
func isOutageColor(color string) bool {
	switch color {
	case "", "white", "#fff", "#ffffff", "rgb(255,255,255)", "rgba(255,255,255,1)":
		return false
	}
	// Catches the remaining white rgb()/rgba() variants, e.g. other alpha values.
	return !strings.Contains(color, "255,255,255")
}
249
250
// buildOutageIntervals merges consecutive outage slots into continuous
// intervals.
//
// slots are processed in order. An outage slot whose start equals the end of
// the interval being built extends it; any other outage slot starts a new
// interval. A non-outage slot closes the open interval. Slots whose Range has
// no "-" separator are ignored and do not close an open interval.
//
// The result is never nil; it is empty when there are no outages.
func buildOutageIntervals(slots []slot) []outageInterval {
	intervals := make([]outageInterval, 0)
	var current outageInterval
	active := false

	for _, s := range slots {
		start, end, ok := strings.Cut(s.Range, "-")
		if !ok {
			continue
		}

		switch {
		case !s.Outage:
			// Power is on: flush whatever interval was open.
			if active {
				intervals = append(intervals, current)
				active = false
			}
		case active && current.End == start:
			// Directly follows the open interval: extend it.
			current.End = end
		default:
			// First outage slot, or one that leaves a gap after the open
			// interval: flush and start over.
			if active {
				intervals = append(intervals, current)
			}
			current = outageInterval{Start: start, End: end}
			active = true
		}
	}
	if active {
		intervals = append(intervals, current)
	}
	return intervals
}
286
287
// normalizeWhitespace collapses every run of whitespace in s to a single
// space and removes leading and trailing whitespace.
func normalizeWhitespace(s string) string {
	// strings.Fields already discards leading and trailing whitespace, so no
	// separate trim is needed.
	return strings.Join(strings.Fields(s), " ")
}
290
291
// intervalTime combines a "DD.MM.YYYY" date and an "HH:MM" clock time into a
// time.Time in the process's local time zone.
//
// Fallbacks:
//   - if date does not parse, the current time is returned;
//   - if hhmm does not parse, midnight of the given date is returned.
//
// NOTE(review): the schedule is presumably published in Ukrainian local time,
// which time.Local only matches when the server runs in that zone — confirm.
func intervalTime(date, hhmm string) time.Time {
	day, err := time.ParseInLocation("02.01.2006", date, time.Local)
	if err != nil {
		return time.Now()
	}
	clock, err := time.Parse("15:04", hhmm)
	if err != nil {
		return day
	}
	return time.Date(
		day.Year(), day.Month(), day.Day(),
		clock.Hour(), clock.Minute(), 0, 0,
		day.Location(),
	)
}