rss-tools/sources/ztoe/ztoe.go (view raw)
| 1 | // source for https://ztoe.com.ua/unhooking-search.php |
| 2 | package ztoe |
| 3 | |
| 4 | import ( |
| 5 | "context" |
| 6 | "errors" |
| 7 | "fmt" |
| 8 | "io" |
| 9 | "net/http" |
| 10 | "regexp" |
| 11 | "strings" |
| 12 | "time" |
| 13 | |
| 14 | "github.com/PuerkitoBio/goquery" |
| 15 | "golang.org/x/net/html/charset" |
| 16 | |
| 17 | "olexsmir.xyz/rss-tools/app" |
| 18 | "olexsmir.xyz/rss-tools/app/atom" |
| 19 | ) |
| 20 | |
// ztoe is the schedule source. It holds the HTTP fetch function used to
// download the schedule page; Register wires it to the application's Get.
type ztoe struct {
	// get performs a GET request for rawURL, bound to ctx.
	get func(ctx context.Context, rawURL string) (*http.Response, error)
}
| 24 | |
// sourceURL is the page the schedule is scraped from; Register passes it to
// the handler.
const sourceURL = "https://ztoe.com.ua/unhooking-search.php"
| 26 | |
| 27 | func Register(a *app.App) error { |
| 28 | z := ztoe{get: a.Get} |
| 29 | a.Route("GET /ztoe/{group}/{subgroup}", z.handler(sourceURL)) |
| 30 | a.Logger.Info("ztoe source registered") |
| 31 | return nil |
| 32 | } |
| 33 | |
| 34 | func (z *ztoe) handler(scheduleURL string) http.HandlerFunc { |
| 35 | return func(w http.ResponseWriter, r *http.Request) { |
| 36 | group := r.PathValue("group") |
| 37 | subgroup := r.PathValue("subgroup") |
| 38 | |
| 39 | schedule, err := z.fetchSchedule(r.Context(), scheduleURL) |
| 40 | if err != nil { |
| 41 | http.Error(w, "failed to fetch schedule", http.StatusBadGateway) |
| 42 | return |
| 43 | } |
| 44 | |
| 45 | row, ok := schedule.Rows[group+"."+subgroup] |
| 46 | if !ok { |
| 47 | http.Error(w, "group/subgroup not found", http.StatusNotFound) |
| 48 | return |
| 49 | } |
| 50 | |
| 51 | slots := make([]slot, 0, len(schedule.TimeSlots)) |
| 52 | for i, t := range schedule.TimeSlots { |
| 53 | slots = append(slots, slot{Range: t, Outage: i < len(row) && row[i]}) |
| 54 | } |
| 55 | |
| 56 | feed := atom.NewFeed( |
| 57 | fmt.Sprintf("ZTOE power outages for %s.%s", group, subgroup), |
| 58 | fmt.Sprintf("ztoe-%s-%s", group, subgroup)) |
| 59 | |
| 60 | for _, interval := range buildOutageIntervals(slots) { |
| 61 | feed.Add(&atom.Entry{ |
| 62 | Title: fmt.Sprintf("Power outage %s-%s", interval.Start, interval.End), |
| 63 | ID: fmt.Sprintf("ztoe-%s-%s-%s-%s-%s", group, subgroup, schedule.Date, strings.ReplaceAll(interval.Start, ":", ""), strings.ReplaceAll(interval.End, ":", "")), |
| 64 | Content: atom.NewText(fmt.Sprintf("Date: %s\nGroup: %s.%s\nTime: %s-%s", schedule.Date, group, subgroup, interval.Start, interval.End), ""), |
| 65 | Updated: atom.Time(intervalTime(schedule.Date, interval.Start)), |
| 66 | }) |
| 67 | } |
| 68 | if err := feed.Render(w); err != nil { |
| 69 | http.Error(w, "failed to render feed", http.StatusInternalServerError) |
| 70 | return |
| 71 | } |
| 72 | } |
| 73 | } |
| 74 | |
// Patterns used while scraping the schedule table.
var (
	// bgColorRe captures the value of a "background" or "background-color"
	// declaration (case-insensitive) up to the next ';'.
	bgColorRe = regexp.MustCompile(`(?i)background(?:-color)?\s*:\s*([^;]+)`)

	// dateRe finds a DD.MM.YYYY-shaped date anywhere in a string.
	dateRe = regexp.MustCompile(`\d{2}\.\d{2}\.\d{4}`)

	// subgroupRe matches a whole string of the form "<digits>.<digits>".
	subgroupRe = regexp.MustCompile(`^\d+\.\d+$`)

	// timeSlotRe matches a whole string of the form "HH:MM-HH:MM".
	timeSlotRe = regexp.MustCompile(`^\d{2}:\d{2}-\d{2}:\d{2}$`)
)
| 81 | |
type (
	// slot is one schedule time range together with its outage flag.
	slot struct {
		Range  string `json:"range"`  // "HH:MM-HH:MM" label of the slot
		Outage bool   `json:"outage"` // true when the slot is an outage
	}

	// outageInterval is a run of adjacent outage slots merged into a single
	// start/end pair.
	outageInterval struct {
		Start string `json:"start"`
		End   string `json:"end"`
	}

	// parsedSchedule is the result of scraping the schedule table.
	parsedSchedule struct {
		Date      string            // first DD.MM.YYYY date found in the table, or ""
		TimeSlots []string          // slot labels in table order
		Rows      map[string][]bool // outage flags per slot, keyed by "<group>.<subgroup>"
	}
)
| 97 | |
| 98 | func (z *ztoe) fetchSchedule(ctx context.Context, scheduleURL string) (*parsedSchedule, error) { |
| 99 | res, err := z.get(ctx, scheduleURL) |
| 100 | if err != nil { |
| 101 | return nil, err |
| 102 | } |
| 103 | defer res.Body.Close() |
| 104 | |
| 105 | if res.StatusCode != http.StatusOK { |
| 106 | return nil, fmt.Errorf("unexpected status: %s", res.Status) |
| 107 | } |
| 108 | |
| 109 | return parseSchedule(res.Body, res.Header.Get("Content-Type")) |
| 110 | } |
| 111 | |
| 112 | func parseSchedule(r io.Reader, contentType string) (*parsedSchedule, error) { |
| 113 | if contentType == "" { |
| 114 | contentType = "text/html" |
| 115 | } |
| 116 | decoded, err := charset.NewReader(r, contentType) |
| 117 | if err != nil { |
| 118 | return nil, err |
| 119 | } |
| 120 | |
| 121 | doc, err := goquery.NewDocumentFromReader(decoded) |
| 122 | if err != nil { |
| 123 | return nil, err |
| 124 | } |
| 125 | |
| 126 | table := findScheduleTable(doc) |
| 127 | if table == nil { |
| 128 | return nil, errors.New("failed to locate schedule table") |
| 129 | } |
| 130 | |
| 131 | timeSlots := extractTimeSlots(table) |
| 132 | if len(timeSlots) == 0 { |
| 133 | return nil, errors.New("failed to parse schedule time slots") |
| 134 | } |
| 135 | |
| 136 | rows := extractRows(table, len(timeSlots)) |
| 137 | if len(rows) == 0 { |
| 138 | return nil, errors.New("failed to parse schedule rows") |
| 139 | } |
| 140 | |
| 141 | return &parsedSchedule{ |
| 142 | Date: extractDate(table), |
| 143 | TimeSlots: timeSlots, |
| 144 | Rows: rows, |
| 145 | }, nil |
| 146 | } |
| 147 | |
| 148 | func findScheduleTable(doc *goquery.Document) *goquery.Selection { |
| 149 | var found *goquery.Selection |
| 150 | doc.Find("table").EachWithBreak(func(_ int, table *goquery.Selection) bool { |
| 151 | if len(extractTimeSlots(table)) >= 48 { |
| 152 | found = table |
| 153 | return false |
| 154 | } |
| 155 | return true |
| 156 | }) |
| 157 | return found |
| 158 | } |
| 159 | |
| 160 | func extractDate(table *goquery.Selection) string { |
| 161 | date := "" |
| 162 | table.Find("td,th").EachWithBreak(func(_ int, cell *goquery.Selection) bool { |
| 163 | match := dateRe.FindString(normalizeWhitespace(cell.Text())) |
| 164 | if match == "" { |
| 165 | return true |
| 166 | } |
| 167 | date = match |
| 168 | return false |
| 169 | }) |
| 170 | return date |
| 171 | } |
| 172 | |
| 173 | func extractTimeSlots(table *goquery.Selection) []string { |
| 174 | slots := make([]string, 0, 48) |
| 175 | seen := make(map[string]struct{}, 48) |
| 176 | table.Find("td,th").Each(func(_ int, cell *goquery.Selection) { |
| 177 | text := normalizeWhitespace(cell.Text()) |
| 178 | if !timeSlotRe.MatchString(text) { |
| 179 | return |
| 180 | } |
| 181 | if _, ok := seen[text]; ok { |
| 182 | return |
| 183 | } |
| 184 | seen[text] = struct{}{} |
| 185 | slots = append(slots, text) |
| 186 | }) |
| 187 | return slots |
| 188 | } |
| 189 | |
| 190 | func extractRows(table *goquery.Selection, slotCount int) map[string][]bool { |
| 191 | rows := make(map[string][]bool) |
| 192 | table.Find("tr").Each(func(_ int, tr *goquery.Selection) { |
| 193 | tds := tr.ChildrenFiltered("td") |
| 194 | if tds.Length() == 0 { |
| 195 | return |
| 196 | } |
| 197 | |
| 198 | subgroup := "" |
| 199 | subgroupIdx := -1 |
| 200 | tds.EachWithBreak(func(i int, td *goquery.Selection) bool { |
| 201 | text := normalizeWhitespace(td.Text()) |
| 202 | if !subgroupRe.MatchString(text) { |
| 203 | return true |
| 204 | } |
| 205 | subgroup = text |
| 206 | subgroupIdx = i |
| 207 | return false |
| 208 | }) |
| 209 | if subgroup == "" { |
| 210 | return |
| 211 | } |
| 212 | |
| 213 | slots := make([]bool, 0, slotCount) |
| 214 | for i := subgroupIdx + 1; i < tds.Length() && len(slots) < slotCount; i++ { |
| 215 | td := tds.Eq(i) |
| 216 | style, ok := td.Attr("style") |
| 217 | if !ok { |
| 218 | continue |
| 219 | } |
| 220 | color, ok := extractBackgroundColor(style) |
| 221 | if !ok { |
| 222 | continue |
| 223 | } |
| 224 | slots = append(slots, isOutageColor(color)) |
| 225 | } |
| 226 | |
| 227 | if len(slots) == slotCount { |
| 228 | rows[subgroup] = slots |
| 229 | } |
| 230 | }) |
| 231 | return rows |
| 232 | } |
| 233 | |
| 234 | func extractBackgroundColor(style string) (string, bool) { |
| 235 | match := bgColorRe.FindStringSubmatch(style) |
| 236 | if len(match) < 2 { |
| 237 | return "", false |
| 238 | } |
| 239 | color := strings.ToLower(strings.TrimSpace(match[1])) |
| 240 | return strings.ReplaceAll(color, " ", ""), true |
| 241 | } |
| 242 | |
// isOutageColor reports whether a background colour marks an outage. The
// empty string, the white keywords/hex codes below, and any value containing
// "255,255,255" (white in rgb/rgba notation) are "no outage"; every other
// value is an outage. The comparison is exact and case-sensitive.
func isOutageColor(color string) bool {
	switch color {
	case "", "white", "#fff", "#ffffff":
		return false
	}
	// Covers rgb(255,255,255) and rgba(255,255,255,<alpha>).
	return !strings.Contains(color, "255,255,255")
}
| 251 | |
| 252 | func buildOutageIntervals(slots []slot) []outageInterval { |
| 253 | intervals := make([]outageInterval, 0) |
| 254 | var current outageInterval |
| 255 | active := false |
| 256 | |
| 257 | for _, slot := range slots { |
| 258 | start, end, ok := strings.Cut(slot.Range, "-") |
| 259 | if !ok { |
| 260 | continue |
| 261 | } |
| 262 | |
| 263 | if slot.Outage { |
| 264 | if !active { |
| 265 | current = outageInterval{Start: start, End: end} |
| 266 | active = true |
| 267 | continue |
| 268 | } |
| 269 | if current.End == start { |
| 270 | current.End = end |
| 271 | continue |
| 272 | } |
| 273 | intervals = append(intervals, current) |
| 274 | current = outageInterval{Start: start, End: end} |
| 275 | continue |
| 276 | } |
| 277 | |
| 278 | if active { |
| 279 | intervals = append(intervals, current) |
| 280 | active = false |
| 281 | } |
| 282 | } |
| 283 | if active { |
| 284 | intervals = append(intervals, current) |
| 285 | } |
| 286 | return intervals |
| 287 | } |
| 288 | |
// normalizeWhitespace trims s and collapses every run of whitespace into a
// single space.
func normalizeWhitespace(s string) string {
	// strings.Fields already discards leading and trailing whitespace.
	words := strings.Fields(s)
	return strings.Join(words, " ")
}
| 292 | |
// intervalTime combines a "DD.MM.YYYY" date and an "HH:MM" clock value into a
// time.Time in the process's local time zone.
//
// If date does not parse, the current time is returned. If hhmm does not
// parse, midnight of the given date is returned.
//
// NOTE(review): time.Local is the server's zone, not necessarily the zone the
// schedule is published in — confirm this is intended.
func intervalTime(date, hhmm string) time.Time {
	midnight, err := time.ParseInLocation("02.01.2006", date, time.Local)
	if err != nil {
		return time.Now()
	}

	clock, err := time.Parse("15:04", hhmm)
	if err != nil {
		return midnight
	}

	year, month, dayOfMonth := midnight.Date()
	return time.Date(year, month, dayOfMonth, clock.Hour(), clock.Minute(), 0, 0, midnight.Location())
}
| 307 | } |