all repos

rss-tools @ db0b936

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/sources/musicfeed/musicfeed.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
only refresh music cache once a week, 10 days ago
1
package musicfeed
2
3
import (
4
	"context"
5
	"encoding/binary"
6
	"fmt"
7
	"html"
8
	"log/slog"
9
	"net/http"
10
	"sort"
11
	"strings"
12
	"sync"
13
	"sync/atomic"
14
	"time"
15
16
	"olexsmir.xyz/rss-tools/app"
17
	"olexsmir.xyz/rss-tools/app/atom"
18
)
19
20
// artistEntry is one parsed item of the configured artist list.
type artistEntry struct {
	// label is the trimmed text of the entry, or the part before "::" when
	// the entry has the "label::mbid" form. It may itself be an MBID.
	label string

	// mbid is the trimmed part after "::"; empty when the entry has no "::".
	mbid string
}
24
25
// release is the flattened view of one MusicBrainz release that the feed
// generator works with.
type release struct {
	// Identity.
	id             string // release ID; used in the entry link and cover-art URL
	releaseGroupID string // key used for de-duplication; may be empty

	// Descriptive data.
	title       string
	date        time.Time // parsed release date; entries are sorted by it
	releaseType string    // primary type of the release group
	artistName  string    // name from the first artist credit, if any
	label       string    // display label of the artist; preferred over artistName

	// hasArtwork reports whether the release has cover art to embed.
	hasArtwork bool
}
35
36
type releaseFetcher interface {
37
	searchArtist(ctx context.Context, name string) (string, string, error)
38
	fetchArtist(ctx context.Context, mbid string) (string, error)
39
	fetchReleases(ctx context.Context, mbid string) ([]mbRelease, error)
40
}
41
42
type musicfeed struct {
43
	bucket    *app.Bucket
44
	artists   []string
45
	api       releaseFetcher
46
	maxAge    time.Duration
47
	logger    *slog.Logger
48
	refreshMu sync.Mutex
49
	refreshed atomic.Bool
50
}
51
52
func Register(a *app.App) error {
53
	if len(a.Config.MusicArtists) == 0 {
54
		return nil
55
	}
56
57
	bucket, err := a.Bucket("musicfeed")
58
	if err != nil {
59
		return err
60
	}
61
62
	maxAge := time.Duration(a.Config.MusicMaxAgeDays) * 24 * time.Hour
63
	if maxAge <= 0 {
64
		maxAge = 30 * 24 * time.Hour
65
	}
66
67
	mf := &musicfeed{
68
		bucket:  bucket,
69
		artists: a.Config.MusicArtists,
70
		api:     newMusicBrainzAPI(a.Client),
71
		maxAge:  maxAge,
72
		logger:  a.Logger,
73
	}
74
75
	a.AddWorker(mf.worker)
76
	a.Route("GET /music", mf.handleMusic)
77
78
	a.Logger.Info("musicfeed source registered")
79
	return nil
80
}
81
82
// handleMusic serves the cached Atom feed stored under the "feed" key.
//
// If no refresh has run in this process yet and either today is Friday or the
// cache is reported missing, it performs one synchronous refresh before
// serving. It answers 500 when the cache cannot be read and 503 when no feed
// has been stored yet.
func (mf *musicfeed) handleMusic(w http.ResponseWriter, r *http.Request) {
	if !mf.refreshed.Load() && (time.Now().Weekday() == time.Friday || mf.cacheMissing()) {
		mf.refreshMu.Lock()
		// Re-check under the lock: another request or the worker may have
		// completed the refresh while this one was waiting.
		if !mf.refreshed.Load() {
			// The result is shared by every later request, so it must not be
			// tied to this client's connection: with the plain request
			// context a disconnect would cancel all fetches and an empty
			// feed would be cached and marked as refreshed.
			mf.refresh(context.WithoutCancel(r.Context()))
			mf.refreshed.Store(true)
		}
		mf.refreshMu.Unlock()
	}

	cached, err := mf.bucket.Get([]byte("feed"))
	if err != nil {
		mf.logger.Error("failed to read cached feed", "err", err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}
	if cached == nil {
		http.Error(w, "feed not yet available", http.StatusServiceUnavailable)
		return
	}

	w.Header().Set("Content-Type", "application/atom+xml; charset=utf-8")
	if _, err := w.Write(cached); err != nil {
		// Headers are already sent; all that is left is to record it.
		mf.logger.Warn("failed to write feed response", "err", err)
	}
}
105
106
func (mf *musicfeed) worker(ctx context.Context) error {
107
	mf.logger.Info("starting musicfeed worker")
108
109
	// Only refresh on Fridays — releases drop on Friday, so we fetch once weekly
110
	// to avoid rate-limiting MusicBrainz.
111
	if time.Now().Weekday() == time.Friday {
112
		mf.maybeRefresh(ctx)
113
	}
114
115
	for {
116
		next := nextFridayRefresh(time.Now())
117
		dur := time.Until(next)
118
		mf.logger.Info("next music feed refresh", "at", next.Format("2006-01-02 15:04"), "in", dur.Round(time.Second))
119
120
		select {
121
		case <-ctx.Done():
122
			return nil
123
		case <-time.After(dur):
124
			mf.maybeRefresh(ctx)
125
		}
126
	}
127
}
128
129
func nextFridayRefresh(after time.Time) time.Time {
130
	const targetHour = 19
131
	y, m, d := after.Date()
132
	loc := after.Location()
133
134
	// If today is Friday before target hour, return today at target hour.
135
	if after.Weekday() == time.Friday {
136
		target := time.Date(y, m, d, targetHour, 0, 0, 0, loc)
137
		if after.Before(target) {
138
			return target
139
		}
140
	}
141
142
	// Otherwise advance to next Friday at target hour.
143
	next := time.Date(y, m, d, 0, 0, 0, 0, loc).Add(24 * time.Hour)
144
	for next.Weekday() != time.Friday {
145
		next = next.Add(24 * time.Hour)
146
	}
147
	return time.Date(next.Year(), next.Month(), next.Day(), targetHour, 0, 0, 0, loc)
148
}
149
150
// cacheMissing reports whether no feed can be served from the cache, either
// because reading the "feed" key failed or because nothing is stored under it.
//
// The handler and maybeRefresh both treat a nil value with a nil error from
// the bucket as "absent", so checking the error alone would never detect an
// empty cache.
func (mf *musicfeed) cacheMissing() bool {
	cached, err := mf.bucket.Get([]byte("feed"))
	return err != nil || cached == nil
}
154
155
// maybeRefresh runs a refresh under refreshMu unless one has already run in
// this process and the stored "refreshed_at" timestamp falls on today's date.
// After refreshing it marks the feed as refreshed.
func (mf *musicfeed) maybeRefresh(ctx context.Context) {
	mf.refreshMu.Lock()
	defer mf.refreshMu.Unlock()

	if mf.refreshed.Load() {
		raw, err := mf.bucket.Get([]byte("refreshed_at"))
		// Uint64 panics on a slice shorter than 8 bytes, so a truncated or
		// foreign value is treated like a missing timestamp.
		if err == nil && len(raw) >= 8 {
			lastRefresh := time.Unix(int64(binary.BigEndian.Uint64(raw)), 0)
			if isSameDay(lastRefresh, time.Now()) {
				return
			}
		}
	}

	mf.logger.Info("starting music feed refresh")
	mf.refresh(ctx)
	mf.refreshed.Store(true)
}
173
174
func isSameDay(a, b time.Time) bool {
175
	ay, am, ad := a.Date()
176
	by, bm, bd := b.Date()
177
	return ay == by && am == bm && ad == bd
178
}
179
180
func (mf *musicfeed) refresh(ctx context.Context) {
181
	type artistResult struct {
182
		releases []release
183
	}
184
185
	var mu sync.Mutex
186
	var all []release
187
	var wg sync.WaitGroup
188
	sem := make(chan struct{}, 5)
189
190
	for _, raw := range mf.artists {
191
		raw := raw
192
		wg.Add(1)
193
		sem <- struct{}{}
194
195
		go func() {
196
			defer wg.Done()
197
			defer func() { <-sem }()
198
199
			entry := parseArtistEntry(raw)
200
			mbid, label := mf.resolveArtist(ctx, entry)
201
			if mbid == "" {
202
				mf.logger.Warn("could not resolve artist, skipping", "entry", raw)
203
				return
204
			}
205
206
			mbReleases, err := mf.api.fetchReleases(ctx, mbid)
207
			if err != nil {
208
				mf.logger.Warn("failed to fetch releases", "artist", label, "err", err)
209
				return
210
			}
211
212
			var artistReleases []release
213
			for _, r := range mbReleases {
214
				if r.Date == "" {
215
					continue
216
				}
217
				date := parseMBDate(r.Date)
218
				if date.IsZero() {
219
					continue
220
				}
221
				if time.Since(date) > mf.maxAge || date.After(time.Now()) {
222
					continue
223
				}
224
				artistName := ""
225
				if len(r.ArtistCredit) > 0 {
226
					artistName = r.ArtistCredit[0].Name
227
				}
228
				artistReleases = append(artistReleases, release{
229
					id:             r.ID,
230
					releaseGroupID: r.ReleaseGroup.ID,
231
					title:          r.Title,
232
					date:           date,
233
					releaseType:    r.ReleaseGroup.PrimaryType,
234
					artistName:     artistName,
235
					label:          label,
236
					hasArtwork:     r.CoverArtArchive.Artwork,
237
				})
238
			}
239
240
			mu.Lock()
241
			all = append(all, artistReleases...)
242
			mu.Unlock()
243
		}()
244
	}
245
246
	wg.Wait()
247
248
	all = dedupeByReleaseGroup(all)
249
250
	sort.Slice(all, func(i, j int) bool {
251
		return all[i].date.After(all[j].date)
252
	})
253
254
	feed := generateFeed(all)
255
	bytes, err := feed.Bytes()
256
	if err != nil {
257
		mf.logger.Error("failed to serialize feed", "err", err)
258
		return
259
	}
260
261
	if err := mf.bucket.Set([]byte("feed"), bytes); err != nil {
262
		mf.logger.Error("failed to cache feed", "err", err)
263
	}
264
265
	var ts [8]byte
266
	binary.BigEndian.PutUint64(ts[:], uint64(time.Now().Unix()))
267
	if err := mf.bucket.Set([]byte("refreshed_at"), ts[:]); err != nil {
268
		mf.logger.Error("failed to save refresh timestamp", "err", err)
269
	}
270
271
	mf.logger.Info("music feed refreshed", "releases", len(all))
272
}
273
274
// resolveArtist turns a parsed artist entry into an MBID and a display label.
//
// Resolution order:
//  1. an explicit MBID on the entry is used as is;
//  2. a non-empty cached "mapping:<label>" value is used;
//  3. a label that is itself an MBID is looked up for its artist name;
//  4. otherwise the label is searched for by name and the result is cached.
//
// It returns an empty MBID when the artist could not be resolved; callers
// treat that as "skip this artist".
func (mf *musicfeed) resolveArtist(ctx context.Context, entry artistEntry) (string, string) {
	if entry.mbid != "" {
		return entry.mbid, entry.label
	}

	key := []byte("mapping:" + entry.label)
	// An empty cached value is not a usable MBID; treat it as a miss so the
	// artist gets looked up again instead of being skipped on every refresh.
	if cached, err := mf.bucket.Get(key); err == nil && len(cached) > 0 {
		return string(cached), entry.label
	}

	if isMBID(entry.label) {
		name, err := mf.api.fetchArtist(ctx, entry.label)
		if err != nil {
			// The MBID is still usable; only the display name is unknown.
			mf.logger.Warn("failed to fetch artist name", "mbid", entry.label, "err", err)
			return entry.label, entry.label
		}
		if err := mf.bucket.Set([]byte("mapping:"+name), []byte(entry.label)); err != nil {
			mf.logger.Warn("failed to cache artist mapping", "err", err)
		}
		return entry.label, name
	}

	mbid, name, err := mf.api.searchArtist(ctx, entry.label)
	if err != nil {
		mf.logger.Warn("failed to search artist", "label", entry.label, "err", err)
		return "", entry.label
	}

	// Only remember real matches: caching an empty MBID would make the miss
	// permanent.
	if mbid != "" {
		if err := mf.bucket.Set(key, []byte(mbid)); err != nil {
			mf.logger.Warn("failed to cache artist mapping", "err", err)
		}
	}

	return mbid, name
}
308
309
// parseArtistEntry splits a configured artist string at the first "::" into a
// label and an MBID, trimming surrounding whitespace from both. Without "::"
// the whole trimmed string becomes the label and the MBID stays empty.
func parseArtistEntry(raw string) artistEntry {
	// When the separator is absent, Cut returns the whole input as the first
	// part, so the label is handled the same way in both cases.
	name, id, hasID := strings.Cut(raw, "::")

	entry := artistEntry{label: strings.TrimSpace(name)}
	if hasID {
		entry.mbid = strings.TrimSpace(id)
	}
	return entry
}
316
317
// isMBID reports whether s has the textual UUID shape 8-4-4-4-12: exactly 36
// bytes, '-' at offsets 8, 13, 18 and 23, and hexadecimal digits (either
// case) everywhere else.
func isMBID(s string) bool {
	const mbidLen = 36
	if len(s) != mbidLen {
		return false
	}

	for i := 0; i < mbidLen; i++ {
		b := s[i]

		if i == 8 || i == 13 || i == 18 || i == 23 {
			if b != '-' {
				return false
			}
			continue
		}

		isDigit := b >= '0' && b <= '9'
		isLowerHex := b >= 'a' && b <= 'f'
		isUpperHex := b >= 'A' && b <= 'F'
		if !isDigit && !isLowerHex && !isUpperHex {
			return false
		}
	}
	return true
}
335
336
// dedupeByReleaseGroup keeps one release per release group, preserving the
// position of the first release seen for each group. A later release of the
// same group takes over that position only if it has artwork and the kept one
// does not. Releases without a release-group ID are always kept.
func dedupeByReleaseGroup(releases []release) []release {
	var kept []release
	slot := make(map[string]int) // release-group ID -> index in kept

	for i := range releases {
		cur := releases[i]
		group := cur.releaseGroupID

		if group == "" {
			kept = append(kept, cur)
			continue
		}

		pos, dup := slot[group]
		if !dup {
			slot[group] = len(kept)
			kept = append(kept, cur)
			continue
		}

		if cur.hasArtwork && !kept[pos].hasArtwork {
			kept[pos] = cur
		}
	}
	return kept
}
355
356
// parseMBDate parses a date given as "YYYY-MM-DD", "YYYY-MM" or "YYYY",
// trying the layouts in that order. It returns the zero time when s matches
// none of them.
func parseMBDate(s string) time.Time {
	for _, layout := range [...]string{"2006-01-02", "2006-01", "2006"} {
		parsed, err := time.Parse(layout, s)
		if err != nil {
			continue
		}
		return parsed
	}
	return time.Time{}
}
365
366
func generateFeed(releases []release) *atom.Feed {
367
	feed := atom.NewFeed("New Music Releases", "musicfeed")
368
	for _, r := range releases {
369
		displayName := r.label
370
		if displayName == "" {
371
			displayName = r.artistName
372
		}
373
374
		links := []atom.Link{
375
			{
376
				Rel:  "alternate",
377
				Href: fmt.Sprintf("https://musicbrainz.org/release/%s", r.id),
378
			},
379
		}
380
381
		content, contentType := releaseContent(r, displayName)
382
383
		if r.hasArtwork {
384
			links = append(links, atom.Link{
385
				Rel:  "enclosure",
386
				Type: "image/jpeg",
387
				Href: fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id),
388
			})
389
		}
390
391
		releaseType := strings.TrimSpace(r.releaseType)
392
		title := fmt.Sprintf("%s — %s", displayName, r.title)
393
		if releaseType != "" {
394
			title += fmt.Sprintf(" (%s)", releaseType)
395
		}
396
397
		feed.Add(&atom.Entry{
398
			ID:      r.id,
399
			Title:   title,
400
			Content: atom.NewText(content, contentType),
401
			Updated: atom.Time(r.date),
402
			Link:    links,
403
		})
404
	}
405
	return feed
406
}
407
408
func releaseContent(r release, displayName string) (string, string) {
409
	if !r.hasArtwork {
410
		releaseType := strings.TrimSpace(r.releaseType)
411
		if releaseType != "" {
412
			return fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType), ""
413
		}
414
		return fmt.Sprintf("%s by %s", r.title, displayName), ""
415
	}
416
417
	imageURL := fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id)
418
	parts := make([]string, 0, 4)
419
	parts = append(parts, "<body>")
420
421
	releaseType := strings.TrimSpace(r.releaseType)
422
	var text string
423
	if releaseType != "" {
424
		text = fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType)
425
	} else {
426
		text = fmt.Sprintf("%s by %s", r.title, displayName)
427
	}
428
	parts = append(parts, "<p>"+html.EscapeString(text)+"</p>")
429
	parts = append(parts,
430
		fmt.Sprintf(`<p><img src="%s" alt="%s"/></p>`, html.EscapeString(imageURL), html.EscapeString(r.title)))
431
	parts = append(parts, "</body>")
432
433
	return strings.Join(parts, ""), "xhtml"
434
}