all repos

rss-tools @ aeb1a59

Get RSS feeds from sources that (I need and) don't provide them

rss-tools/sources/musicfeed/musicfeed.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
add musicfeed, 14 days ago
1
package musicfeed
2
3
import (
4
	"context"
5
	"encoding/binary"
6
	"fmt"
7
	"html"
8
	"log/slog"
9
	"net/http"
10
	"sort"
11
	"strings"
12
	"sync"
13
	"sync/atomic"
14
	"time"
15
16
	"olexsmir.xyz/rss-tools/app"
17
	"olexsmir.xyz/rss-tools/app/atom"
18
)
19
20
// artistEntry is one parsed item of the configured artist list.
type artistEntry struct {
	// label is the text before "::", or the whole trimmed entry when the
	// entry contains no "::". It may be an artist name or a bare MBID.
	label string
	// mbid is the text after "::"; empty when the entry contains no "::".
	mbid string
}
24
25
// release is the internal, flattened view of one fetched release that the
// feed is built from.
type release struct {
	id             string    // release ID; used in the MusicBrainz and cover-art URLs and as the entry ID
	releaseGroupID string    // release-group ID; releases sharing it are collapsed into one entry
	title          string    // release title
	date           time.Time // parsed release date
	releaseType    string    // primary type of the release group; may be empty
	artistName     string    // name of the first artist credit; empty when there is none
	label          string    // display label resolved for the configured artist entry
	hasArtwork     bool      // whether the release reports cover artwork
}
35
36
type releaseFetcher interface {
37
	searchArtist(ctx context.Context, name string) (string, string, error)
38
	fetchArtist(ctx context.Context, mbid string) (string, error)
39
	fetchReleases(ctx context.Context, mbid string) ([]mbRelease, error)
40
}
41
42
type musicfeed struct {
43
	bucket    *app.Bucket
44
	artists   []string
45
	api       releaseFetcher
46
	maxAge    time.Duration
47
	logger    *slog.Logger
48
	refreshMu sync.Mutex
49
	refreshed atomic.Bool
50
}
51
52
func Register(a *app.App) error {
53
	if len(a.Config.MusicArtists) == 0 {
54
		return nil
55
	}
56
57
	bucket, err := a.Bucket("musicfeed")
58
	if err != nil {
59
		return err
60
	}
61
62
	maxAge := time.Duration(a.Config.MusicMaxAgeDays) * 24 * time.Hour
63
	if maxAge <= 0 {
64
		maxAge = 30 * 24 * time.Hour
65
	}
66
67
	mf := &musicfeed{
68
		bucket:  bucket,
69
		artists: a.Config.MusicArtists,
70
		api:     newMusicBrainzAPI(a.Client),
71
		maxAge:  maxAge,
72
		logger:  a.Logger,
73
	}
74
75
	a.AddWorker(mf.worker)
76
	a.Route("GET /music", mf.handleMusic)
77
78
	a.Logger.Info("musicfeed source registered")
79
	return nil
80
}
81
82
// handleMusic serves the cached Atom feed.
//
// If no refresh has run in this process yet, the request performs one
// synchronously before reading the cache. It answers 500 when the cache
// cannot be read, 503 when no feed has been stored, and otherwise writes the
// cached bytes with an Atom content type.
func (mf *musicfeed) handleMusic(w http.ResponseWriter, r *http.Request) {
	if !mf.refreshed.Load() {
		mf.refreshMu.Lock()
		// Check again under the lock: the worker or another request may have
		// finished a refresh while this one was waiting.
		if !mf.refreshed.Load() {
			mf.refresh(r.Context())
			mf.refreshed.Store(true)
		}
		mf.refreshMu.Unlock()
	}

	cached, err := mf.bucket.Get([]byte("feed"))
	if err != nil {
		// Use the injected logger like every other method of this type.
		mf.logger.Error("failed to read cached feed", "err", err)
		http.Error(w, "Internal server error", http.StatusInternalServerError)
		return
	}
	if cached == nil {
		http.Error(w, "feed not yet available", http.StatusServiceUnavailable)
		return
	}

	w.Header().Set("Content-Type", "application/atom+xml; charset=utf-8")
	if _, err := w.Write(cached); err != nil {
		// The status line is already sent, so the failure can only be recorded.
		mf.logger.Warn("failed to write feed response", "err", err)
	}
}
105
106
func (mf *musicfeed) worker(ctx context.Context) error {
107
	mf.logger.Info("starting musicfeed worker")
108
109
	mf.maybeRefresh(ctx)
110
111
	ticker := time.NewTicker(1 * time.Hour)
112
	defer ticker.Stop()
113
114
	for {
115
		select {
116
		case <-ctx.Done():
117
			return nil
118
		case <-ticker.C:
119
			mf.maybeRefresh(ctx)
120
		}
121
	}
122
}
123
124
// maybeRefresh runs a refresh when one is due.
//
// The first call in a process always refreshes. After that a refresh only
// happens on a Friday, and only if the stored "refreshed_at" timestamp does
// not fall on the current day. A missing, unreadable or malformed timestamp
// counts as "not refreshed today".
func (mf *musicfeed) maybeRefresh(ctx context.Context) {
	mf.refreshMu.Lock()
	defer mf.refreshMu.Unlock()

	if mf.refreshed.Load() {
		// Read the clock after acquiring the lock so that time spent waiting
		// for another refresh does not leave us with a stale "now".
		now := time.Now()
		if now.Weekday() != time.Friday {
			return
		}

		raw, err := mf.bucket.Get([]byte("refreshed_at"))
		// Uint64 panics on fewer than 8 bytes, so a truncated value must be
		// rejected before decoding.
		if err == nil && len(raw) >= 8 {
			lastRefresh := time.Unix(int64(binary.BigEndian.Uint64(raw)), 0)
			if isSameDay(lastRefresh, now) {
				return
			}
		}
	}

	mf.logger.Info("starting music feed refresh")
	mf.refresh(ctx)
	mf.refreshed.Store(true)
}
148
149
// isSameDay reports whether a and b have the same year, month and day.
// Each value is read in its own location; no time-zone conversion is done.
func isSameDay(a, b time.Time) bool {
	ay, am, ad := a.Date()
	by, bm, bd := b.Date()
	return ay == by && am == bm && ad == bd
}
154
155
func (mf *musicfeed) refresh(ctx context.Context) {
156
	type artistResult struct {
157
		releases []release
158
	}
159
160
	var mu sync.Mutex
161
	var all []release
162
	var wg sync.WaitGroup
163
	sem := make(chan struct{}, 5)
164
165
	for _, raw := range mf.artists {
166
		raw := raw
167
		wg.Add(1)
168
		sem <- struct{}{}
169
170
		go func() {
171
			defer wg.Done()
172
			defer func() { <-sem }()
173
174
			entry := parseArtistEntry(raw)
175
			mbid, label := mf.resolveArtist(ctx, entry)
176
			if mbid == "" {
177
				mf.logger.Warn("could not resolve artist, skipping", "entry", raw)
178
				return
179
			}
180
181
			mbReleases, err := mf.api.fetchReleases(ctx, mbid)
182
			if err != nil {
183
				mf.logger.Warn("failed to fetch releases", "artist", label, "err", err)
184
				return
185
			}
186
187
			var artistReleases []release
188
			for _, r := range mbReleases {
189
				if r.Date == "" {
190
					continue
191
				}
192
				date := parseMBDate(r.Date)
193
				if date.IsZero() {
194
					continue
195
				}
196
				if time.Since(date) > mf.maxAge || date.After(time.Now()) {
197
					continue
198
				}
199
				artistName := ""
200
				if len(r.ArtistCredit) > 0 {
201
					artistName = r.ArtistCredit[0].Name
202
				}
203
				artistReleases = append(artistReleases, release{
204
					id:             r.ID,
205
					releaseGroupID: r.ReleaseGroup.ID,
206
					title:          r.Title,
207
					date:           date,
208
					releaseType:    r.ReleaseGroup.PrimaryType,
209
					artistName:     artistName,
210
					label:          label,
211
					hasArtwork:     r.CoverArtArchive.Artwork,
212
				})
213
			}
214
215
			mu.Lock()
216
			all = append(all, artistReleases...)
217
			mu.Unlock()
218
		}()
219
	}
220
221
	wg.Wait()
222
223
	all = dedupeByReleaseGroup(all)
224
225
	sort.Slice(all, func(i, j int) bool {
226
		return all[i].date.After(all[j].date)
227
	})
228
229
	feed := generateFeed(all)
230
	bytes, err := feed.Bytes()
231
	if err != nil {
232
		mf.logger.Error("failed to serialize feed", "err", err)
233
		return
234
	}
235
236
	if err := mf.bucket.Set([]byte("feed"), bytes); err != nil {
237
		mf.logger.Error("failed to cache feed", "err", err)
238
	}
239
240
	var ts [8]byte
241
	binary.BigEndian.PutUint64(ts[:], uint64(time.Now().Unix()))
242
	if err := mf.bucket.Set([]byte("refreshed_at"), ts[:]); err != nil {
243
		mf.logger.Error("failed to save refresh timestamp", "err", err)
244
	}
245
246
	mf.logger.Info("music feed refreshed", "releases", len(all))
247
}
248
249
// resolveArtist turns a parsed artist entry into an MBID and a display label.
//
// Resolution order:
//  1. an explicit MBID on the entry is used as is;
//  2. a non-empty "mapping:<label>" value in the bucket is used;
//  3. a label that itself looks like an MBID is used as the MBID, and the
//     artist name is fetched for display (falling back to the label);
//  4. otherwise the label is searched by name and the result is cached.
//
// The returned MBID is empty when the artist could not be resolved.
func (mf *musicfeed) resolveArtist(ctx context.Context, entry artistEntry) (string, string) {
	if entry.mbid != "" {
		return entry.mbid, entry.label
	}

	// An empty stored value is treated as a miss; returning it would make the
	// artist look unresolvable on every later refresh.
	cached, err := mf.bucket.Get([]byte("mapping:" + entry.label))
	if err == nil && len(cached) > 0 {
		return string(cached), entry.label
	}

	if isMBID(entry.label) {
		name, err := mf.api.fetchArtist(ctx, entry.label)
		if err != nil {
			mf.logger.Warn("failed to fetch artist name", "mbid", entry.label, "err", err)
			return entry.label, entry.label
		}
		// Skip the write when there is no name to key the mapping by.
		if name != "" {
			if err := mf.bucket.Set([]byte("mapping:"+name), []byte(entry.label)); err != nil {
				mf.logger.Warn("failed to cache artist mapping", "err", err)
			}
		}
		return entry.label, name
	}

	mbid, name, err := mf.api.searchArtist(ctx, entry.label)
	if err != nil {
		mf.logger.Warn("failed to search artist", "label", entry.label, "err", err)
		return "", entry.label
	}
	if mbid == "" {
		// Nothing was found; do not persist an empty mapping.
		return mbid, name
	}

	if err := mf.bucket.Set([]byte("mapping:"+entry.label), []byte(mbid)); err != nil {
		mf.logger.Warn("failed to cache artist mapping", "err", err)
	}

	return mbid, name
}
283
284
// parseArtistEntry splits a raw configured entry at the first "::" into a
// label and an MBID, trimming surrounding whitespace from both. Without a
// "::" the whole trimmed entry becomes the label and the MBID stays empty.
func parseArtistEntry(raw string) artistEntry {
	label, mbid, found := strings.Cut(raw, "::")
	if !found {
		return artistEntry{label: strings.TrimSpace(raw)}
	}
	return artistEntry{
		label: strings.TrimSpace(label),
		mbid:  strings.TrimSpace(mbid),
	}
}
291
292
// isMBID reports whether s has the textual shape of a UUID: 36 bytes, with
// '-' at offsets 8, 13, 18 and 23 and a hexadecimal digit (either case)
// everywhere else.
func isMBID(s string) bool {
	if len(s) != 36 {
		return false
	}
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch i {
		case 8, 13, 18, 23:
			if c != '-' {
				return false
			}
		default:
			isHex := (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F')
			if !isHex {
				return false
			}
		}
	}
	return true
}
310
311
// dedupeByReleaseGroup keeps one release per release group, preserving the
// position of the first release seen for each group. A later release of the
// same group replaces the kept one only if it has artwork and the kept one
// does not. Releases without a release-group ID are never merged.
func dedupeByReleaseGroup(releases []release) []release {
	// seen maps a release-group ID to the index of its entry in out.
	seen := make(map[string]int)
	var out []release
	for _, r := range releases {
		if r.releaseGroupID == "" {
			out = append(out, r)
			continue
		}

		idx, ok := seen[r.releaseGroupID]
		if !ok {
			seen[r.releaseGroupID] = len(out)
			out = append(out, r)
			continue
		}

		if r.hasArtwork && !out[idx].hasArtwork {
			out[idx] = r
		}
	}
	return out
}
330
331
// parseMBDate parses a date given as "YYYY-MM-DD", "YYYY-MM" or "YYYY".
// Partial dates resolve to the first day of the month or year, in UTC.
// It returns the zero time when s matches none of the layouts.
func parseMBDate(s string) time.Time {
	// Most specific layout first.
	layouts := [...]string{"2006-01-02", "2006-01", "2006"}
	for _, layout := range layouts {
		if t, err := time.Parse(layout, s); err == nil {
			return t
		}
	}
	return time.Time{}
}
340
341
func generateFeed(releases []release) *atom.Feed {
342
	feed := atom.NewFeed("New Music Releases", "musicfeed")
343
	for _, r := range releases {
344
		displayName := r.label
345
		if displayName == "" {
346
			displayName = r.artistName
347
		}
348
349
		links := []atom.Link{
350
			{
351
				Rel:  "alternate",
352
				Href: fmt.Sprintf("https://musicbrainz.org/release/%s", r.id),
353
			},
354
		}
355
356
		content, contentType := releaseContent(r, displayName)
357
358
		if r.hasArtwork {
359
			links = append(links, atom.Link{
360
				Rel:  "enclosure",
361
				Type: "image/jpeg",
362
				Href: fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id),
363
			})
364
		}
365
366
		releaseType := strings.TrimSpace(r.releaseType)
367
		title := fmt.Sprintf("%s — %s", displayName, r.title)
368
		if releaseType != "" {
369
			title += fmt.Sprintf(" (%s)", releaseType)
370
		}
371
372
		feed.Add(&atom.Entry{
373
			ID:      r.id,
374
			Title:   title,
375
			Content: atom.NewText(content, contentType),
376
			Updated: atom.Time(r.date),
377
			Link:    links,
378
		})
379
	}
380
	return feed
381
}
382
383
func releaseContent(r release, displayName string) (string, string) {
384
	if !r.hasArtwork {
385
		releaseType := strings.TrimSpace(r.releaseType)
386
		if releaseType != "" {
387
			return fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType), ""
388
		}
389
		return fmt.Sprintf("%s by %s", r.title, displayName), ""
390
	}
391
392
	imageURL := fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id)
393
	parts := make([]string, 0, 4)
394
	parts = append(parts, "<body>")
395
396
	releaseType := strings.TrimSpace(r.releaseType)
397
	var text string
398
	if releaseType != "" {
399
		text = fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType)
400
	} else {
401
		text = fmt.Sprintf("%s by %s", r.title, displayName)
402
	}
403
	parts = append(parts, "<p>"+html.EscapeString(text)+"</p>")
404
	parts = append(parts,
405
		fmt.Sprintf(`<p><img src="%s" alt="%s"/></p>`, html.EscapeString(imageURL), html.EscapeString(r.title)))
406
	parts = append(parts, "</body>")
407
408
	return strings.Join(parts, ""), "xhtml"
409
}