rss-tools/sources/musicfeed/musicfeed.go (view raw)
Oleksandr Smirnov
Oleksandr Smirnov
olexsmir@gmail.com only refresh music cache once a week, 9 days ago
olexsmir@gmail.com only refresh music cache once a week, 9 days ago
| 1 | package musicfeed |
| 2 | |
| 3 | import ( |
| 4 | "context" |
| 5 | "encoding/binary" |
| 6 | "fmt" |
| 7 | "html" |
| 8 | "log/slog" |
| 9 | "net/http" |
| 10 | "sort" |
| 11 | "strings" |
| 12 | "sync" |
| 13 | "sync/atomic" |
| 14 | "time" |
| 15 | |
| 16 | "olexsmir.xyz/rss-tools/app" |
| 17 | "olexsmir.xyz/rss-tools/app/atom" |
| 18 | ) |
| 19 | |
// artistEntry is one parsed item of the configured artist list.
//
// label holds the text before the "::" separator (or the whole entry when no
// separator is present); mbid holds the text after it and stays empty when
// the entry does not carry one.
type artistEntry struct {
	label, mbid string
}
| 24 | |
// release is the flattened record of a single release from which feed
// entries are generated.
type release struct {
	// Release ID, the ID of the release group it belongs to, and its title.
	id, releaseGroupID, title string
	// Release date parsed from the API's date string.
	date time.Time
	// Primary type of the release group, name of the first credited artist,
	// and the label resolved for the configured artist entry.
	releaseType, artistName, label string
	// Whether the API reports cover artwork for this release.
	hasArtwork bool
}
| 35 | |
| 36 | type releaseFetcher interface { |
| 37 | searchArtist(ctx context.Context, name string) (string, string, error) |
| 38 | fetchArtist(ctx context.Context, mbid string) (string, error) |
| 39 | fetchReleases(ctx context.Context, mbid string) ([]mbRelease, error) |
| 40 | } |
| 41 | |
| 42 | type musicfeed struct { |
| 43 | bucket *app.Bucket |
| 44 | artists []string |
| 45 | api releaseFetcher |
| 46 | maxAge time.Duration |
| 47 | logger *slog.Logger |
| 48 | refreshMu sync.Mutex |
| 49 | refreshed atomic.Bool |
| 50 | } |
| 51 | |
| 52 | func Register(a *app.App) error { |
| 53 | if len(a.Config.MusicArtists) == 0 { |
| 54 | return nil |
| 55 | } |
| 56 | |
| 57 | bucket, err := a.Bucket("musicfeed") |
| 58 | if err != nil { |
| 59 | return err |
| 60 | } |
| 61 | |
| 62 | maxAge := time.Duration(a.Config.MusicMaxAgeDays) * 24 * time.Hour |
| 63 | if maxAge <= 0 { |
| 64 | maxAge = 30 * 24 * time.Hour |
| 65 | } |
| 66 | |
| 67 | mf := &musicfeed{ |
| 68 | bucket: bucket, |
| 69 | artists: a.Config.MusicArtists, |
| 70 | api: newMusicBrainzAPI(a.Client), |
| 71 | maxAge: maxAge, |
| 72 | logger: a.Logger, |
| 73 | } |
| 74 | |
| 75 | a.AddWorker(mf.worker) |
| 76 | a.Route("GET /music", mf.handleMusic) |
| 77 | |
| 78 | a.Logger.Info("musicfeed source registered") |
| 79 | return nil |
| 80 | } |
| 81 | |
| 82 | func (mf *musicfeed) handleMusic(w http.ResponseWriter, r *http.Request) { |
| 83 | if !mf.refreshed.Load() && (time.Now().Weekday() == time.Friday || mf.cacheMissing()) { |
| 84 | mf.refreshMu.Lock() |
| 85 | if !mf.refreshed.Load() { |
| 86 | mf.refresh(r.Context()) |
| 87 | mf.refreshed.Store(true) |
| 88 | } |
| 89 | mf.refreshMu.Unlock() |
| 90 | } |
| 91 | |
| 92 | cached, err := mf.bucket.Get([]byte("feed")) |
| 93 | if err != nil { |
| 94 | slog.Error("failed to read cached feed", "err", err) |
| 95 | http.Error(w, "Internal server error", http.StatusInternalServerError) |
| 96 | return |
| 97 | } |
| 98 | if cached == nil { |
| 99 | http.Error(w, "feed not yet available", http.StatusServiceUnavailable) |
| 100 | return |
| 101 | } |
| 102 | w.Header().Set("Content-Type", "application/atom+xml; charset=utf-8") |
| 103 | w.Write(cached) |
| 104 | } |
| 105 | |
| 106 | func (mf *musicfeed) worker(ctx context.Context) error { |
| 107 | mf.logger.Info("starting musicfeed worker") |
| 108 | |
| 109 | // Only refresh on Fridays — releases drop on Friday, so we fetch once weekly |
| 110 | // to avoid rate-limiting MusicBrainz. |
| 111 | if time.Now().Weekday() == time.Friday { |
| 112 | mf.maybeRefresh(ctx) |
| 113 | } |
| 114 | |
| 115 | for { |
| 116 | next := nextFridayRefresh(time.Now()) |
| 117 | dur := time.Until(next) |
| 118 | mf.logger.Info("next music feed refresh", "at", next.Format("2006-01-02 15:04"), "in", dur.Round(time.Second)) |
| 119 | |
| 120 | select { |
| 121 | case <-ctx.Done(): |
| 122 | return nil |
| 123 | case <-time.After(dur): |
| 124 | mf.maybeRefresh(ctx) |
| 125 | } |
| 126 | } |
| 127 | } |
| 128 | |
// nextFridayRefresh returns the first Friday 19:00 (in after's location) that
// lies strictly after the given instant.
//
// If after falls on a Friday before 19:00 the result is that same day;
// otherwise it is the following Friday. The day offset is applied through
// time.Date's calendar normalisation rather than by adding 24-hour steps, so
// days that are 23 or 25 hours long because of a DST change cannot shift the
// result onto the wrong date or yield a time in the past.
func nextFridayRefresh(after time.Time) time.Time {
	const targetHour = 19
	y, m, d := after.Date()
	loc := after.Location()

	// Whole calendar days from after's date to the coming Friday (0 on a Friday).
	days := (int(time.Friday) - int(after.Weekday()) + 7) % 7
	if days == 0 {
		if target := time.Date(y, m, d, targetHour, 0, 0, 0, loc); after.Before(target) {
			return target
		}
		// Today's slot has already passed: schedule next week's.
		days = 7
	}

	// time.Date normalises an out-of-range day into the following month/year.
	return time.Date(y, m, d+days, targetHour, 0, 0, 0, loc)
}
| 149 | |
| 150 | func (mf *musicfeed) cacheMissing() bool { |
| 151 | _, err := mf.bucket.Get([]byte("feed")) |
| 152 | return err != nil |
| 153 | } |
| 154 | |
| 155 | func (mf *musicfeed) maybeRefresh(ctx context.Context) { |
| 156 | mf.refreshMu.Lock() |
| 157 | defer mf.refreshMu.Unlock() |
| 158 | |
| 159 | if mf.refreshed.Load() { |
| 160 | raw, err := mf.bucket.Get([]byte("refreshed_at")) |
| 161 | if err == nil && raw != nil { |
| 162 | lastRefresh := time.Unix(int64(binary.BigEndian.Uint64(raw)), 0) |
| 163 | if isSameDay(lastRefresh, time.Now()) { |
| 164 | return |
| 165 | } |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | mf.logger.Info("starting music feed refresh") |
| 170 | mf.refresh(ctx) |
| 171 | mf.refreshed.Store(true) |
| 172 | } |
| 173 | |
// isSameDay reports whether a and b fall on the same calendar date, each
// read in its own location.
func isSameDay(a, b time.Time) bool {
	// Year plus day-of-year identifies a calendar date uniquely.
	return a.Year() == b.Year() && a.YearDay() == b.YearDay()
}
| 179 | |
| 180 | func (mf *musicfeed) refresh(ctx context.Context) { |
| 181 | type artistResult struct { |
| 182 | releases []release |
| 183 | } |
| 184 | |
| 185 | var mu sync.Mutex |
| 186 | var all []release |
| 187 | var wg sync.WaitGroup |
| 188 | sem := make(chan struct{}, 5) |
| 189 | |
| 190 | for _, raw := range mf.artists { |
| 191 | raw := raw |
| 192 | wg.Add(1) |
| 193 | sem <- struct{}{} |
| 194 | |
| 195 | go func() { |
| 196 | defer wg.Done() |
| 197 | defer func() { <-sem }() |
| 198 | |
| 199 | entry := parseArtistEntry(raw) |
| 200 | mbid, label := mf.resolveArtist(ctx, entry) |
| 201 | if mbid == "" { |
| 202 | mf.logger.Warn("could not resolve artist, skipping", "entry", raw) |
| 203 | return |
| 204 | } |
| 205 | |
| 206 | mbReleases, err := mf.api.fetchReleases(ctx, mbid) |
| 207 | if err != nil { |
| 208 | mf.logger.Warn("failed to fetch releases", "artist", label, "err", err) |
| 209 | return |
| 210 | } |
| 211 | |
| 212 | var artistReleases []release |
| 213 | for _, r := range mbReleases { |
| 214 | if r.Date == "" { |
| 215 | continue |
| 216 | } |
| 217 | date := parseMBDate(r.Date) |
| 218 | if date.IsZero() { |
| 219 | continue |
| 220 | } |
| 221 | if time.Since(date) > mf.maxAge || date.After(time.Now()) { |
| 222 | continue |
| 223 | } |
| 224 | artistName := "" |
| 225 | if len(r.ArtistCredit) > 0 { |
| 226 | artistName = r.ArtistCredit[0].Name |
| 227 | } |
| 228 | artistReleases = append(artistReleases, release{ |
| 229 | id: r.ID, |
| 230 | releaseGroupID: r.ReleaseGroup.ID, |
| 231 | title: r.Title, |
| 232 | date: date, |
| 233 | releaseType: r.ReleaseGroup.PrimaryType, |
| 234 | artistName: artistName, |
| 235 | label: label, |
| 236 | hasArtwork: r.CoverArtArchive.Artwork, |
| 237 | }) |
| 238 | } |
| 239 | |
| 240 | mu.Lock() |
| 241 | all = append(all, artistReleases...) |
| 242 | mu.Unlock() |
| 243 | }() |
| 244 | } |
| 245 | |
| 246 | wg.Wait() |
| 247 | |
| 248 | all = dedupeByReleaseGroup(all) |
| 249 | |
| 250 | sort.Slice(all, func(i, j int) bool { |
| 251 | return all[i].date.After(all[j].date) |
| 252 | }) |
| 253 | |
| 254 | feed := generateFeed(all) |
| 255 | bytes, err := feed.Bytes() |
| 256 | if err != nil { |
| 257 | mf.logger.Error("failed to serialize feed", "err", err) |
| 258 | return |
| 259 | } |
| 260 | |
| 261 | if err := mf.bucket.Set([]byte("feed"), bytes); err != nil { |
| 262 | mf.logger.Error("failed to cache feed", "err", err) |
| 263 | } |
| 264 | |
| 265 | var ts [8]byte |
| 266 | binary.BigEndian.PutUint64(ts[:], uint64(time.Now().Unix())) |
| 267 | if err := mf.bucket.Set([]byte("refreshed_at"), ts[:]); err != nil { |
| 268 | mf.logger.Error("failed to save refresh timestamp", "err", err) |
| 269 | } |
| 270 | |
| 271 | mf.logger.Info("music feed refreshed", "releases", len(all)) |
| 272 | } |
| 273 | |
| 274 | func (mf *musicfeed) resolveArtist(ctx context.Context, entry artistEntry) (string, string) { |
| 275 | if entry.mbid != "" { |
| 276 | return entry.mbid, entry.label |
| 277 | } |
| 278 | |
| 279 | cached, err := mf.bucket.Get([]byte("mapping:" + entry.label)) |
| 280 | if err == nil && cached != nil { |
| 281 | return string(cached), entry.label |
| 282 | } |
| 283 | |
| 284 | if isMBID(entry.label) { |
| 285 | name, err := mf.api.fetchArtist(ctx, entry.label) |
| 286 | if err != nil { |
| 287 | mf.logger.Warn("failed to fetch artist name", "mbid", entry.label, "err", err) |
| 288 | return entry.label, entry.label |
| 289 | } |
| 290 | if err := mf.bucket.Set([]byte("mapping:"+name), []byte(entry.label)); err != nil { |
| 291 | mf.logger.Warn("failed to cache artist mapping", "err", err) |
| 292 | } |
| 293 | return entry.label, name |
| 294 | } |
| 295 | |
| 296 | mbid, name, err := mf.api.searchArtist(ctx, entry.label) |
| 297 | if err != nil { |
| 298 | mf.logger.Warn("failed to search artist", "label", entry.label, "err", err) |
| 299 | return "", entry.label |
| 300 | } |
| 301 | |
| 302 | if err := mf.bucket.Set([]byte("mapping:"+entry.label), []byte(mbid)); err != nil { |
| 303 | mf.logger.Warn("failed to cache artist mapping", "err", err) |
| 304 | } |
| 305 | |
| 306 | return mbid, name |
| 307 | } |
| 308 | |
| 309 | func parseArtistEntry(raw string) artistEntry { |
| 310 | label, mbid, found := strings.Cut(raw, "::") |
| 311 | if found { |
| 312 | return artistEntry{label: strings.TrimSpace(label), mbid: strings.TrimSpace(mbid)} |
| 313 | } |
| 314 | return artistEntry{label: strings.TrimSpace(raw)} |
| 315 | } |
| 316 | |
// isMBID reports whether s has the textual shape of an MBID: 36 bytes made of
// hexadecimal digits (either case) with '-' at byte offsets 8, 13, 18 and 23.
func isMBID(s string) bool {
	const mbidLen = 36
	if len(s) != mbidLen {
		return false
	}
	for i := 0; i < mbidLen; i++ {
		b := s[i]
		if i == 8 || i == 13 || i == 18 || i == 23 {
			if b != '-' {
				return false
			}
			continue
		}
		digit := '0' <= b && b <= '9'
		lower := 'a' <= b && b <= 'f'
		upper := 'A' <= b && b <= 'F'
		if !digit && !lower && !upper {
			return false
		}
	}
	return true
}
| 335 | |
| 336 | func dedupeByReleaseGroup(releases []release) []release { |
| 337 | seen := make(map[string]int) |
| 338 | var out []release |
| 339 | for _, r := range releases { |
| 340 | if r.releaseGroupID == "" { |
| 341 | out = append(out, r) |
| 342 | continue |
| 343 | } |
| 344 | if idx, ok := seen[r.releaseGroupID]; ok { |
| 345 | if r.hasArtwork && !out[idx].hasArtwork { |
| 346 | out[idx] = r |
| 347 | } |
| 348 | continue |
| 349 | } |
| 350 | seen[r.releaseGroupID] = len(out) |
| 351 | out = append(out, r) |
| 352 | } |
| 353 | return out |
| 354 | } |
| 355 | |
// parseMBDate parses a date given as "YYYY-MM-DD", "YYYY-MM" or "YYYY",
// trying the layouts in that order. Missing month or day default to the
// first. The zero time is returned when no layout matches.
func parseMBDate(s string) time.Time {
	layouts := [...]string{"2006-01-02", "2006-01", "2006"}
	for i := range layouts {
		parsed, err := time.Parse(layouts[i], s)
		if err != nil {
			continue
		}
		return parsed
	}
	return time.Time{}
}
| 365 | |
| 366 | func generateFeed(releases []release) *atom.Feed { |
| 367 | feed := atom.NewFeed("New Music Releases", "musicfeed") |
| 368 | for _, r := range releases { |
| 369 | displayName := r.label |
| 370 | if displayName == "" { |
| 371 | displayName = r.artistName |
| 372 | } |
| 373 | |
| 374 | links := []atom.Link{ |
| 375 | { |
| 376 | Rel: "alternate", |
| 377 | Href: fmt.Sprintf("https://musicbrainz.org/release/%s", r.id), |
| 378 | }, |
| 379 | } |
| 380 | |
| 381 | content, contentType := releaseContent(r, displayName) |
| 382 | |
| 383 | if r.hasArtwork { |
| 384 | links = append(links, atom.Link{ |
| 385 | Rel: "enclosure", |
| 386 | Type: "image/jpeg", |
| 387 | Href: fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id), |
| 388 | }) |
| 389 | } |
| 390 | |
| 391 | releaseType := strings.TrimSpace(r.releaseType) |
| 392 | title := fmt.Sprintf("%s — %s", displayName, r.title) |
| 393 | if releaseType != "" { |
| 394 | title += fmt.Sprintf(" (%s)", releaseType) |
| 395 | } |
| 396 | |
| 397 | feed.Add(&atom.Entry{ |
| 398 | ID: r.id, |
| 399 | Title: title, |
| 400 | Content: atom.NewText(content, contentType), |
| 401 | Updated: atom.Time(r.date), |
| 402 | Link: links, |
| 403 | }) |
| 404 | } |
| 405 | return feed |
| 406 | } |
| 407 | |
| 408 | func releaseContent(r release, displayName string) (string, string) { |
| 409 | if !r.hasArtwork { |
| 410 | releaseType := strings.TrimSpace(r.releaseType) |
| 411 | if releaseType != "" { |
| 412 | return fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType), "" |
| 413 | } |
| 414 | return fmt.Sprintf("%s by %s", r.title, displayName), "" |
| 415 | } |
| 416 | |
| 417 | imageURL := fmt.Sprintf("%s/release/%s/front-250.jpg", caaBaseURL, r.id) |
| 418 | parts := make([]string, 0, 4) |
| 419 | parts = append(parts, "<body>") |
| 420 | |
| 421 | releaseType := strings.TrimSpace(r.releaseType) |
| 422 | var text string |
| 423 | if releaseType != "" { |
| 424 | text = fmt.Sprintf("%s by %s (%s)", r.title, displayName, releaseType) |
| 425 | } else { |
| 426 | text = fmt.Sprintf("%s by %s", r.title, displayName) |
| 427 | } |
| 428 | parts = append(parts, "<p>"+html.EscapeString(text)+"</p>") |
| 429 | parts = append(parts, |
| 430 | fmt.Sprintf(`<p><img src="%s" alt="%s"/></p>`, html.EscapeString(imageURL), html.EscapeString(r.title))) |
| 431 | parts = append(parts, "</body>") |
| 432 | |
| 433 | return strings.Join(parts, ""), "xhtml" |
| 434 | } |