3 files changed,
75 insertions(+),
1 deletions(-)
Author:
Oleksandr Smirnov
olexsmir@gmail.com
Committed at:
2026-05-27 18:35:54 +0300
Authored at:
2026-05-27 18:06:28 +0300
Change ID:
uxwpuronyxxnonkkmouvlnsntoqtpuzp
Parent:
62a30de
M
app/app.go
··· 57 57 } 58 58 59 59 const ( 60 - defaultScraperUserAgent = "rss-tools/1.0" // todo: i wanna be a chrome 60 + defaultScraperUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36" 61 61 defaultScraperAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" 62 62 ) 63 63
M
sources/telegram/page_title.go
··· 2 2 3 3 import ( 4 4 "context" 5 + "encoding/json" 5 6 "fmt" 6 7 "io" 7 8 "net/http" 9 + "net/url" 8 10 "strings" 9 11 10 12 "github.com/PuerkitoBio/goquery" ··· 43 45 title = metaPageTitle(doc) 44 46 } 45 47 if !isMeaningfulPageTitle(title) { 48 + if videoID, _, ok := youtubeCanonicalLink(rawURL); ok { 49 + ytTitle, ytErr := fetchYouTubeVideoTitle(ctx, get, videoID) 50 + if ytErr == nil { 51 + return ytTitle, nil 52 + } 53 + } 46 54 return "", fmt.Errorf("page title is empty") 47 55 } 48 56 return title, nil ··· 68 76 } 69 77 } 70 78 return "" 79 +} 80 + 81 +type youtubeOEmbedResponse struct { 82 + Title string `json:"title"` 83 +} 84 + 85 +func fetchYouTubeVideoTitle(ctx context.Context, get func(context.Context, string) (*http.Response, error), videoID string) (string, error) { 86 + u := url.URL{ 87 + Scheme: "https", 88 + Host: "www.youtube.com", 89 + Path: "/oembed", 90 + } 91 + q := u.Query() 92 + q.Set("url", "https://www.youtube.com/watch?v="+videoID) 93 + q.Set("format", "json") 94 + u.RawQuery = q.Encode() 95 + 96 + resp, err := get(ctx, u.String()) 97 + if err != nil { 98 + return "", err 99 + } 100 + defer resp.Body.Close() 101 + 102 + if resp.StatusCode != http.StatusOK { 103 + return "", fmt.Errorf("oEmbed request failed with status %d", resp.StatusCode) 104 + } 105 + 106 + var oembed youtubeOEmbedResponse 107 + if err := json.NewDecoder(resp.Body).Decode(&oembed); err != nil { 108 + return "", err 109 + } 110 + 111 + title := normalizePageTitle(oembed.Title) 112 + if !isMeaningfulPageTitle(title) { 113 + return "", fmt.Errorf("oEmbed returned empty title") 114 + } 115 + 116 + return title, nil 71 117 } 72 118 73 119 func normalizePageTitle(raw string) string {
M
sources/telegram/page_title_test.go
··· 118 118 t.Fatalf("expected an error for placeholder title") 119 119 } 120 120 } 121 + 122 +func TestFetchPageTitleFallsBackToYouTubeOEmbed(t *testing.T) { 123 + calls := 0 124 + title, err := fetchPageTitle(context.Background(), func(_ context.Context, url string) (*http.Response, error) { 125 + calls++ 126 + if strings.Contains(url, "oembed") { 127 + return &http.Response{ 128 + StatusCode: http.StatusOK, 129 + Header: http.Header{ 130 + "Content-Type": []string{"application/json; charset=utf-8"}, 131 + }, 132 + Body: io.NopCloser(strings.NewReader(`{"title":"Rick Astley - Never Gonna Give You Up"}`)), 133 + }, nil 134 + } 135 + return &http.Response{ 136 + StatusCode: http.StatusOK, 137 + Header: http.Header{ 138 + "Content-Type": []string{"text/html; charset=utf-8"}, 139 + }, 140 + Body: io.NopCloser(strings.NewReader(`<html><head><title> - YouTube </title></head></html>`)), 141 + }, nil 142 + }, "https://www.youtube.com/watch?v=dQw4w9WgXcQ") 143 + if err != nil { 144 + t.Fatalf("unexpected error: %v", err) 145 + } 146 + is.Equal(t, "Rick Astley - Never Gonna Give You Up", title) 147 + is.Equal(t, 2, calls) 148 +}