package telegram

import (
	"context"
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"golang.org/x/net/html/charset"
)

// maxPageBytes caps how many bytes of a response body are read, both for
// HTML pages and for the oEmbed JSON fallback.
const maxPageBytes = 2 << 20 // 2 MiB

// fetchPageTitle downloads rawURL through get and returns a human-readable
// title for the page.
//
// The title is looked up in this order:
//  1. the first <title> element,
//  2. the meta tags tried by metaPageTitle,
//  3. the YouTube oEmbed endpoint, when youtubeCanonicalLink recognizes rawURL.
//
// It returns an error when get is nil, the request fails, the response status
// is outside the 2xx range, the body cannot be decoded or parsed, or no
// meaningful title is found. At most maxPageBytes of the body are read.
func fetchPageTitle(ctx context.Context, get func(context.Context, string) (*http.Response, error), rawURL string) (string, error) {
	if get == nil {
		return "", fmt.Errorf("missing page getter")
	}

	resp, err := get(ctx, rawURL)
	if err != nil {
		return "", fmt.Errorf("fetching page: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return "", fmt.Errorf("unexpected status code: %d", resp.StatusCode)
	}

	// Convert the (size-limited) body to UTF-8 based on the Content-Type
	// header and the document's own charset hints before parsing it.
	decoded, err := charset.NewReader(io.LimitReader(resp.Body, maxPageBytes), resp.Header.Get("Content-Type"))
	if err != nil {
		return "", fmt.Errorf("detecting page charset: %w", err)
	}

	doc, err := goquery.NewDocumentFromReader(decoded)
	if err != nil {
		return "", fmt.Errorf("parsing page HTML: %w", err)
	}

	title := normalizePageTitle(doc.Find("title").First().Text())
	if !isMeaningfulPageTitle(title) {
		title = metaPageTitle(doc)
	}
	if isMeaningfulPageTitle(title) {
		return title, nil
	}

	// Last resort for YouTube links: ask the oEmbed endpoint. This is a
	// deliberate best-effort; if it fails, the caller gets the generic
	// "empty title" error rather than the oEmbed failure.
	if videoID, _, ok := youtubeCanonicalLink(rawURL); ok {
		if ytTitle, ytErr := fetchYouTubeVideoTitle(ctx, get, videoID); ytErr == nil {
			return ytTitle, nil
		}
	}
	return "", fmt.Errorf("page title is empty")
}

// metaPageTitle returns the first meaningful title found in the document's
// meta tags, or "" when none of them yields one.
//
// Selectors are tried in order. For each selector only the first matching
// element is inspected, and it is skipped when it has no content attribute
// or when its normalized content is not a meaningful title.
func metaPageTitle(doc *goquery.Document) string {
	selectors := []string{
		`meta[property="og:title"]`,
		`meta[name="og:title"]`,
		`meta[property="twitter:title"]`,
		`meta[name="twitter:title"]`,
		`meta[itemprop="name"]`,
	}
	for _, selector := range selectors {
		content, ok := doc.Find(selector).First().Attr("content")
		if !ok {
			continue
		}
		if title := normalizePageTitle(content); isMeaningfulPageTitle(title) {
			return title
		}
	}
	return ""
}

// youtubeOEmbedResponse holds the only field read from the YouTube oEmbed
// JSON response.
type youtubeOEmbedResponse struct {
	Title string `json:"title"`
}

// fetchYouTubeVideoTitle requests the oEmbed JSON for the given video ID from
// https://www.youtube.com/oembed through get and returns its normalized title.
//
// It returns an error when get is nil, the request fails, the status is not
// 200 OK, the body is not valid JSON, or the title is not meaningful. At most
// maxPageBytes of the body are read.
func fetchYouTubeVideoTitle(ctx context.Context, get func(context.Context, string) (*http.Response, error), videoID string) (string, error) {
	if get == nil {
		return "", fmt.Errorf("missing page getter")
	}

	query := url.Values{}
	query.Set("url", "https://www.youtube.com/watch?v="+videoID)
	query.Set("format", "json")
	u := url.URL{
		Scheme:   "https",
		Host:     "www.youtube.com",
		Path:     "/oembed",
		RawQuery: query.Encode(),
	}

	resp, err := get(ctx, u.String())
	if err != nil {
		return "", fmt.Errorf("fetching oEmbed data: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", fmt.Errorf("oEmbed request failed with status %d", resp.StatusCode)
	}

	// Bound the read so an oversized or never-ending response cannot be
	// streamed into the decoder without limit.
	var oembed youtubeOEmbedResponse
	if err := json.NewDecoder(io.LimitReader(resp.Body, maxPageBytes)).Decode(&oembed); err != nil {
		return "", fmt.Errorf("decoding oEmbed response: %w", err)
	}

	title := normalizePageTitle(oembed.Title)
	if !isMeaningfulPageTitle(title) {
		return "", fmt.Errorf("oEmbed returned empty title")
	}
	return title, nil
}

// normalizePageTitle trims raw and collapses every run of whitespace
// (including newlines and tabs) into a single space.
func normalizePageTitle(raw string) string {
	return strings.Join(strings.Fields(raw), " ")
}

// isMeaningfulPageTitle reports whether title carries real content. Empty
// titles and the placeholders "youtube" and "- youtube" are rejected,
// ignoring case and surrounding whitespace.
func isMeaningfulPageTitle(title string) bool {
	switch strings.ToLower(strings.TrimSpace(title)) {
	case "", "- youtube", "youtube":
		return false
	default:
		return true
	}
}