all repos

rss-tools @ 65589de

Get RSS feeds from sources that (I need and) don't provide one
3 files changed, 75 insertions(+), 1 deletion(-)
youtube: fix title fetcher
Author: Oleksandr Smirnov olexsmir@gmail.com
Committed at: 2026-05-27 18:35:54 +0300
Authored at: 2026-05-27 18:06:28 +0300
Change ID: uxwpuronyxxnonkkmouvlnsntoqtpuzp
Parent: 62a30de
M app/app.go
···
        57
        57
         }

      
        58
        58
         

      
        59
        59
         const (

      
        60
        
        -	defaultScraperUserAgent = "rss-tools/1.0" // todo: i wanna be a chrome

      
        
        60
        +	defaultScraperUserAgent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"

      
        61
        61
         	defaultScraperAccept    = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"

      
        62
        62
         )

      
        63
        63
         

      
M sources/telegram/page_title.go
···
        2
        2
         

      
        3
        3
         import (

      
        4
        4
         	"context"

      
        
        5
        +	"encoding/json"

      
        5
        6
         	"fmt"

      
        6
        7
         	"io"

      
        7
        8
         	"net/http"

      
        
        9
        +	"net/url"

      
        8
        10
         	"strings"

      
        9
        11
         

      
        10
        12
         	"github.com/PuerkitoBio/goquery"

      ···
        43
        45
         		title = metaPageTitle(doc)

      
        44
        46
         	}

      
        45
        47
         	if !isMeaningfulPageTitle(title) {

      
        
        48
        +		if videoID, _, ok := youtubeCanonicalLink(rawURL); ok {

      
        
        49
        +			ytTitle, ytErr := fetchYouTubeVideoTitle(ctx, get, videoID)

      
        
        50
        +			if ytErr == nil {

      
        
        51
        +				return ytTitle, nil

      
        
        52
        +			}

      
        
        53
        +		}

      
        46
        54
         		return "", fmt.Errorf("page title is empty")

      
        47
        55
         	}

      
        48
        56
         	return title, nil

      ···
        68
        76
         		}

      
        69
        77
         	}

      
        70
        78
         	return ""

      
        
        79
        +}

      
        
        80
        +

      
        
        81
        +type youtubeOEmbedResponse struct {

      
        
        82
        +	Title string `json:"title"`

      
        
        83
        +}

      
        
        84
        +

      
        
        85
        +func fetchYouTubeVideoTitle(ctx context.Context, get func(context.Context, string) (*http.Response, error), videoID string) (string, error) {

      
        
        86
        +	u := url.URL{

      
        
        87
        +		Scheme: "https",

      
        
        88
        +		Host:   "www.youtube.com",

      
        
        89
        +		Path:   "/oembed",

      
        
        90
        +	}

      
        
        91
        +	q := u.Query()

      
        
        92
        +	q.Set("url", "https://www.youtube.com/watch?v="+videoID)

      
        
        93
        +	q.Set("format", "json")

      
        
        94
        +	u.RawQuery = q.Encode()

      
        
        95
        +

      
        
        96
        +	resp, err := get(ctx, u.String())

      
        
        97
        +	if err != nil {

      
        
        98
        +		return "", err

      
        
        99
        +	}

      
        
        100
        +	defer resp.Body.Close()

      
        
        101
        +

      
        
        102
        +	if resp.StatusCode != http.StatusOK {

      
        
        103
        +		return "", fmt.Errorf("oEmbed request failed with status %d", resp.StatusCode)

      
        
        104
        +	}

      
        
        105
        +

      
        
        106
        +	var oembed youtubeOEmbedResponse

      
        
        107
        +	if err := json.NewDecoder(resp.Body).Decode(&oembed); err != nil {

      
        
        108
        +		return "", err

      
        
        109
        +	}

      
        
        110
        +

      
        
        111
        +	title := normalizePageTitle(oembed.Title)

      
        
        112
        +	if !isMeaningfulPageTitle(title) {

      
        
        113
        +		return "", fmt.Errorf("oEmbed returned empty title")

      
        
        114
        +	}

      
        
        115
        +

      
        
        116
        +	return title, nil

      
        71
        117
         }

      
        72
        118
         

      
        73
        119
         func normalizePageTitle(raw string) string {

      
M sources/telegram/page_title_test.go
···
        118
        118
         		t.Fatalf("expected an error for placeholder title")

      
        119
        119
         	}

      
        120
        120
         }

      
        
        121
        +

      
        
        122
        +func TestFetchPageTitleFallsBackToYouTubeOEmbed(t *testing.T) {

      
        
        123
        +	calls := 0

      
        
        124
        +	title, err := fetchPageTitle(context.Background(), func(_ context.Context, url string) (*http.Response, error) {

      
        
        125
        +		calls++

      
        
        126
        +		if strings.Contains(url, "oembed") {

      
        
        127
        +			return &http.Response{

      
        
        128
        +				StatusCode: http.StatusOK,

      
        
        129
        +				Header: http.Header{

      
        
        130
        +					"Content-Type": []string{"application/json; charset=utf-8"},

      
        
        131
        +				},

      
        
        132
        +				Body: io.NopCloser(strings.NewReader(`{"title":"Rick Astley - Never Gonna Give You Up"}`)),

      
        
        133
        +			}, nil

      
        
        134
        +		}

      
        
        135
        +		return &http.Response{

      
        
        136
        +			StatusCode: http.StatusOK,

      
        
        137
        +			Header: http.Header{

      
        
        138
        +				"Content-Type": []string{"text/html; charset=utf-8"},

      
        
        139
        +			},

      
        
        140
        +			Body: io.NopCloser(strings.NewReader(`<html><head><title> - YouTube </title></head></html>`)),

      
        
        141
        +		}, nil

      
        
        142
        +	}, "https://www.youtube.com/watch?v=dQw4w9WgXcQ")

      
        
        143
        +	if err != nil {

      
        
        144
        +		t.Fatalf("unexpected error: %v", err)

      
        
        145
        +	}

      
        
        146
        +	is.Equal(t, "Rick Astley - Never Gonna Give You Up", title)

      
        
        147
        +	is.Equal(t, 2, calls)

      
        
        148
        +}