all repos

rss-tools @ 7235086

Get RSS feeds from sources that (I need and) don't provide one
6 files changed, 283 insertions(+), 7 deletions(-)
telegram: turn links into links; return youtube links in youtube official feed format
Author: Oleksandr Smirnov olexsmir@gmail.com
Committed at: 2026-04-24 16:50:21 +0300
Authored at: 2026-04-24 16:46:10 +0300
Change ID: vxuvoxrzyntqrrusntxxwsypxvxkvypu
Parent: 6bffb9f
M app/atom.go
···
        24
        24
         	Title   string      `xml:"title"`

      
        25
        25
         	ID      string      `xml:"id"`

      
        26
        26
         	Updated string      `xml:"updated"`

      
        
        27
        +	Links   []AtomLink  `xml:"link,omitempty"`

      
        27
        28
         	Content AtomContent `xml:"content"`

      
        28
        29
         }

      
        29
        30
         

      ···
        33
        34
         	Value   string   `xml:",chardata"`

      
        34
        35
         }

      
        35
        36
         

      
        
        37
        // AtomLink is a link element of an Atom entry (serialized under the XML name
        // "link"); every field is written as an XML attribute.
        type AtomLink struct {
        	Rel  string `xml:"rel,attr,omitempty"`  // left out when empty
        	Type string `xml:"type,attr,omitempty"` // left out when empty
        	Href string `xml:"href,attr"`           // always written
        }

      
        
        42
        +

      
        36
        43
         // FeedEntry is the plain description of one feed entry handed to
         // FeedBuilder.Add, which converts it into the serialized Atom entry.
         // Updated is written out in RFC 3339 format.
         type FeedEntry struct {
         	Title       string
         	ID          string
         	Links       []FeedLink // copied onto the Atom entry; links with an empty Href are skipped
         	Content     string
         	ContentType string // "text" or "html", defaults to "text"
         	Updated     time.Time
         }

      
        43
        51
         

      
        
        52
        // FeedLink describes a link attached to a FeedEntry. Its fields mirror
        // AtomLink so that FeedBuilder.Add can convert a FeedLink directly into an
        // AtomLink; links whose Href is empty are skipped during that conversion.
        type FeedLink struct {
        	Rel  string
        	Type string
        	Href string
        }

      
        
        57
        +

      
        44
        58
         // FeedBuilder wraps the AtomFeed under construction: NewFeed creates one and
         // Add appends entries to it.
         type FeedBuilder struct{ f AtomFeed }

      
        45
        59
         

      
        46
        60
         func NewFeed(title, id string) *FeedBuilder {

      ···
        78
        92
         		contentType = "text"

      
        79
        93
         	}

      
        80
        94
         

      
        
        95
        +	links := make([]AtomLink, 0, len(entry.Links))

      
        
        96
        +	for _, link := range entry.Links {

      
        
        97
        +		if link.Href == "" {

      
        
        98
        +			continue

      
        
        99
        +		}

      
        
        100
        +		links = append(links, AtomLink(link))

      
        
        101
        +	}

      
        
        102
        +

      
        81
        103
         	f.f.Entries = append(f.f.Entries, AtomEntry{

      
        82
        104
         		Title:   entry.Title,

      
        83
        105
         		ID:      entry.ID,

      
        84
        106
         		Updated: entry.Updated.Format(time.RFC3339),

      
        
        107
        +		Links:   links,

      
        85
        108
         		Content: AtomContent{

      
        86
        109
         			Type:  contentType,

      
        87
        110
         			Value: entry.Content,

      
M app/atom_test.go
···
        106
        106
         	is.Equal(t, htmlContent, entry.Content.Value)

      
        107
        107
         }

      
        108
        108
         

      
        
        109
        +func TestFeedEntryLinks(t *testing.T) {

      
        
        110
        +	feed := NewFeed("test", "feed-id").

      
        
        111
        +		Add(FeedEntry{

      
        
        112
        +			Title:   "entry",

      
        
        113
        +			Content: "hello",

      
        
        114
        +			Links: []FeedLink{

      
        
        115
        +				{Rel: "alternate", Type: "text/html", Href: "https://example.com/item"},

      
        
        116
        +			},

      
        
        117
        +			Updated: time.Date(2026, 4, 20, 12, 0, 0, 0, time.UTC),

      
        
        118
        +		})

      
        
        119
        +

      
        
        120
        +	raw, err := feed.Bytes()

      
        
        121
        +	is.Err(t, err, nil)

      
        
        122
        +	if !strings.Contains(string(raw), `<link rel="alternate" type="text/html" href="https://example.com/item"></link>`) {

      
        
        123
        +		t.Fatalf("expected link element in serialized feed")

      
        
        124
        +	}

      
        
        125
        +

      
        
        126
        +	var parsed AtomFeed

      
        
        127
        +	is.Err(t, xml.Unmarshal(raw, &parsed), nil)

      
        
        128
        +	is.Equal(t, 1, len(parsed.Entries))

      
        
        129
        +	is.Equal(t, 1, len(parsed.Entries[0].Links))

      
        
        130
        +	is.Equal(t, "https://example.com/item", parsed.Entries[0].Links[0].Href)

      
        
        131
        +}

      
        
        132
        +

      
        109
        133
         func TestFeedMultipleEntriesWithMixedContentTypes(t *testing.T) {

      
        110
        134
         	updated := time.Date(2026, 4, 20, 12, 0, 0, 0, time.UTC)

      
        111
        135
         	feed := NewFeed("test", "feed-id").

      
A sources/telegram/links.go
···
        
        1
        +package telegram

      
        
        2
        +

      
        
        3
        +import (

      
        
        4
        +	"fmt"

      
        
        5
        +	"html"

      
        
        6
        +	"net/url"

      
        
        7
        +	"regexp"

      
        
        8
        +	"strings"

      
        
        9
        +

      
        
        10
        +	"olexsmir.xyz/rss-tools/app"

      
        
        11
        +)

      
        
        12
        +

      
        
        13
        var (
        	// linkRe matches an http(s) URL up to the next whitespace, angle
        	// bracket or quote character.
        	linkRe = regexp.MustCompile(`https?://[^\s<>"']+`)

        	// youtubeIDRe matches a string made of exactly 11 characters from the
        	// set [A-Za-z0-9_-], the shape expected of a YouTube video ID.
        	youtubeIDRe = regexp.MustCompile(`^[A-Za-z0-9_-]{11}$`)

        	// trailingPunctRe matches the run of sentence punctuation (and closing
        	// parentheses) at the very end of a string.
        	trailingPunctRe = regexp.MustCompile(`[.,!?:;)]+$`)
        )

        // foundLink is one URL located inside a message text.
        type foundLink struct {
        	start int    // byte offset in the text where the URL begins
        	end   int    // byte offset just past the URL (after trimming)
        	raw   string // the URL itself, i.e. text[start:end]
        }

        // findLinks returns every http(s) URL found in text, in order of appearance.
        //
        // Punctuation that trails a URL ("see https://example.com.") is treated as
        // part of the sentence and excluded from the link. A closing parenthesis is
        // kept when it balances an opening one inside the URL, so links such as
        // https://en.wikipedia.org/wiki/Go_(programming_language) stay intact.
        func findLinks(text string) []foundLink {
        	indexes := linkRe.FindAllStringIndex(text, -1)
        	links := make([]foundLink, 0, len(indexes))
        	for _, idx := range indexes {
        		start := idx[0]
        		trimmed := trimTrailingPunct(text[start:idx[1]])
        		// isHTTPURL also rejects candidates that lost their host while being
        		// trimmed (e.g. "https://,").
        		if !isHTTPURL(trimmed) {
        			continue
        		}
        		links = append(links, foundLink{
        			start: start,
        			end:   start + len(trimmed),
        			raw:   trimmed,
        		})
        	}
        	return links
        }

        // trimTrailingPunct strips the trailing punctuation run matched by
        // trailingPunctRe from candidate, then re-attaches the closing parentheses
        // that directly follow the remaining URL as long as they close a parenthesis
        // opened earlier in it.
        func trimTrailingPunct(candidate string) string {
        	loc := trailingPunctRe.FindStringIndex(candidate)
        	if loc == nil {
        		return candidate
        	}
        	end := loc[0]
        	unclosed := strings.Count(candidate[:end], "(") - strings.Count(candidate[:end], ")")
        	for ; unclosed > 0 && end < len(candidate) && candidate[end] == ')'; end++ {
        		unclosed--
        	}
        	return candidate[:end]
        }

        // isHTTPURL reports whether raw parses as a URL with a non-empty host and an
        // "http" or "https" scheme.
        func isHTTPURL(raw string) bool {
        	u, err := url.Parse(raw)
        	if err != nil || u.Host == "" {
        		return false
        	}
        	return u.Scheme == "http" || u.Scheme == "https"
        }

      
        
        55
        +

      
        
        56
        +func linkifyMessageText(text string) (string, []string) {

      
        
        57
        +	links := findLinks(text)

      
        
        58
        +	if len(links) == 0 {

      
        
        59
        +		return html.EscapeString(text), nil

      
        
        60
        +	}

      
        
        61
        +

      
        
        62
        +	var b strings.Builder

      
        
        63
        +	urls := make([]string, 0, len(links))

      
        
        64
        +	last := 0

      
        
        65
        +	for _, l := range links {

      
        
        66
        +		if l.start < last {

      
        
        67
        +			continue

      
        
        68
        +		}

      
        
        69
        +		b.WriteString(html.EscapeString(text[last:l.start]))

      
        
        70
        +		escaped := html.EscapeString(l.raw)

      
        
        71
        +		fmt.Fprintf(&b, `<a href="%s">%s</a>`, escaped, escaped)

      
        
        72
        +		urls = append(urls, l.raw)

      
        
        73
        +		last = l.end

      
        
        74
        +	}

      
        
        75
        +	b.WriteString(html.EscapeString(text[last:]))

      
        
        76
        +	return b.String(), urls

      
        
        77
        +}

      
        
        78
        +

      
        
        79
        +func messageLinks(text string) []string {

      
        
        80
        +	links := findLinks(text)

      
        
        81
        +	out := make([]string, 0, len(links))

      
        
        82
        +	seen := make(map[string]struct{}, len(links))

      
        
        83
        +	for _, link := range links {

      
        
        84
        +		if _, ok := seen[link.raw]; ok {

      
        
        85
        +			continue

      
        
        86
        +		}

      
        
        87
        +		seen[link.raw] = struct{}{}

      
        
        88
        +		out = append(out, link.raw)

      
        
        89
        +	}

      
        
        90
        +	return out

      
        
        91
        +}

      
        
        92
        +

      
        
        93
        +func feedLinks(urls []string) []app.FeedLink {

      
        
        94
        +	links := make([]app.FeedLink, 0, len(urls))

      
        
        95
        +	for _, u := range urls {

      
        
        96
        +		links = append(links, app.FeedLink{

      
        
        97
        +			Rel:  "alternate",

      
        
        98
        +			Type: "text/html",

      
        
        99
        +			Href: u,

      
        
        100
        +		})

      
        
        101
        +	}

      
        
        102
        +	return links

      
        
        103
        +}

      
        
        104
        +

      
        
        105
        +func youtubeCanonicalLink(raw string) (string, string, bool) {

      
        
        106
        +	u, err := url.Parse(raw)

      
        
        107
        +	if err != nil {

      
        
        108
        +		return "", "", false

      
        
        109
        +	}

      
        
        110
        +	host := strings.ToLower(u.Hostname())

      
        
        111
        +	host = strings.TrimPrefix(host, "www.")

      
        
        112
        +	host = strings.TrimPrefix(host, "m.")

      
        
        113
        +

      
        
        114
        +	videoID := ""

      
        
        115
        +	switch host {

      
        
        116
        +	case "youtube.com", "youtube-nocookie.com":

      
        
        117
        +		path := strings.TrimSuffix(u.Path, "/")

      
        
        118
        +		switch path {

      
        
        119
        +		case "/watch":

      
        
        120
        +			videoID = u.Query().Get("v")

      
        
        121
        +		default:

      
        
        122
        +			if afterShort, okShort := strings.CutPrefix(path, "/shorts/"); okShort {

      
        
        123
        +				videoID = afterShort

      
        
        124
        +			} else if afterLive, okLive := strings.CutPrefix(path, "/live/"); okLive {

      
        
        125
        +				videoID = afterLive

      
        
        126
        +			}

      
        
        127
        +		}

      
        
        128
        +	case "youtu.be":

      
        
        129
        +		videoID = strings.Trim(u.Path, "/")

      
        
        130
        +	default:

      
        
        131
        +		return "", "", false

      
        
        132
        +	}

      
        
        133
        +

      
        
        134
        +	if !youtubeIDRe.MatchString(videoID) {

      
        
        135
        +		return "", "", false

      
        
        136
        +	}

      
        
        137
        +

      
        
        138
        +	canonical := "https://www.youtube.com/watch?v=" + videoID

      
        
        139
        +	return canonical, videoID, true

      
        
        140
        +}

      
        
        141
        +

      
        
        142
        +func normalizeLinks(rawLinks []string) []string {

      
        
        143
        +	out := make([]string, 0, len(rawLinks))

      
        
        144
        +	seen := make(map[string]struct{}, len(rawLinks))

      
        
        145
        +	for _, raw := range rawLinks {

      
        
        146
        +		normalized := raw

      
        
        147
        +		if canonical, _, ok := youtubeCanonicalLink(raw); ok {

      
        
        148
        +			normalized = canonical

      
        
        149
        +		}

      
        
        150
        +		if _, ok := seen[normalized]; ok {

      
        
        151
        +			continue

      
        
        152
        +		}

      
        
        153
        +		seen[normalized] = struct{}{}

      
        
        154
        +		out = append(out, normalized)

      
        
        155
        +	}

      
        
        156
        +	return out

      
        
        157
        +}

      
        
        158
        +

      
        
        159
        +func firstYouTubeVideoID(urls []string) (string, bool) {

      
        
        160
        +	for _, u := range urls {

      
        
        161
        +		_, videoID, ok := youtubeCanonicalLink(u)

      
        
        162
        +		if ok {

      
        
        163
        +			return videoID, true

      
        
        164
        +		}

      
        
        165
        +	}

      
        
        166
        +	return "", false

      
        
        167
        +}

      
M sources/telegram/telegram.go
···
        6
        6
         	"encoding/binary"

      
        7
        7
         	"encoding/gob"

      
        8
        8
         	"fmt"

      
        9
        
        -	"html"

      
        10
        9
         	"log/slog"

      
        11
        10
         	"net/http"

      
        12
        11
         	"strings"

      ···
        156
        155
         func feedEntryFromMessage(m *Message) app.FeedEntry {

      
        157
        156
         	updated := time.Unix(m.Date, 0)

      
        158
        157
         	text := messageText(m)

      
        
        158
        +	normalizedLinks := normalizeLinks(messageLinks(text))

      
        
        159
        +	entryID := fmt.Sprintf("telegram-%d", m.MessageID)

      
        
        160
        +	if videoID, ok := firstYouTubeVideoID(normalizedLinks); ok {

      
        
        161
        +		entryID = "yt:video:" + videoID

      
        
        162
        +	}

      
        
        163
        +

      
        159
        164
         	if m.PhotoBase64 == "" {

      
        160
        165
         		title := text

      
        161
        166
         		if len(title) > 64 {

      
        162
        167
         			title = title[:64] + "..."

      
        163
        168
         		}

      
        
        169
        +

      
        
        170
        +		content := text

      
        
        171
        +		contentType := ""

      
        
        172
        +		if len(normalizedLinks) > 0 {

      
        
        173
        +			content, _ = linkifyMessageText(text)

      
        
        174
        +			contentType = "html"

      
        
        175
        +		}

      
        
        176
        +

      
        164
        177
         		return app.FeedEntry{

      
        165
        
        -			Title:   title,

      
        166
        
        -			ID:      fmt.Sprintf("telegram-%d", m.MessageID),

      
        167
        
        -			Content: text,

      
        168
        
        -			Updated: updated,

      
        
        178
        +			Title:       title,

      
        
        179
        +			ID:          entryID,

      
        
        180
        +			Links:       feedLinks(normalizedLinks),

      
        
        181
        +			Content:     content,

      
        
        182
        +			Updated:     updated,

      
        
        183
        +			ContentType: contentType,

      
        169
        184
         		}

      
        170
        185
         	}

      
        171
        186
         

      
        172
        187
         	parts := make([]string, 0, 2)

      
        173
        188
         	if t := strings.TrimSpace(text); t != "" {

      
        174
        
        -		parts = append(parts, "<p>"+html.EscapeString(t)+"</p>")

      
        
        189
        +		linkified, _ := linkifyMessageText(t)

      
        
        190
        +		parts = append(parts, "<p>"+linkified+"</p>")

      
        175
        191
         	}

      
        176
        192
         	mimeType := m.PhotoMIMEType

      
        177
        193
         	if mimeType == "" {

      ···
        181
        197
         

      
        182
        198
         	return app.FeedEntry{

      
        183
        199
         		Title:       fmt.Sprintf("🖼️ [%s]", updated.Format("2006-01-02")),

      
        184
        
        -		ID:          fmt.Sprintf("telegram-%d", m.MessageID),

      
        
        200
        +		ID:          entryID,

      
        
        201
        +		Links:       feedLinks(normalizedLinks),

      
        185
        202
         		Content:     strings.Join(parts, ""),

      
        186
        203
         		ContentType: "html",

      
        187
        204
         		Updated:     updated,

      
M sources/telegram/telegram_test.go
···
        40
        40
         	is.Equal(t, "", entry.ContentType)

      
        41
        41
         	is.Equal(t, "plain text", entry.Content)

      
        42
        42
         }

      
        
        43
        +

      
        
        44
        +func TestFeedEntryFromMessageLinkifiesAndAddsAtomLinks(t *testing.T) {

      
        
        45
        +	msg := &Message{

      
        
        46
        +		MessageID: 15,

      
        
        47
        +		Text:      "watch https://example.com and https://youtu.be/dQw4w9WgXcQ.",

      
        
        48
        +		Date:      time.Date(2026, 4, 23, 11, 0, 0, 0, time.UTC).Unix(),

      
        
        49
        +	}

      
        
        50
        +

      
        
        51
        +	entry := feedEntryFromMessage(msg)

      
        
        52
        +	is.Equal(t, "html", entry.ContentType)

      
        
        53
        +	if !strings.Contains(entry.Content, `<a href="https://example.com">https://example.com</a>`) {

      
        
        54
        +		t.Fatalf("expected generic link in content: %s", entry.Content)

      
        
        55
        +	}

      
        
        56
        +	if !strings.Contains(entry.Content, `<a href="https://youtu.be/dQw4w9WgXcQ">https://youtu.be/dQw4w9WgXcQ</a>`) {

      
        
        57
        +		t.Fatalf("expected youtube link in content: %s", entry.Content)

      
        
        58
        +	}

      
        
        59
        +

      
        
        60
        +	is.Equal(t, 2, len(entry.Links))

      
        
        61
        +	is.Equal(t, "https://example.com", entry.Links[0].Href)

      
        
        62
        +	is.Equal(t, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", entry.Links[1].Href)

      
        
        63
        +	is.Equal(t, "yt:video:dQw4w9WgXcQ", entry.ID)

      
        
        64
        +}