6 files changed,
283 insertions(+),
7 deletions(-)
Author:
Oleksandr Smirnov
olexsmir@gmail.com
Committed at:
2026-04-24 16:50:21 +0300
Authored at:
2026-04-24 16:46:10 +0300
Change ID:
vxuvoxrzyntqrrusntxxwsypxvxkvypu
Parent:
6bffb9f
M
app/atom.go
··· 24 24 Title string `xml:"title"` 25 25 ID string `xml:"id"` 26 26 Updated string `xml:"updated"` 27 + Links []AtomLink `xml:"link,omitempty"` 27 28 Content AtomContent `xml:"content"` 28 29 } 29 30 ··· 33 34 Value string `xml:",chardata"` 34 35 } 35 36 37 +type AtomLink struct { 38 + Rel string `xml:"rel,attr,omitempty"` 39 + Type string `xml:"type,attr,omitempty"` 40 + Href string `xml:"href,attr"` 41 +} 42 + 36 43 type FeedEntry struct { 37 44 Title string 38 45 ID string 46 + Links []FeedLink 39 47 Content string 40 48 ContentType string // "text" or "html", defaults to "text" 41 49 Updated time.Time 42 50 } 43 51 52 +type FeedLink struct { 53 + Rel string 54 + Type string 55 + Href string 56 +} 57 + 44 58 type FeedBuilder struct{ f AtomFeed } 45 59 46 60 func NewFeed(title, id string) *FeedBuilder { ··· 78 92 contentType = "text" 79 93 } 80 94 95 + links := make([]AtomLink, 0, len(entry.Links)) 96 + for _, link := range entry.Links { 97 + if link.Href == "" { 98 + continue 99 + } 100 + links = append(links, AtomLink(link)) 101 + } 102 + 81 103 f.f.Entries = append(f.f.Entries, AtomEntry{ 82 104 Title: entry.Title, 83 105 ID: entry.ID, 84 106 Updated: entry.Updated.Format(time.RFC3339), 107 + Links: links, 85 108 Content: AtomContent{ 86 109 Type: contentType, 87 110 Value: entry.Content,
M
app/atom_test.go
··· 106 106 is.Equal(t, htmlContent, entry.Content.Value) 107 107 } 108 108 109 +func TestFeedEntryLinks(t *testing.T) { 110 + feed := NewFeed("test", "feed-id"). 111 + Add(FeedEntry{ 112 + Title: "entry", 113 + Content: "hello", 114 + Links: []FeedLink{ 115 + {Rel: "alternate", Type: "text/html", Href: "https://example.com/item"}, 116 + }, 117 + Updated: time.Date(2026, 4, 20, 12, 0, 0, 0, time.UTC), 118 + }) 119 + 120 + raw, err := feed.Bytes() 121 + is.Err(t, err, nil) 122 + if !strings.Contains(string(raw), `<link rel="alternate" type="text/html" href="https://example.com/item"></link>`) { 123 + t.Fatalf("expected link element in serialized feed") 124 + } 125 + 126 + var parsed AtomFeed 127 + is.Err(t, xml.Unmarshal(raw, &parsed), nil) 128 + is.Equal(t, 1, len(parsed.Entries)) 129 + is.Equal(t, 1, len(parsed.Entries[0].Links)) 130 + is.Equal(t, "https://example.com/item", parsed.Entries[0].Links[0].Href) 131 +} 132 + 109 133 func TestFeedMultipleEntriesWithMixedContentTypes(t *testing.T) { 110 134 updated := time.Date(2026, 4, 20, 12, 0, 0, 0, time.UTC) 111 135 feed := NewFeed("test", "feed-id").
A
sources/telegram/links.go
··· 1 +package telegram 2 + 3 +import ( 4 + "fmt" 5 + "html" 6 + "net/url" 7 + "regexp" 8 + "strings" 9 + 10 + "olexsmir.xyz/rss-tools/app" 11 +) 12 + 13 +var ( 14 + linkRe = regexp.MustCompile(`https?://[^\s<>"']+`) 15 + youtubeIDRe = regexp.MustCompile(`^[A-Za-z0-9_-]{11}$`) 16 + trailingPunctRe = regexp.MustCompile(`[.,!?:;)]+$`) 17 +) 18 + 19 +type foundLink struct { 20 + start int 21 + end int 22 + raw string 23 +} 24 + 25 +func findLinks(text string) []foundLink { 26 + indexes := linkRe.FindAllStringIndex(text, -1) 27 + links := make([]foundLink, 0, len(indexes)) 28 + for _, idx := range indexes { 29 + start, end := idx[0], idx[1] 30 + candidate := text[start:end] 31 + trimmed := trailingPunctRe.ReplaceAllString(candidate, "") 32 + if trimmed == "" { 33 + continue 34 + } 35 + trimmedEnd := start + len(trimmed) 36 + if !isHTTPURL(trimmed) { 37 + continue 38 + } 39 + links = append(links, foundLink{ 40 + start: start, 41 + end: trimmedEnd, 42 + raw: trimmed, 43 + }) 44 + } 45 + return links 46 +} 47 + 48 +func isHTTPURL(raw string) bool { 49 + u, err := url.Parse(raw) 50 + if err != nil || u.Host == "" { 51 + return false 52 + } 53 + return u.Scheme == "http" || u.Scheme == "https" 54 +} 55 + 56 +func linkifyMessageText(text string) (string, []string) { 57 + links := findLinks(text) 58 + if len(links) == 0 { 59 + return html.EscapeString(text), nil 60 + } 61 + 62 + var b strings.Builder 63 + urls := make([]string, 0, len(links)) 64 + last := 0 65 + for _, l := range links { 66 + if l.start < last { 67 + continue 68 + } 69 + b.WriteString(html.EscapeString(text[last:l.start])) 70 + escaped := html.EscapeString(l.raw) 71 + fmt.Fprintf(&b, `<a href="%s">%s</a>`, escaped, escaped) 72 + urls = append(urls, l.raw) 73 + last = l.end 74 + } 75 + b.WriteString(html.EscapeString(text[last:])) 76 + return b.String(), urls 77 +} 78 + 79 +func messageLinks(text string) []string { 80 + links := findLinks(text) 81 + out := make([]string, 0, len(links)) 82 + seen := make(map[string]struct{}, len(links)) 83 + for _, link := range links { 84 + if _, ok := seen[link.raw]; ok { 85 + continue 86 + } 87 + seen[link.raw] = struct{}{} 88 + out = append(out, link.raw) 89 + } 90 + return out 91 +} 92 + 93 +func feedLinks(urls []string) []app.FeedLink { 94 + links := make([]app.FeedLink, 0, len(urls)) 95 + for _, u := range urls { 96 + links = append(links, app.FeedLink{ 97 + Rel: "alternate", 98 + Type: "text/html", 99 + Href: u, 100 + }) 101 + } 102 + return links 103 +} 104 + 105 +func youtubeCanonicalLink(raw string) (string, string, bool) { 106 + u, err := url.Parse(raw) 107 + if err != nil { 108 + return "", "", false 109 + } 110 + host := strings.ToLower(u.Hostname()) 111 + host = strings.TrimPrefix(host, "www.") 112 + host = strings.TrimPrefix(host, "m.") 113 + 114 + videoID := "" 115 + switch host { 116 + case "youtube.com", "youtube-nocookie.com": 117 + path := strings.TrimSuffix(u.Path, "/") 118 + switch path { 119 + case "/watch": 120 + videoID = u.Query().Get("v") 121 + default: 122 + if afterShort, okShort := strings.CutPrefix(path, "/shorts/"); okShort { 123 + videoID = afterShort 124 + } else if afterLive, okLive := strings.CutPrefix(path, "/live/"); okLive { 125 + videoID = afterLive 126 + } 127 + } 128 + case "youtu.be": 129 + videoID = strings.Trim(u.Path, "/") 130 + default: 131 + return "", "", false 132 + } 133 + 134 + if !youtubeIDRe.MatchString(videoID) { 135 + return "", "", false 136 + } 137 + 138 + canonical := "https://www.youtube.com/watch?v=" + videoID 139 + return canonical, videoID, true 140 +} 141 + 142 +func normalizeLinks(rawLinks []string) []string { 143 + out := make([]string, 0, len(rawLinks)) 144 + seen := make(map[string]struct{}, len(rawLinks)) 145 + for _, raw := range rawLinks { 146 + normalized := raw 147 + if canonical, _, ok := youtubeCanonicalLink(raw); ok { 148 + normalized = canonical 149 + } 150 + if _, ok := seen[normalized]; ok { 151 + continue 152 + } 153 + seen[normalized] = struct{}{} 154 + out = append(out, normalized) 155 + } 156 + return out 157 +} 158 + 159 +func firstYouTubeVideoID(urls []string) (string, bool) { 160 + for _, u := range urls { 161 + _, videoID, ok := youtubeCanonicalLink(u) 162 + if ok { 163 + return videoID, true 164 + } 165 + } 166 + return "", false 167 +}
A
sources/telegram/links_test.go
··· 1 +package telegram 2 + 3 +import ( 4 + "testing" 5 + 6 + "olexsmir.xyz/x/is" 7 +) 8 + 9 +func TestLinkifyMessageTextEscapesAndPreservesText(t *testing.T) { 10 + text := `go <now> https://example.com/page?q=1.` 11 + html, urls := linkifyMessageText(text) 12 + 13 + is.Equal(t, `go <now> <a href="https://example.com/page?q=1">https://example.com/page?q=1</a>.`, html) 14 + is.Equal(t, 1, len(urls)) 15 + is.Equal(t, "https://example.com/page?q=1", urls[0]) 16 +} 17 + 18 +func TestYouTubeCanonicalLink(t *testing.T) { 19 + canonical, id, ok := youtubeCanonicalLink("https://youtu.be/dQw4w9WgXcQ?t=42") 20 + is.Equal(t, true, ok) 21 + is.Equal(t, "dQw4w9WgXcQ", id) 22 + is.Equal(t, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", canonical) 23 +}
M
sources/telegram/telegram.go
··· 6 6 "encoding/binary" 7 7 "encoding/gob" 8 8 "fmt" 9 - "html" 10 9 "log/slog" 11 10 "net/http" 12 11 "strings" ··· 156 155 func feedEntryFromMessage(m *Message) app.FeedEntry { 157 156 updated := time.Unix(m.Date, 0) 158 157 text := messageText(m) 158 + normalizedLinks := normalizeLinks(messageLinks(text)) 159 + entryID := fmt.Sprintf("telegram-%d", m.MessageID) 160 + if videoID, ok := firstYouTubeVideoID(normalizedLinks); ok { 161 + entryID = "yt:video:" + videoID 162 + } 163 + 159 164 if m.PhotoBase64 == "" { 160 165 title := text 161 166 if len(title) > 64 { 162 167 title = title[:64] + "..." 163 168 } 169 + 170 + content := text 171 + contentType := "" 172 + if len(normalizedLinks) > 0 { 173 + content, _ = linkifyMessageText(text) 174 + contentType = "html" 175 + } 176 + 164 177 return app.FeedEntry{ 165 - Title: title, 166 - ID: fmt.Sprintf("telegram-%d", m.MessageID), 167 - Content: text, 168 - Updated: updated, 178 + Title: title, 179 + ID: entryID, 180 + Links: feedLinks(normalizedLinks), 181 + Content: content, 182 + Updated: updated, 183 + ContentType: contentType, 169 184 } 170 185 } 171 186 172 187 parts := make([]string, 0, 2) 173 188 if t := strings.TrimSpace(text); t != "" { 174 - parts = append(parts, "<p>"+html.EscapeString(t)+"</p>") 189 + linkified, _ := linkifyMessageText(t) 190 + parts = append(parts, "<p>"+linkified+"</p>") 175 191 } 176 192 mimeType := m.PhotoMIMEType 177 193 if mimeType == "" { ··· 181 197 182 198 return app.FeedEntry{ 183 199 Title: fmt.Sprintf("🖼️ [%s]", updated.Format("2006-01-02")), 184 - ID: fmt.Sprintf("telegram-%d", m.MessageID), 200 + ID: entryID, 201 + Links: feedLinks(normalizedLinks), 185 202 Content: strings.Join(parts, ""), 186 203 ContentType: "html", 187 204 Updated: updated,
M
sources/telegram/telegram_test.go
··· 40 40 is.Equal(t, "", entry.ContentType) 41 41 is.Equal(t, "plain text", entry.Content) 42 42 } 43 + 44 +func TestFeedEntryFromMessageLinkifiesAndAddsAtomLinks(t *testing.T) { 45 + msg := &Message{ 46 + MessageID: 15, 47 + Text: "watch https://example.com and https://youtu.be/dQw4w9WgXcQ.", 48 + Date: time.Date(2026, 4, 23, 11, 0, 0, 0, time.UTC).Unix(), 49 + } 50 + 51 + entry := feedEntryFromMessage(msg) 52 + is.Equal(t, "html", entry.ContentType) 53 + if !strings.Contains(entry.Content, `<a href="https://example.com">https://example.com</a>`) { 54 + t.Fatalf("expected generic link in content: %s", entry.Content) 55 + } 56 + if !strings.Contains(entry.Content, `<a href="https://youtu.be/dQw4w9WgXcQ">https://youtu.be/dQw4w9WgXcQ</a>`) { 57 + t.Fatalf("expected youtube link in content: %s", entry.Content) 58 + } 59 + 60 + is.Equal(t, 2, len(entry.Links)) 61 + is.Equal(t, "https://example.com", entry.Links[0].Href) 62 + is.Equal(t, "https://www.youtube.com/watch?v=dQw4w9WgXcQ", entry.Links[1].Href) 63 + is.Equal(t, "yt:video:dQw4w9WgXcQ", entry.ID) 64 +}