all repos

rss-tools @ 7235086

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/sources/telegram/links.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
telegram: turn links into links; return youtube links in youtube official feed format, 1 month ago
1
package telegram
2
3
import (
4
	"fmt"
5
	"html"
6
	"net/url"
7
	"regexp"
8
	"strings"
9
10
	"olexsmir.xyz/rss-tools/app"
11
)
12
13
var (
	// trailingPunctRe matches a run of sentence punctuation (or closing
	// parentheses) at the very end of a string.
	trailingPunctRe = regexp.MustCompile(`[.,!?:;)]+$`)

	// youtubeIDRe matches a string that consists of exactly 11 characters
	// drawn from letters, digits, "_" and "-".
	youtubeIDRe = regexp.MustCompile(`^[A-Za-z0-9_-]{11}$`)

	// linkRe matches "http://" or "https://" followed by one or more
	// characters that are not whitespace, angle brackets or quotes.
	linkRe = regexp.MustCompile(`https?://[^\s<>"']+`)
)
18
19
// foundLink describes a single URL located inside a larger piece of text.
type foundLink struct {
	start, end int    // byte offsets into the scanned text; end is exclusive
	raw        string // the URL exactly as it appears between start and end
}
24
25
func findLinks(text string) []foundLink {
26
	indexes := linkRe.FindAllStringIndex(text, -1)
27
	links := make([]foundLink, 0, len(indexes))
28
	for _, idx := range indexes {
29
		start, end := idx[0], idx[1]
30
		candidate := text[start:end]
31
		trimmed := trailingPunctRe.ReplaceAllString(candidate, "")
32
		if trimmed == "" {
33
			continue
34
		}
35
		trimmedEnd := start + len(trimmed)
36
		if !isHTTPURL(trimmed) {
37
			continue
38
		}
39
		links = append(links, foundLink{
40
			start: start,
41
			end:   trimmedEnd,
42
			raw:   trimmed,
43
		})
44
	}
45
	return links
46
}
47
48
// isHTTPURL reports whether raw parses as a URL that has a non-empty host and
// whose scheme is either "http" or "https".
func isHTTPURL(raw string) bool {
	parsed, err := url.Parse(raw)
	if err != nil {
		return false
	}
	if parsed.Host == "" {
		return false
	}
	switch parsed.Scheme {
	case "http", "https":
		return true
	default:
		return false
	}
}
55
56
func linkifyMessageText(text string) (string, []string) {
57
	links := findLinks(text)
58
	if len(links) == 0 {
59
		return html.EscapeString(text), nil
60
	}
61
62
	var b strings.Builder
63
	urls := make([]string, 0, len(links))
64
	last := 0
65
	for _, l := range links {
66
		if l.start < last {
67
			continue
68
		}
69
		b.WriteString(html.EscapeString(text[last:l.start]))
70
		escaped := html.EscapeString(l.raw)
71
		fmt.Fprintf(&b, `<a href="%s">%s</a>`, escaped, escaped)
72
		urls = append(urls, l.raw)
73
		last = l.end
74
	}
75
	b.WriteString(html.EscapeString(text[last:]))
76
	return b.String(), urls
77
}
78
79
func messageLinks(text string) []string {
80
	links := findLinks(text)
81
	out := make([]string, 0, len(links))
82
	seen := make(map[string]struct{}, len(links))
83
	for _, link := range links {
84
		if _, ok := seen[link.raw]; ok {
85
			continue
86
		}
87
		seen[link.raw] = struct{}{}
88
		out = append(out, link.raw)
89
	}
90
	return out
91
}
92
93
func feedLinks(urls []string) []app.FeedLink {
94
	links := make([]app.FeedLink, 0, len(urls))
95
	for _, u := range urls {
96
		links = append(links, app.FeedLink{
97
			Rel:  "alternate",
98
			Type: "text/html",
99
			Href: u,
100
		})
101
	}
102
	return links
103
}
104
105
func youtubeCanonicalLink(raw string) (string, string, bool) {
106
	u, err := url.Parse(raw)
107
	if err != nil {
108
		return "", "", false
109
	}
110
	host := strings.ToLower(u.Hostname())
111
	host = strings.TrimPrefix(host, "www.")
112
	host = strings.TrimPrefix(host, "m.")
113
114
	videoID := ""
115
	switch host {
116
	case "youtube.com", "youtube-nocookie.com":
117
		path := strings.TrimSuffix(u.Path, "/")
118
		switch path {
119
		case "/watch":
120
			videoID = u.Query().Get("v")
121
		default:
122
			if afterShort, okShort := strings.CutPrefix(path, "/shorts/"); okShort {
123
				videoID = afterShort
124
			} else if afterLive, okLive := strings.CutPrefix(path, "/live/"); okLive {
125
				videoID = afterLive
126
			}
127
		}
128
	case "youtu.be":
129
		videoID = strings.Trim(u.Path, "/")
130
	default:
131
		return "", "", false
132
	}
133
134
	if !youtubeIDRe.MatchString(videoID) {
135
		return "", "", false
136
	}
137
138
	canonical := "https://www.youtube.com/watch?v=" + videoID
139
	return canonical, videoID, true
140
}
141
142
func normalizeLinks(rawLinks []string) []string {
143
	out := make([]string, 0, len(rawLinks))
144
	seen := make(map[string]struct{}, len(rawLinks))
145
	for _, raw := range rawLinks {
146
		normalized := raw
147
		if canonical, _, ok := youtubeCanonicalLink(raw); ok {
148
			normalized = canonical
149
		}
150
		if _, ok := seen[normalized]; ok {
151
			continue
152
		}
153
		seen[normalized] = struct{}{}
154
		out = append(out, normalized)
155
	}
156
	return out
157
}
158
159
func firstYouTubeVideoID(urls []string) (string, bool) {
160
	for _, u := range urls {
161
		_, videoID, ok := youtubeCanonicalLink(u)
162
		if ok {
163
			return videoID, true
164
		}
165
	}
166
	return "", false
167
}