all repos

rss-tools @ 1e9eff1

Get RSS feeds from sources that (I need and) don't provide one.

rss-tools/sources/telegram/telegram.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
telegram: fetch links title, 1 month ago
1
package telegram
2
3
import (
4
	"bytes"
5
	"context"
6
	"encoding/binary"
7
	"encoding/gob"
8
	"fmt"
9
	"log/slog"
10
	"net/http"
11
	"strings"
12
	"time"
13
14
	"olexsmir.xyz/rss-tools/app"
15
)
16
17
type telegram struct {
18
	db        *app.Bucket
19
	messages  *app.Bucket
20
	client    *http.Client
21
	get       func(context.Context, string) (*http.Response, error)
22
	tg        *TelegramSDK
23
	allowedID int64
24
	logger    *slog.Logger
25
}
26
27
func Register(a *app.App) error {
28
	db, err := a.Bucket("telegram")
29
	if err != nil {
30
		return err
31
	}
32
33
	messages, err := a.Bucket("telegram:messages")
34
	if err != nil {
35
		return err
36
	}
37
38
	t := &telegram{
39
		db:        db,
40
		messages:  messages,
41
		client:    a.Client,
42
		get:       a.Get,
43
		tg:        NewSDK(a.Client, a.Config.TGToken),
44
		allowedID: a.Config.TGUserID,
45
		logger:    a.Logger,
46
	}
47
48
	a.AddWorker(t.worker)
49
	a.Route("GET /telegram", t.handler)
50
	return nil
51
}
52
53
// handler serves the Telegram feed. It loads every stored message, fills in
// missing link titles (re-saving a message whose titles changed) and renders
// all messages as one feed.
//
// Every failure is answered with a 500 and a generic message; the underlying
// error is logged here because it is not returned to anyone else.
func (t *telegram) handler(w http.ResponseWriter, r *http.Request) {
	// todo: cache feed construction
	// todo: don't include messages older than N days

	ctx := r.Context()

	messages, err := t.loadMessages(ctx)
	if err != nil {
		t.logger.ErrorContext(ctx, "failed to load messages", "err", err)
		http.Error(w, "failed to load messages", http.StatusInternalServerError)
		return
	}

	feed := app.NewFeed("Telegram feed", "telegram-feed")
	for _, m := range messages {
		if t.enrichMessageWithLinkTitles(ctx, m) {
			// Persist freshly fetched titles so they are not looked up again
			// on the next request.
			if err := t.saveMessage(m); err != nil {
				t.logger.ErrorContext(ctx, "failed to update cached titles", "message_id", m.MessageID, "err", err)
				http.Error(w, "failed to update cached titles", http.StatusInternalServerError)
				return
			}
		}
		feed.Add(feedEntryFromMessage(m))
	}

	if err := feed.Render(w); err != nil {
		t.logger.ErrorContext(ctx, "failed to render feed", "err", err)
		http.Error(w, "failed to render feed", http.StatusInternalServerError)
		return
	}
}
79
80
func (t *telegram) worker(ctx context.Context) error {
81
	t.logger.Info("starting telegram bot")
82
83
	offset, err := t.loadOffset()
84
	if err != nil {
85
		return err
86
	}
87
88
	for {
89
		updates, err := t.tg.GetUpdates(ctx, offset)
90
		if err != nil {
91
			t.logger.ErrorContext(ctx, "getUpdates failed", "err", err)
92
			select {
93
			case <-ctx.Done():
94
				return nil
95
			case <-time.After(5 * time.Second):
96
				continue
97
			}
98
		}
99
100
		for _, u := range updates {
101
			if u.Message != nil && u.Message.From != nil {
102
				t.logger.InfoContext(ctx, "message from", "user_id", u.Message.From.ID, "username", u.Message.From.Username, "msg", messageText(u.Message))
103
			}
104
105
			if u.Message == nil || u.Message.From == nil || u.Message.From.ID != t.allowedID {
106
				offset = u.UpdateID + 1
107
				continue
108
			}
109
110
			_ = t.enrichMessageWithLinkTitles(ctx, u.Message)
111
112
			if err := t.saveMessage(u.Message); err != nil {
113
				t.logger.ErrorContext(ctx, "failed to save message", "err", err)
114
			}
115
116
			if err := t.tg.SetReaction(ctx, u.Message.From.ID, u.Message.MessageID, "👍"); err != nil {
117
				slog.ErrorContext(ctx, "failed to set reaction", "err", err)
118
			}
119
120
			offset = u.UpdateID + 1
121
		}
122
123
		if err := t.saveOffset(offset); err != nil {
124
			slog.ErrorContext(ctx, "failed to save offset", "err", err)
125
		}
126
127
		select {
128
		case <-ctx.Done():
129
			return nil
130
		case <-time.After(time.Second):
131
		}
132
	}
133
}
134
135
// saveOffset stores offset under the "offset" key of the db bucket, encoded as
// 8 big-endian bytes.
func (t *telegram) saveOffset(offset int64) error {
	var encoded [8]byte
	binary.BigEndian.PutUint64(encoded[:], uint64(offset))
	return t.db.Set([]byte("offset"), encoded[:])
}
138
139
// loadOffset reads the polling offset stored under the "offset" key of the db
// bucket.
//
// A missing or empty value yields offset 0 with no error. A value shorter than
// the 8 bytes written by saveOffset is reported as an error instead of being
// decoded, because binary.BigEndian.Uint64 panics on short input.
func (t *telegram) loadOffset() (int64, error) {
	val, err := t.db.Get([]byte("offset"))
	if err != nil {
		return 0, err
	}
	if len(val) == 0 {
		return 0, nil
	}
	if len(val) < 8 {
		return 0, fmt.Errorf("telegram: stored offset is %d bytes, want 8", len(val))
	}
	return int64(binary.BigEndian.Uint64(val)), nil
}
146
147
// saveMessage gob-encodes m and stores it in the messages bucket, keyed by the
// message ID encoded as 8 big-endian bytes. Saving a message with an ID that
// is already present writes to the same key.
func (t *telegram) saveMessage(m *Message) error {
	encoded := new(bytes.Buffer)
	if err := gob.NewEncoder(encoded).Encode(m); err != nil {
		return err
	}

	var key [8]byte
	binary.BigEndian.PutUint64(key[:], uint64(m.MessageID))
	return t.messages.Set(key[:], encoded.Bytes())
}
155
156
// loadMessages decodes every entry of the messages bucket.
//
// An entry that fails to gob-decode is logged as a warning and skipped, so one
// bad record does not fail the whole load. The error returned is whatever the
// bucket iteration itself reports.
func (t *telegram) loadMessages(ctx context.Context) ([]*Message, error) {
	var loaded []*Message

	decode := func(key, raw []byte) error {
		msg := new(Message)
		if err := gob.NewDecoder(bytes.NewReader(raw)).Decode(msg); err != nil {
			t.logger.WarnContext(ctx, "failed to decode telegram message, skipping", "key", fmt.Sprintf("%x", key), "err", err)
			return nil
		}
		loaded = append(loaded, msg)
		return nil
	}

	err := t.messages.ForEach(decode)
	return loaded, err
}
169
170
// enrichMessageWithLinkTitles makes sure m.LinkTitles holds a title for each
// normalized link of a single-link message, and reports whether the map was
// modified.
//
// Messages that are not single-link messages, or that yield no normalized
// links, are left untouched. A link whose cached title is already meaningful
// is skipped. A cached title that is non-empty but not meaningful is removed
// before a fresh lookup. A failed lookup is logged as a warning and the link is
// left without a title.
func (t *telegram) enrichMessageWithLinkTitles(ctx context.Context, m *Message) bool {
	body := messageText(m)
	if !isSingleLinkMessage(body) {
		return false
	}

	urls := normalizeLinks(messageLinks(body))
	if len(urls) == 0 {
		return false
	}

	if m.LinkTitles == nil {
		m.LinkTitles = make(map[string]string, len(urls))
	}

	var dirty bool
	for _, u := range urls {
		cached := normalizePageTitle(m.LinkTitles[u])
		switch {
		case isMeaningfulPageTitle(cached):
			// Already have a usable title for this link.
			continue
		case cached != "":
			// Drop the unusable cached title before looking it up again.
			delete(m.LinkTitles, u)
			dirty = true
		}

		fetched, err := fetchPageTitle(ctx, t.get, u)
		if err != nil {
			t.logger.WarnContext(ctx, "failed to lookup page title", "url", u, "err", err)
			continue
		}
		m.LinkTitles[u] = fetched
		dirty = true
	}
	return dirty
}
204
205
func feedEntryFromMessage(m *Message) app.FeedEntry {
206
	updated := time.Unix(m.Date, 0)
207
	text := messageText(m)
208
	normalizedLinks := normalizeLinks(messageLinks(text))
209
	entryID := fmt.Sprintf("telegram-%d", m.MessageID)
210
	if videoID, ok := firstYouTubeVideoID(normalizedLinks); ok {
211
		entryID = "yt:video:" + videoID
212
	}
213
214
	if m.PhotoBase64 == "" {
215
		title := text
216
		if isSingleLinkMessage(text) {
217
			for _, link := range normalizedLinks {
218
				if t := strings.TrimSpace(m.LinkTitles[link]); t != "" {
219
					title = t
220
					break
221
				}
222
			}
223
		}
224
		if len(title) > 64 {
225
			title = title[:64] + "..."
226
		}
227
228
		content := text
229
		contentType := ""
230
		if len(normalizedLinks) > 0 {
231
			content, _ = linkifyMessageText(text)
232
			contentType = "html"
233
		}
234
235
		return app.FeedEntry{
236
			Title:       title,
237
			ID:          entryID,
238
			Links:       feedLinks(normalizedLinks),
239
			Content:     content,
240
			Updated:     updated,
241
			ContentType: contentType,
242
		}
243
	}
244
245
	parts := make([]string, 0, 2)
246
	if t := strings.TrimSpace(text); t != "" {
247
		linkified, _ := linkifyMessageText(t)
248
		parts = append(parts, "<p>"+linkified+"</p>")
249
	}
250
	mimeType := m.PhotoMIMEType
251
	if mimeType == "" {
252
		mimeType = "image/jpeg"
253
	}
254
	parts = append(parts, fmt.Sprintf(`<p><img src="data:%s;base64,%s" alt="telegram image"/></p>`, mimeType, m.PhotoBase64))
255
256
	return app.FeedEntry{
257
		Title:       fmt.Sprintf("🖼️ [%s]", updated.Format("2006-01-02")),
258
		ID:          entryID,
259
		Links:       feedLinks(normalizedLinks),
260
		Content:     strings.Join(parts, ""),
261
		ContentType: "html",
262
		Updated:     updated,
263
	}
264
}
265
266
func isSingleLinkMessage(text string) bool {
267
	links := findLinks(text)
268
	if len(links) != 1 {
269
		return false
270
	}
271
	link := links[0]
272
	if strings.TrimSpace(text[:link.start]) != "" {
273
		return false
274
	}
275
	after := strings.TrimSpace(text[link.end:])
276
	return trailingPunctRe.ReplaceAllString(after, "") == ""
277
}
278
279
// messageText returns the text of m: its caption when the caption contains
// anything besides whitespace (returned untrimmed), otherwise its text. A nil
// message yields the empty string.
func messageText(m *Message) string {
	switch {
	case m == nil:
		return ""
	case strings.TrimSpace(m.Caption) != "":
		return m.Caption
	default:
		return m.Text
	}
}