all repos

rss-tools @ 71f9578bfe2969b6b22984c4264c8bf6c067e608

Get RSS feeds from sources that (I need and) don't provide one

rss-tools/sources/telegram/telegram.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
refactor atom feed builder, 14 days ago
1
package telegram
2
3
import (
4
	"bytes"
5
	"context"
6
	"encoding/binary"
7
	"encoding/gob"
8
	"fmt"
9
	"html"
10
	"log/slog"
11
	"net/http"
12
	"strings"
13
	"time"
14
15
	"olexsmir.xyz/rss-tools/app"
16
	"olexsmir.xyz/rss-tools/app/atom"
17
)
18
19
type telegram struct {
20
	db        *app.Bucket
21
	messages  *app.Bucket
22
	client    *http.Client
23
	get       func(context.Context, string) (*http.Response, error)
24
	tg        *TelegramSDK
25
	allowedID int64
26
	logger    *slog.Logger
27
}
28
29
func Register(a *app.App) error {
30
	db, err := a.Bucket("telegram")
31
	if err != nil {
32
		return err
33
	}
34
35
	messages, err := a.Bucket("telegram:messages")
36
	if err != nil {
37
		return err
38
	}
39
40
	t := &telegram{
41
		db:        db,
42
		messages:  messages,
43
		client:    a.Client,
44
		get:       a.Get,
45
		tg:        NewSDK(a.Client, a.Config.TGToken),
46
		allowedID: a.Config.TGUserID,
47
		logger:    a.Logger,
48
	}
49
50
	a.AddWorker(t.worker)
51
	a.Route("GET /telegram", t.handler)
52
	a.Logger.Info("telegram source registered")
53
	return nil
54
}
55
56
// handler serves the Atom feed built from every stored Telegram message.
//
// Before building the feed it tries to fill in missing link titles on each
// stored message and persists any message whose titles changed. Messages
// are then grouped by media group and rendered to w. Every failure is
// answered with a 500 and logged with the underlying error, so the cause is
// not lost.
func (t *telegram) handler(w http.ResponseWriter, r *http.Request) {
	// todo: cache feed construction
	// todo: dont include messages older than N days
	ctx := r.Context()

	messages, err := t.loadMessages(ctx)
	if err != nil {
		t.logger.ErrorContext(ctx, "failed to load messages", "err", err)
		http.Error(w, "failed to load messages", http.StatusInternalServerError)
		return
	}

	feed := atom.NewFeed("Telegram feed", "telegram-feed")
	for _, m := range messages {
		if changed := t.enrichMessageWithLinkTitles(ctx, m); !changed {
			continue
		}
		if err := t.saveMessage(m); err != nil {
			t.logger.ErrorContext(ctx, "failed to update cached titles", "err", err)
			http.Error(w, "failed to update cached titles", http.StatusInternalServerError)
			return
		}
	}

	for _, m := range groupMessages(messages) {
		feed.Add(feedEntryFromMessage(m))
	}

	if err := feed.Render(w); err != nil {
		t.logger.ErrorContext(ctx, "failed to render feed", "err", err)
		http.Error(w, "failed to render feed", http.StatusInternalServerError)
		return
	}
}
85
86
func (t *telegram) worker(ctx context.Context) error {
87
	t.logger.Info("starting telegram bot")
88
89
	offset, err := t.loadOffset()
90
	if err != nil {
91
		return err
92
	}
93
94
	for {
95
		updates, err := t.tg.GetUpdates(ctx, offset)
96
		if err != nil {
97
			t.logger.ErrorContext(ctx, "getUpdates failed", "err", err)
98
			select {
99
			case <-ctx.Done():
100
				return nil
101
			case <-time.After(5 * time.Second):
102
				continue
103
			}
104
		}
105
106
		for _, u := range updates {
107
			if u.Message != nil && u.Message.From != nil {
108
				t.logger.InfoContext(ctx, "message from", "user_id", u.Message.From.ID, "username", u.Message.From.Username, "msg", messageText(u.Message))
109
			}
110
111
			if u.Message == nil || u.Message.From == nil || u.Message.From.ID != t.allowedID {
112
				offset = u.UpdateID + 1
113
				continue
114
			}
115
116
			_ = t.enrichMessageWithLinkTitles(ctx, u.Message)
117
118
			if err := t.saveMessage(u.Message); err != nil {
119
				t.logger.ErrorContext(ctx, "failed to save message", "err", err)
120
			}
121
122
			if err := t.tg.SetReaction(ctx, u.Message.From.ID, u.Message.MessageID, "👍"); err != nil {
123
				slog.ErrorContext(ctx, "failed to set reaction", "err", err)
124
			}
125
126
			offset = u.UpdateID + 1
127
		}
128
129
		if err := t.saveOffset(offset); err != nil {
130
			slog.ErrorContext(ctx, "failed to save offset", "err", err)
131
		}
132
133
		select {
134
		case <-ctx.Done():
135
			return nil
136
		case <-time.After(time.Second):
137
		}
138
	}
139
}
140
141
// saveOffset persists the polling offset under the "offset" key as an
// 8-byte big-endian integer.
func (t *telegram) saveOffset(offset int64) error {
	encoded := binary.BigEndian.AppendUint64(make([]byte, 0, 8), uint64(offset))
	key := []byte("offset")
	return t.db.Set(key, encoded)
}
144
145
// loadOffset reads the persisted polling offset.
//
// It returns 0 when no offset has been stored yet, the storage error if the
// read fails, and an error (rather than panicking) when the stored value is
// too short to hold a 64-bit integer.
func (t *telegram) loadOffset() (int64, error) {
	val, err := t.db.Get([]byte("offset"))
	if err != nil {
		return 0, err
	}
	if val == nil {
		return 0, nil
	}
	// binary.BigEndian.Uint64 panics on fewer than 8 bytes; a truncated or
	// corrupted record must not take the process down.
	if len(val) < 8 {
		return 0, fmt.Errorf("telegram: stored offset is %d bytes, want 8", len(val))
	}
	return int64(binary.BigEndian.Uint64(val)), nil
}
152
153
// saveMessage gob-encodes m and stores it in the messages bucket under its
// message ID, encoded as an 8-byte big-endian key.
func (t *telegram) saveMessage(m *Message) error {
	payload := new(bytes.Buffer)
	enc := gob.NewEncoder(payload)
	if err := enc.Encode(m); err != nil {
		return err
	}

	id := binary.BigEndian.AppendUint64(make([]byte, 0, 8), uint64(m.MessageID))
	return t.messages.Set(id, payload.Bytes())
}
161
162
// loadMessages decodes every record in the messages bucket.
//
// Records that fail to decode are logged at warn level and skipped rather
// than aborting the whole load. The returned error is whatever the bucket
// iteration itself reports.
func (t *telegram) loadMessages(ctx context.Context) ([]*Message, error) {
	var loaded []*Message

	collect := func(key, raw []byte) error {
		msg := new(Message)
		if decodeErr := gob.NewDecoder(bytes.NewReader(raw)).Decode(msg); decodeErr != nil {
			t.logger.WarnContext(ctx, "failed to decode telegram message, skipping", "key", fmt.Sprintf("%x", key), "err", decodeErr)
			return nil
		}
		loaded = append(loaded, msg)
		return nil
	}

	// Run the iteration before building the return values so `loaded` is
	// fully populated when it is read.
	err := t.messages.ForEach(collect)
	return loaded, err
}
175
176
func groupMessages(messages []*Message) []*Message {
177
	if len(messages) == 0 {
178
		return messages
179
	}
180
181
	groups := make(map[string]*Message)
182
	out := make([]*Message, 0, len(messages))
183
	for _, m := range messages {
184
		if m == nil || strings.TrimSpace(m.MediaGroupID) == "" {
185
			out = append(out, m)
186
			continue
187
		}
188
189
		group, ok := groups[m.MediaGroupID]
190
		if !ok {
191
			group = &Message{
192
				MessageID:    m.MessageID,
193
				From:         m.From,
194
				Chat:         m.Chat,
195
				Text:         m.Text,
196
				Caption:      m.Caption,
197
				Date:         m.Date,
198
				MediaGroupID: m.MediaGroupID,
199
				LinkTitles:   m.LinkTitles,
200
			}
201
			groups[m.MediaGroupID] = group
202
			out = append(out, group)
203
		}
204
205
		if m.MessageID != 0 && (group.MessageID == 0 || m.MessageID < group.MessageID) {
206
			group.MessageID = m.MessageID
207
		}
208
		if m.Date != 0 && (group.Date == 0 || m.Date < group.Date) {
209
			group.Date = m.Date
210
		}
211
		if strings.TrimSpace(messageText(group)) == "" && strings.TrimSpace(messageText(m)) != "" {
212
			group.Caption = m.Caption
213
			group.Text = m.Text
214
			if len(m.LinkTitles) > 0 {
215
				group.LinkTitles = m.LinkTitles
216
			}
217
		} else if len(group.LinkTitles) == 0 && len(m.LinkTitles) > 0 {
218
			group.LinkTitles = m.LinkTitles
219
		}
220
221
		group.PhotoAttachments = append(group.PhotoAttachments, messagePhotos(m)...)
222
		if group.PhotoBase64 == "" && m.PhotoBase64 != "" {
223
			group.PhotoBase64 = m.PhotoBase64
224
			group.PhotoMIMEType = m.PhotoMIMEType
225
		}
226
	}
227
	return out
228
}
229
230
// enrichMessageWithLinkTitles fills m.LinkTitles with page titles for the
// link in a single-link message and reports whether the map was modified.
//
// Messages that are not a single link, or whose links normalize to nothing,
// are left untouched. A cached title that is already meaningful is kept; a
// non-empty cached title that is not meaningful is dropped and looked up
// again. Lookup failures are logged at warn level and skipped.
func (t *telegram) enrichMessageWithLinkTitles(ctx context.Context, m *Message) bool {
	body := messageText(m)
	if !isSingleLinkMessage(body) {
		return false
	}

	urls := normalizeLinks(messageLinks(body))
	if len(urls) == 0 {
		return false
	}

	if m.LinkTitles == nil {
		m.LinkTitles = make(map[string]string, len(urls))
	}

	var modified bool
	for _, u := range urls {
		known := normalizePageTitle(m.LinkTitles[u])
		if isMeaningfulPageTitle(known) {
			continue
		}

		// A stale, non-meaningful title is removed before retrying so it is
		// gone even if the lookup below fails.
		if known != "" {
			delete(m.LinkTitles, u)
			modified = true
		}

		fetched, err := fetchPageTitle(ctx, t.get, u)
		if err != nil {
			t.logger.WarnContext(ctx, "failed to lookup page title", "url", u, "err", err)
			continue
		}

		m.LinkTitles[u] = fetched
		modified = true
	}

	return modified
}
264
265
func feedEntryFromMessage(m *Message) *atom.Entry {
266
	updated := time.Unix(m.Date, 0)
267
	text := normalizeMessageText(messageText(m))
268
	normalizedLinks := normalizeLinks(messageLinks(text))
269
	entryID := fmt.Sprintf("telegram-%d", m.MessageID)
270
	if videoID, ok := firstYouTubeVideoID(normalizedLinks); ok {
271
		entryID = "yt:video:" + videoID
272
	}
273
274
	photos := messagePhotos(m)
275
	if len(photos) == 0 {
276
		title := text
277
		if isSingleLinkMessage(text) {
278
			for _, link := range normalizedLinks {
279
				if t := strings.TrimSpace(m.LinkTitles[link]); t != "" {
280
					title = t
281
					break
282
				}
283
			}
284
		}
285
		if len(title) > 64 {
286
			title = title[:64] + "..."
287
		}
288
289
		content := text
290
		contentType := ""
291
		if len(normalizedLinks) > 0 {
292
			content, _ = linkifyMessageText(text)
293
			content = preserveLineBreaks(content)
294
			contentType = "html"
295
		} else if strings.Contains(text, "\n") {
296
			content = preserveLineBreaks(html.EscapeString(text))
297
			contentType = "html"
298
		}
299
300
		return &atom.Entry{
301
			Title:   title,
302
			ID:      entryID,
303
			Link:    feedLinks(normalizedLinks),
304
			Content: atom.NewText(content, contentType),
305
			Updated: atom.Time(updated),
306
		}
307
	}
308
309
	parts := make([]string, 0, 1+len(photos))
310
	if t := strings.TrimSpace(text); t != "" {
311
		linkified, _ := linkifyMessageText(text)
312
		linkified = preserveLineBreaks(linkified)
313
		parts = append(parts, "<p>"+linkified+"</p>")
314
	}
315
	for _, photo := range photos {
316
		if photo.Base64 == "" {
317
			continue
318
		}
319
		mimeType := photo.MIMEType
320
		if mimeType == "" {
321
			mimeType = "image/jpeg"
322
		}
323
		parts = append(parts, fmt.Sprintf(`<p><img src="data:%s;base64,%s" alt="telegram image"/></p>`, mimeType, photo.Base64))
324
	}
325
326
	return &atom.Entry{
327
		Title:   fmt.Sprintf("🖼️ [%s]", updated.Format("2006-01-02")),
328
		ID:      entryID,
329
		Link:    feedLinks(normalizedLinks),
330
		Content: atom.NewText(strings.Join(parts, ""), "html"),
331
		Updated: atom.Time(updated),
332
	}
333
}
334
335
func isSingleLinkMessage(text string) bool {
336
	links := findLinks(text)
337
	if len(links) != 1 {
338
		return false
339
	}
340
	link := links[0]
341
	if strings.TrimSpace(text[:link.start]) != "" {
342
		return false
343
	}
344
	after := strings.TrimSpace(text[link.end:])
345
	return trailingPunctRe.ReplaceAllString(after, "") == ""
346
}
347
348
// messageText returns the text to display for m: the caption when it is not
// blank, otherwise the plain text. A nil message yields "".
//
// The caption is returned as stored, not trimmed.
func messageText(m *Message) string {
	switch {
	case m == nil:
		return ""
	case strings.TrimSpace(m.Caption) != "":
		return m.Caption
	default:
		return m.Text
	}
}
357
358
// lineEndingNormalizer rewrites CRLF and lone CR line endings to LF in one
// pass. "\r\n" is listed first so it wins over "\r" at the same position.
var lineEndingNormalizer = strings.NewReplacer("\r\n", "\n", "\r", "\n")

// normalizeMessageText converts every CRLF and lone CR in text to a single
// LF, leaving existing LFs untouched.
func normalizeMessageText(text string) string {
	return lineEndingNormalizer.Replace(text)
}
362
363
// preserveLineBreaks replaces every "\n" in text with an HTML "<br/>" so
// line breaks survive HTML rendering. Text without newlines is returned
// unchanged.
func preserveLineBreaks(text string) string {
	lines := strings.Split(text, "\n")
	if len(lines) == 1 {
		// No newline present: nothing to rewrite.
		return text
	}
	return strings.Join(lines, "<br/>")
}
369
370
// messagePhotos returns the photos attached to m.
//
// When m has PhotoAttachments, a copy of that slice is returned so callers
// can append to it without touching the message. Otherwise the single
// PhotoBase64/PhotoMIMEType pair is returned as a one-element slice, with
// the MIME type defaulting to "image/jpeg". A nil message or a message
// without any photo yields nil.
func messagePhotos(m *Message) []PhotoAttachment {
	switch {
	case m == nil:
		return nil
	case len(m.PhotoAttachments) > 0:
		dup := make([]PhotoAttachment, len(m.PhotoAttachments))
		copy(dup, m.PhotoAttachments)
		return dup
	case m.PhotoBase64 == "":
		return nil
	}

	single := PhotoAttachment{Base64: m.PhotoBase64, MIMEType: m.PhotoMIMEType}
	if single.MIMEType == "" {
		single.MIMEType = "image/jpeg"
	}
	return []PhotoAttachment{single}
}