all repos

rss-tools @ 19e8c1c7e354df8337b753281d599f9c45a33036

Get RSS feeds from sources that I need and that don't provide one.

rss-tools/sources/telegram/telegram.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
telegram: preserve \n in multiline messages, support multi image messages, 14 days ago
1
package telegram
2
3
import (
4
	"bytes"
5
	"context"
6
	"encoding/binary"
7
	"encoding/gob"
8
	"fmt"
9
	"html"
10
	"log/slog"
11
	"net/http"
12
	"strings"
13
	"time"
14
15
	"olexsmir.xyz/rss-tools/app"
16
)
17
18
type telegram struct {
19
	db        *app.Bucket
20
	messages  *app.Bucket
21
	client    *http.Client
22
	get       func(context.Context, string) (*http.Response, error)
23
	tg        *TelegramSDK
24
	allowedID int64
25
	logger    *slog.Logger
26
}
27
28
func Register(a *app.App) error {
29
	db, err := a.Bucket("telegram")
30
	if err != nil {
31
		return err
32
	}
33
34
	messages, err := a.Bucket("telegram:messages")
35
	if err != nil {
36
		return err
37
	}
38
39
	t := &telegram{
40
		db:        db,
41
		messages:  messages,
42
		client:    a.Client,
43
		get:       a.Get,
44
		tg:        NewSDK(a.Client, a.Config.TGToken),
45
		allowedID: a.Config.TGUserID,
46
		logger:    a.Logger,
47
	}
48
49
	a.AddWorker(t.worker)
50
	a.Route("GET /telegram", t.handler)
51
	a.Logger.Info("telegram source registered")
52
	return nil
53
}
54
55
// handler serves the Telegram feed. It loads every stored message, fills in
// missing link titles (persisting each message whose titles changed), merges
// media groups and renders the feed to w. Every failure is logged with its
// underlying error and answered with a 500.
func (t *telegram) handler(w http.ResponseWriter, r *http.Request) {
	// todo: cache feed construction
	// todo: dont include messages older than N days

	ctx := r.Context()

	messages, err := t.loadMessages(ctx)
	if err != nil {
		t.logger.ErrorContext(ctx, "failed to load messages", "err", err)
		http.Error(w, "failed to load messages", http.StatusInternalServerError)
		return
	}

	feed := app.NewFeed("Telegram feed", "telegram-feed")
	for _, m := range messages {
		if !t.enrichMessageWithLinkTitles(ctx, m) {
			continue
		}
		// Titles were fetched or dropped: write the message back so the
		// lookup is not repeated on the next request.
		if err := t.saveMessage(m); err != nil {
			t.logger.ErrorContext(ctx, "failed to update cached titles", "message_id", m.MessageID, "err", err)
			http.Error(w, "failed to update cached titles", http.StatusInternalServerError)
			return
		}
	}

	for _, m := range groupMessages(messages) {
		feed.Add(feedEntryFromMessage(m))
	}

	if err := feed.Render(w); err != nil {
		t.logger.ErrorContext(ctx, "failed to render feed", "err", err)
		http.Error(w, "failed to render feed", http.StatusInternalServerError)
	}
}
84
85
func (t *telegram) worker(ctx context.Context) error {
86
	t.logger.Info("starting telegram bot")
87
88
	offset, err := t.loadOffset()
89
	if err != nil {
90
		return err
91
	}
92
93
	for {
94
		updates, err := t.tg.GetUpdates(ctx, offset)
95
		if err != nil {
96
			t.logger.ErrorContext(ctx, "getUpdates failed", "err", err)
97
			select {
98
			case <-ctx.Done():
99
				return nil
100
			case <-time.After(5 * time.Second):
101
				continue
102
			}
103
		}
104
105
		for _, u := range updates {
106
			if u.Message != nil && u.Message.From != nil {
107
				t.logger.InfoContext(ctx, "message from", "user_id", u.Message.From.ID, "username", u.Message.From.Username, "msg", messageText(u.Message))
108
			}
109
110
			if u.Message == nil || u.Message.From == nil || u.Message.From.ID != t.allowedID {
111
				offset = u.UpdateID + 1
112
				continue
113
			}
114
115
			_ = t.enrichMessageWithLinkTitles(ctx, u.Message)
116
117
			if err := t.saveMessage(u.Message); err != nil {
118
				t.logger.ErrorContext(ctx, "failed to save message", "err", err)
119
			}
120
121
			if err := t.tg.SetReaction(ctx, u.Message.From.ID, u.Message.MessageID, "👍"); err != nil {
122
				slog.ErrorContext(ctx, "failed to set reaction", "err", err)
123
			}
124
125
			offset = u.UpdateID + 1
126
		}
127
128
		if err := t.saveOffset(offset); err != nil {
129
			slog.ErrorContext(ctx, "failed to save offset", "err", err)
130
		}
131
132
		select {
133
		case <-ctx.Done():
134
			return nil
135
		case <-time.After(time.Second):
136
		}
137
	}
138
}
139
140
// saveOffset stores offset under the "offset" key as 8 big-endian bytes.
func (t *telegram) saveOffset(offset int64) error {
	encoded := make([]byte, 8)
	binary.BigEndian.PutUint64(encoded, uint64(offset))
	return t.db.Set([]byte("offset"), encoded)
}
143
144
// loadOffset reads the polling offset stored under the "offset" key.
//
// A missing (nil or empty) value yields 0 with no error. A value shorter than
// the 8 bytes written by saveOffset is reported as an error instead of
// panicking inside the big-endian decode.
func (t *telegram) loadOffset() (int64, error) {
	val, err := t.db.Get([]byte("offset"))
	if err != nil {
		return 0, err
	}
	if len(val) == 0 {
		return 0, nil
	}
	if len(val) < 8 {
		return 0, fmt.Errorf("telegram: stored offset has %d bytes, want 8", len(val))
	}
	return int64(binary.BigEndian.Uint64(val)), nil
}
151
152
// saveMessage gob-encodes m and stores it in the messages bucket under its
// message ID, written as 8 big-endian bytes. Saving a message whose ID is
// already present overwrites the stored value.
func (t *telegram) saveMessage(m *Message) error {
	encoded := new(bytes.Buffer)
	if err := gob.NewEncoder(encoded).Encode(m); err != nil {
		return err
	}

	id := make([]byte, 8)
	binary.BigEndian.PutUint64(id, uint64(m.MessageID))
	return t.messages.Set(id, encoded.Bytes())
}
160
161
// loadMessages decodes every entry of the messages bucket. An entry that
// cannot be gob-decoded is logged as a warning and skipped rather than
// failing the whole load. The error returned is whatever the bucket iteration
// itself reports.
func (t *telegram) loadMessages(ctx context.Context) ([]*Message, error) {
	var loaded []*Message

	collect := func(key, raw []byte) error {
		msg := new(Message)
		if decodeErr := gob.NewDecoder(bytes.NewReader(raw)).Decode(msg); decodeErr != nil {
			t.logger.WarnContext(ctx, "failed to decode telegram message, skipping", "key", fmt.Sprintf("%x", key), "err", decodeErr)
			return nil
		}
		loaded = append(loaded, msg)
		return nil
	}

	err := t.messages.ForEach(collect)
	return loaded, err
}
174
175
func groupMessages(messages []*Message) []*Message {
176
	if len(messages) == 0 {
177
		return messages
178
	}
179
180
	groups := make(map[string]*Message)
181
	out := make([]*Message, 0, len(messages))
182
	for _, m := range messages {
183
		if m == nil || strings.TrimSpace(m.MediaGroupID) == "" {
184
			out = append(out, m)
185
			continue
186
		}
187
188
		group, ok := groups[m.MediaGroupID]
189
		if !ok {
190
			group = &Message{
191
				MessageID:    m.MessageID,
192
				From:         m.From,
193
				Chat:         m.Chat,
194
				Text:         m.Text,
195
				Caption:      m.Caption,
196
				Date:         m.Date,
197
				MediaGroupID: m.MediaGroupID,
198
				LinkTitles:   m.LinkTitles,
199
			}
200
			groups[m.MediaGroupID] = group
201
			out = append(out, group)
202
		}
203
204
		if m.MessageID != 0 && (group.MessageID == 0 || m.MessageID < group.MessageID) {
205
			group.MessageID = m.MessageID
206
		}
207
		if m.Date != 0 && (group.Date == 0 || m.Date < group.Date) {
208
			group.Date = m.Date
209
		}
210
		if strings.TrimSpace(messageText(group)) == "" && strings.TrimSpace(messageText(m)) != "" {
211
			group.Caption = m.Caption
212
			group.Text = m.Text
213
			if len(m.LinkTitles) > 0 {
214
				group.LinkTitles = m.LinkTitles
215
			}
216
		} else if len(group.LinkTitles) == 0 && len(m.LinkTitles) > 0 {
217
			group.LinkTitles = m.LinkTitles
218
		}
219
220
		group.PhotoAttachments = append(group.PhotoAttachments, messagePhotos(m)...)
221
		if group.PhotoBase64 == "" && m.PhotoBase64 != "" {
222
			group.PhotoBase64 = m.PhotoBase64
223
			group.PhotoMIMEType = m.PhotoMIMEType
224
		}
225
	}
226
	return out
227
}
228
229
// enrichMessageWithLinkTitles makes sure a single-link message has a page
// title cached in m.LinkTitles for each of its normalized links.
//
// Links whose cached title is already meaningful are left alone. A cached
// title that is non-empty but not meaningful is removed, and a fresh title is
// fetched through t.get. Lookup failures are logged and skipped.
//
// It reports whether m.LinkTitles actually changed, i.e. whether the caller
// needs to persist m. Storing a title identical to the one already present
// does not count as a change, which avoids a rewrite on every call for pages
// that keep yielding the same title.
func (t *telegram) enrichMessageWithLinkTitles(ctx context.Context, m *Message) bool {
	text := messageText(m)
	if !isSingleLinkMessage(text) {
		return false
	}

	links := normalizeLinks(messageLinks(text))
	if len(links) == 0 {
		return false
	}
	if m.LinkTitles == nil {
		m.LinkTitles = make(map[string]string, len(links))
	}

	changed := false
	for _, link := range links {
		stored, had := m.LinkTitles[link]
		cachedTitle := normalizePageTitle(stored)
		if isMeaningfulPageTitle(cachedTitle) {
			continue
		}
		if cachedTitle != "" {
			delete(m.LinkTitles, link)
			had = false
			changed = true
		}

		title, err := fetchPageTitle(ctx, t.get, link)
		if err != nil {
			t.logger.WarnContext(ctx, "failed to lookup page title", "url", link, "err", err)
			continue
		}
		if had && stored == title {
			// Same value as before: nothing new to persist.
			continue
		}
		m.LinkTitles[link] = title
		changed = true
	}
	return changed
}
263
264
func feedEntryFromMessage(m *Message) app.FeedEntry {
265
	updated := time.Unix(m.Date, 0)
266
	text := normalizeMessageText(messageText(m))
267
	normalizedLinks := normalizeLinks(messageLinks(text))
268
	entryID := fmt.Sprintf("telegram-%d", m.MessageID)
269
	if videoID, ok := firstYouTubeVideoID(normalizedLinks); ok {
270
		entryID = "yt:video:" + videoID
271
	}
272
273
	photos := messagePhotos(m)
274
	if len(photos) == 0 {
275
		title := text
276
		if isSingleLinkMessage(text) {
277
			for _, link := range normalizedLinks {
278
				if t := strings.TrimSpace(m.LinkTitles[link]); t != "" {
279
					title = t
280
					break
281
				}
282
			}
283
		}
284
		if len(title) > 64 {
285
			title = title[:64] + "..."
286
		}
287
288
		content := text
289
		contentType := ""
290
		if len(normalizedLinks) > 0 {
291
			content, _ = linkifyMessageText(text)
292
			content = preserveLineBreaks(content)
293
			contentType = "html"
294
		} else if strings.Contains(text, "\n") {
295
			content = preserveLineBreaks(html.EscapeString(text))
296
			contentType = "html"
297
		}
298
299
		return app.FeedEntry{
300
			Title:       title,
301
			ID:          entryID,
302
			Links:       feedLinks(normalizedLinks),
303
			Content:     content,
304
			Updated:     updated,
305
			ContentType: contentType,
306
		}
307
	}
308
309
	parts := make([]string, 0, 1+len(photos))
310
	if t := strings.TrimSpace(text); t != "" {
311
		linkified, _ := linkifyMessageText(text)
312
		linkified = preserveLineBreaks(linkified)
313
		parts = append(parts, "<p>"+linkified+"</p>")
314
	}
315
	for _, photo := range photos {
316
		if photo.Base64 == "" {
317
			continue
318
		}
319
		mimeType := photo.MIMEType
320
		if mimeType == "" {
321
			mimeType = "image/jpeg"
322
		}
323
		parts = append(parts, fmt.Sprintf(`<p><img src="data:%s;base64,%s" alt="telegram image"/></p>`, mimeType, photo.Base64))
324
	}
325
326
	return app.FeedEntry{
327
		Title:       fmt.Sprintf("🖼️ [%s]", updated.Format("2006-01-02")),
328
		ID:          entryID,
329
		Links:       feedLinks(normalizedLinks),
330
		Content:     strings.Join(parts, ""),
331
		ContentType: "html",
332
		Updated:     updated,
333
	}
334
}
335
336
func isSingleLinkMessage(text string) bool {
337
	links := findLinks(text)
338
	if len(links) != 1 {
339
		return false
340
	}
341
	link := links[0]
342
	if strings.TrimSpace(text[:link.start]) != "" {
343
		return false
344
	}
345
	after := strings.TrimSpace(text[link.end:])
346
	return trailingPunctRe.ReplaceAllString(after, "") == ""
347
}
348
349
// messageText returns the text to show for m: the caption when it is not
// blank, otherwise the plain text. A nil message yields "".
func messageText(m *Message) string {
	switch {
	case m == nil:
		return ""
	case strings.TrimSpace(m.Caption) != "":
		return m.Caption
	default:
		return m.Text
	}
}
358
359
// normalizeMessageText rewrites every line ending in text to "\n": a "\r\n"
// pair becomes a single "\n", and any remaining lone "\r" becomes "\n".
func normalizeMessageText(text string) string {
	// Pairs are matched in argument order, so "\r\n" wins over a bare "\r".
	lineEndings := strings.NewReplacer("\r\n", "\n", "\r", "\n")
	return lineEndings.Replace(text)
}
363
364
// preserveLineBreaks turns every "\n" in text into an HTML "<br/>" tag. Text
// without a newline comes back unchanged.
func preserveLineBreaks(text string) string {
	lines := strings.Split(text, "\n")
	return strings.Join(lines, "<br/>")
}
370
371
// messagePhotos returns the photos attached to m. When PhotoAttachments is
// populated a copy of it is returned. Otherwise the single PhotoBase64 /
// PhotoMIMEType pair is wrapped in a one-element slice, with the MIME type
// defaulting to "image/jpeg". A nil message or a message without any photo
// yields nil.
func messagePhotos(m *Message) []PhotoAttachment {
	switch {
	case m == nil:
		return nil
	case len(m.PhotoAttachments) > 0:
		// Copy so callers cannot modify the message's own slice.
		return append([]PhotoAttachment(nil), m.PhotoAttachments...)
	case m.PhotoBase64 == "":
		return nil
	}

	single := PhotoAttachment{Base64: m.PhotoBase64, MIMEType: m.PhotoMIMEType}
	if single.MIMEType == "" {
		single.MIMEType = "image/jpeg"
	}
	return []PhotoAttachment{single}
}