all repos

clerk @ 8e80bf966dac8f581e04ce06b823f0ec9dfdaa96

missing tooling for ledger/hledger

clerk/journal/lexer/lexer_test.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
rename to clerk, 14 days ago
1
package lexer
2
3
import (
4
	"testing"
5
6
	"olexsmir.xyz/clerk/internal/testutil/golden"
7
	"olexsmir.xyz/clerk/journal/token"
8
)
9
10
func TestLexer(t *testing.T) {
11
	tests := []struct {
12
		name  string
13
		input string
14
	}{
15
		{"simple transaction", `2024/01/01 groceries
16
    expenses:food  $10.00
17
    assets:checking
18
`},
19
		{"transaction, accounts with uppercase latters", `
20
2011/01/27 Book Store
21
    Expenses:Books                       $20.00
22
    Liabilities:MasterCard
23
`},
24
		{"cleared transaction", `2024/01/01 * groceries
25
    expenses:food  $10.00
26
    assets:checking
27
`},
28
		{"automated transaction", `= ^income
29
    (liabilities:tax)  *.33
30
31
= expenses:gifts
32
    budget:gifts  (amount * -1)
33
`},
34
		{"transaction with code", `2024/01/01 (123) groceries
35
    expenses:food  $10.00
36
    assets:checking
37
`},
38
		{"transaction with virtual accounts", `2024/01/01 * groceries
39
	(virtual:account)  1 PESO
40
	[something:else]   5 PESO
41
`},
42
		{"transaction with unicode commodity symbols", `2024/01/01 groceries
43
    expenses:food  €10.00
44
    expenses:food  £5.00
45
    expenses:food  ₹700.00
46
    expenses:food  40.00 гривні
47
    assets:cash
48
`},
49
		{"date with secondary", `2024/01/01=2024/01/02 groceries`},
50
		{"better date", `2024-01-02`},
51
		{"comment line", `; this is a comment`},
52
		{"star comment", `* this is a comment`},
53
		{"hash comment", `# this is a comment`},
54
		{"account directive", `account expenses:food`},
55
		{"commodity directive", `commodity 1,000.00 UAH`},
56
		{"market price directive", "P 2024-01-01 USD 40.50 UAH\n"},
57
		{"market price directive with time", "P 2024-01-01 12:00:00 USD 40.50 UAH\n"},
58
		{"inline comment", `2024/01/01 groceries ; a note`},
59
		{"empty", ``},
60
		{"blank lines", "\n\n\n"},
61
		{"comment block directive", "comment\ncontent\nend\n"},
62
		{"comment block directive without end", "comment\ncontent\n"},
63
	}
64
	for _, tt := range tests {
65
		t.Run(tt.name, func(t *testing.T) {
66
			l := New("j", []byte(tt.input))
67
			golden.Assert(t, l.Dump())
68
		})
69
	}
70
}
71
72
// token category bounds, ensures fuzzer never sees out-of-range token types.
73
const maxKnownTokenType = token.N
74
75
func FuzzLexer(f *testing.F) {
76
	f.Add([]byte("2024/01/01 groceries\n  expenses:food  $10.00\n  assets:checking\n"))
77
	f.Add([]byte("2024/01/01 * groceries\n  expenses:food  $10.00\n  assets:checking\n"))
78
	f.Add([]byte("2024/01/01 ! groceries\n  expenses:food  $10.00\n  assets:checking\n"))
79
	f.Add([]byte("2024/01/01 t ; inline comment\n  a  $10\n"))
80
	f.Add([]byte("2024/01/01 t\n  (a)  10 @@ $20\n  [b]  30\n"))
81
	f.Add([]byte("2008/06/03 * eat & shop\n    expenses:food      $1\n    expenses:supplies  $1\n    assets:cash\n"))
82
	f.Add([]byte("2015-01-03 * Money exchange office\n    Assets:Cash  -20 EUR @ 7.53 HRK\n    Assets:Cash  150.60 HRK\n"))
83
	f.Add([]byte("2024/01/01 ß\n  (ß)  10 ß\n"))
84
	f.Add([]byte("2024/01/01 t\n  (! a)  10\n"))
85
	f.Add([]byte("comment\nbody\nend\n"))
86
	f.Add([]byte("apply tag foo\nend\n"))
87
	f.Add([]byte("; a comment\n"))
88
	f.Add([]byte("# a comment\n"))
89
	f.Add([]byte("* a comment\n"))
90
	f.Add([]byte("account expenses:food\n"))
91
	f.Add([]byte("commodity 1,000.00 UAH\n"))
92
	f.Add([]byte("N $\n"))
93
	f.Add([]byte("P 2024-01-01 USD 41.50 UAH\n"))
94
	f.Add([]byte("P 2024-01-01 12:00:00 USD 41.50 UAH\n"))
95
	f.Add([]byte("P 2024-01-01 12:00 USD 41.50 UAH\n"))
96
	f.Add([]byte("~ monthly\n  a  $10\n  b\n"))
97
	f.Add([]byte("= /^Income/\n  expenses:food  $10\n"))
98
	f.Add([]byte("перевірка\n"))
99
	f.Add([]byte(""))
100
	f.Add([]byte("\n\n\n"))
101
	f.Add([]byte("@@@\n"))
102
	f.Add([]byte("   \n"))
103
	f.Add([]byte("0\n"))
104
	f.Add([]byte{0xff, 0xfe, 0x00})
105
106
	f.Fuzz(func(t *testing.T, data []byte) {
107
		// Pass 1: lex and validate token stream
108
		l := New("j", data)
109
		var tokens []token.Token
110
		maxTokens := max(len(data)*2, 16)
111
		prevEnd := -1
112
		for range maxTokens {
113
			tok := l.Next()
114
115
			// Monotonic span
116
			if tok.Span.Start.Offset < prevEnd {
117
				t.Fatalf("non-monotonic span: prevEnd=%d current=%s %d",
118
					prevEnd, tok.Type, tok.Span.Start.Offset)
119
			}
120
121
			// Token type in range (no garbage from memory corruption)
122
			if tok.Type < 0 || tok.Type > maxKnownTokenType {
123
				t.Fatalf("token type out of range: %d", tok.Type)
124
			}
125
126
			// Span in bounds (EOF/NEWLINE sentinels may extend one past input)
127
			maxEnd := len(data)
128
			if tok.Type == token.NEWLINE || tok.Type == token.EOF {
129
				maxEnd = len(data) + 1
130
			}
131
			if tok.Span.Start.Offset < 0 || tok.Span.End.Offset > maxEnd ||
132
				tok.Span.Start.Offset > tok.Span.End.Offset {
133
				t.Fatalf("span out of bounds: [%d,%d] for len=%d type=%s",
134
					tok.Span.Start.Offset, tok.Span.End.Offset, len(data), tok.Type)
135
			}
136
137
			if tok.Type == token.EOF {
138
				break
139
			}
140
141
			// Non-zero-length for non-EOF tokens (NEWLINE sentinel is exempt)
142
			if tok.Type != token.NEWLINE && tok.Span.End.Offset <= tok.Span.Start.Offset {
143
				t.Fatalf("non-progressing token: %s %q at %d:%d-%d:%d",
144
					tok.Type, tok.Literal,
145
					tok.Span.Start.Line, tok.Span.Start.Col,
146
					tok.Span.End.Line, tok.Span.End.Col)
147
			}
148
149
			tokens = append(tokens, tok)
150
			prevEnd = tok.Span.End.Offset
151
		}
152
153
		if prevEnd > len(data)+1 {
154
			t.Fatalf("token consumed beyond input: end=%d len=%d", prevEnd, len(data))
155
		}
156
157
		// Pass 2: re-lex the same input — token stream must be identical
158
		l2 := New("j", data)
159
		for _, expected := range tokens {
160
			tok := l2.Next()
161
			if tok.Type != expected.Type || tok.Literal != expected.Literal {
162
				t.Fatalf("re-lex mismatch at offset %d: expected (%s %q), got (%s %q)",
163
					expected.Span.Start.Offset, expected.Type, expected.Literal, tok.Type, tok.Literal)
164
			}
165
		}
166
	})
167
}