clerk/journal/lexer/lexer_test.go (view raw)
| 1 | package lexer |
| 2 | |
| 3 | import ( |
| 4 | "testing" |
| 5 | |
| 6 | "olexsmir.xyz/clerk/internal/testutil/golden" |
| 7 | "olexsmir.xyz/clerk/journal/token" |
| 8 | ) |
| 9 | |
// TestLexer is a table-driven test: every case lexes tt.input with New and
// hands the result of l.Dump() to golden.Assert.
| 10 | func TestLexer(t *testing.T) { |
| 11 | tests := []struct { |
| 12 | name string |
| 13 | input string |
| 14 | }{ |
| 15 | {"simple transaction", `2024/01/01 groceries |
| 16 | expenses:food $10.00 |
| 17 | assets:checking |
| 18 | `}, |
// NOTE(review): "latters" in the next case name is a typo for "letters". The
// name is a runtime string passed to t.Run; golden.Assert presumably derives
// its fixture from the subtest name, so a rename may also need the fixture
// renamed - confirm before changing it.
| 19 | {"transaction, accounts with uppercase latters", ` |
| 20 | 2011/01/27 Book Store |
| 21 | Expenses:Books $20.00 |
| 22 | Liabilities:MasterCard |
| 23 | `}, |
| 24 | {"cleared transaction", `2024/01/01 * groceries |
| 25 | expenses:food $10.00 |
| 26 | assets:checking |
| 27 | `}, |
| 28 | {"automated transaction", `= ^income |
| 29 | (liabilities:tax) *.33 |
| 30 | |
| 31 | = expenses:gifts |
| 32 | budget:gifts (amount * -1) |
| 33 | `}, |
| 34 | {"transaction with code", `2024/01/01 (123) groceries |
| 35 | expenses:food $10.00 |
| 36 | assets:checking |
| 37 | `}, |
| 38 | {"transaction with virtual accounts", `2024/01/01 * groceries |
| 39 | (virtual:account) 1 PESO |
| 40 | [something:else] 5 PESO |
| 41 | `}, |
| 42 | {"transaction with unicode commodity symbols", `2024/01/01 groceries |
| 43 | expenses:food €10.00 |
| 44 | expenses:food £5.00 |
| 45 | expenses:food ₹700.00 |
| 46 | expenses:food 40.00 гривні |
| 47 | assets:cash |
| 48 | `}, |
| 49 | {"date with secondary", `2024/01/01=2024/01/02 groceries`}, |
| 50 | {"better date", `2024-01-02`}, |
| 51 | {"comment line", `; this is a comment`}, |
| 52 | {"star comment", `* this is a comment`}, |
| 53 | {"hash comment", `# this is a comment`}, |
| 54 | {"account directive", `account expenses:food`}, |
| 55 | {"commodity directive", `commodity 1,000.00 UAH`}, |
| 56 | {"market price directive", "P 2024-01-01 USD 40.50 UAH\n"}, |
| 57 | {"market price directive with time", "P 2024-01-01 12:00:00 USD 40.50 UAH\n"}, |
| 58 | {"inline comment", `2024/01/01 groceries ; a note`}, |
| 59 | {"empty", ``}, |
| 60 | {"blank lines", "\n\n\n"}, |
| 61 | {"comment block directive", "comment\ncontent\nend\n"}, |
| 62 | {"comment block directive without end", "comment\ncontent\n"}, |
| 63 | } |
// Each case runs as its own subtest, named after tt.name.
| 64 | for _, tt := range tests { |
| 65 | t.Run(tt.name, func(t *testing.T) { |
// Every case passes the same first argument "j" to New (presumably the
// journal name used in positions/diagnostics - confirm against New).
| 66 | l := New("j", []byte(tt.input)) |
| 67 | golden.Assert(t, l.Dump()) |
| 68 | }) |
| 69 | } |
| 70 | } |
| 71 | |
| 72 | // maxKnownTokenType is the upper bound FuzzLexer accepts for a token's Type; it aliases token.N (NOTE(review): confirm whether token.N is the last valid type or a one-past-the-end count). |
| 73 | const maxKnownTokenType = token.N |
| 74 | |
| 75 | func FuzzLexer(f *testing.F) { |
| 76 | f.Add([]byte("2024/01/01 groceries\n expenses:food $10.00\n assets:checking\n")) |
| 77 | f.Add([]byte("2024/01/01 * groceries\n expenses:food $10.00\n assets:checking\n")) |
| 78 | f.Add([]byte("2024/01/01 ! groceries\n expenses:food $10.00\n assets:checking\n")) |
| 79 | f.Add([]byte("2024/01/01 t ; inline comment\n a $10\n")) |
| 80 | f.Add([]byte("2024/01/01 t\n (a) 10 @@ $20\n [b] 30\n")) |
| 81 | f.Add([]byte("2008/06/03 * eat & shop\n expenses:food $1\n expenses:supplies $1\n assets:cash\n")) |
| 82 | f.Add([]byte("2015-01-03 * Money exchange office\n Assets:Cash -20 EUR @ 7.53 HRK\n Assets:Cash 150.60 HRK\n")) |
| 83 | f.Add([]byte("2024/01/01 ß\n (ß) 10 ß\n")) |
| 84 | f.Add([]byte("2024/01/01 t\n (! a) 10\n")) |
| 85 | f.Add([]byte("comment\nbody\nend\n")) |
| 86 | f.Add([]byte("apply tag foo\nend\n")) |
| 87 | f.Add([]byte("; a comment\n")) |
| 88 | f.Add([]byte("# a comment\n")) |
| 89 | f.Add([]byte("* a comment\n")) |
| 90 | f.Add([]byte("account expenses:food\n")) |
| 91 | f.Add([]byte("commodity 1,000.00 UAH\n")) |
| 92 | f.Add([]byte("N $\n")) |
| 93 | f.Add([]byte("P 2024-01-01 USD 41.50 UAH\n")) |
| 94 | f.Add([]byte("P 2024-01-01 12:00:00 USD 41.50 UAH\n")) |
| 95 | f.Add([]byte("P 2024-01-01 12:00 USD 41.50 UAH\n")) |
| 96 | f.Add([]byte("~ monthly\n a $10\n b\n")) |
| 97 | f.Add([]byte("= /^Income/\n expenses:food $10\n")) |
| 98 | f.Add([]byte("перевірка\n")) |
| 99 | f.Add([]byte("")) |
| 100 | f.Add([]byte("\n\n\n")) |
| 101 | f.Add([]byte("@@@\n")) |
| 102 | f.Add([]byte(" \n")) |
| 103 | f.Add([]byte("0\n")) |
| 104 | f.Add([]byte{0xff, 0xfe, 0x00}) |
| 105 | |
| 106 | f.Fuzz(func(t *testing.T, data []byte) { |
| 107 | // Pass 1: lex and validate token stream |
| 108 | l := New("j", data) |
| 109 | var tokens []token.Token |
| 110 | maxTokens := max(len(data)*2, 16) |
| 111 | prevEnd := -1 |
| 112 | for range maxTokens { |
| 113 | tok := l.Next() |
| 114 | |
| 115 | // Monotonic span |
| 116 | if tok.Span.Start.Offset < prevEnd { |
| 117 | t.Fatalf("non-monotonic span: prevEnd=%d current=%s %d", |
| 118 | prevEnd, tok.Type, tok.Span.Start.Offset) |
| 119 | } |
| 120 | |
| 121 | // Token type in range (no garbage from memory corruption) |
| 122 | if tok.Type < 0 || tok.Type > maxKnownTokenType { |
| 123 | t.Fatalf("token type out of range: %d", tok.Type) |
| 124 | } |
| 125 | |
| 126 | // Span in bounds (EOF/NEWLINE sentinels may extend one past input) |
| 127 | maxEnd := len(data) |
| 128 | if tok.Type == token.NEWLINE || tok.Type == token.EOF { |
| 129 | maxEnd = len(data) + 1 |
| 130 | } |
| 131 | if tok.Span.Start.Offset < 0 || tok.Span.End.Offset > maxEnd || |
| 132 | tok.Span.Start.Offset > tok.Span.End.Offset { |
| 133 | t.Fatalf("span out of bounds: [%d,%d] for len=%d type=%s", |
| 134 | tok.Span.Start.Offset, tok.Span.End.Offset, len(data), tok.Type) |
| 135 | } |
| 136 | |
| 137 | if tok.Type == token.EOF { |
| 138 | break |
| 139 | } |
| 140 | |
| 141 | // Non-zero-length for non-EOF tokens (NEWLINE sentinel is exempt) |
| 142 | if tok.Type != token.NEWLINE && tok.Span.End.Offset <= tok.Span.Start.Offset { |
| 143 | t.Fatalf("non-progressing token: %s %q at %d:%d-%d:%d", |
| 144 | tok.Type, tok.Literal, |
| 145 | tok.Span.Start.Line, tok.Span.Start.Col, |
| 146 | tok.Span.End.Line, tok.Span.End.Col) |
| 147 | } |
| 148 | |
| 149 | tokens = append(tokens, tok) |
| 150 | prevEnd = tok.Span.End.Offset |
| 151 | } |
| 152 | |
| 153 | if prevEnd > len(data)+1 { |
| 154 | t.Fatalf("token consumed beyond input: end=%d len=%d", prevEnd, len(data)) |
| 155 | } |
| 156 | |
| 157 | // Pass 2: re-lex the same input — token stream must be identical |
| 158 | l2 := New("j", data) |
| 159 | for _, expected := range tokens { |
| 160 | tok := l2.Next() |
| 161 | if tok.Type != expected.Type || tok.Literal != expected.Literal { |
| 162 | t.Fatalf("re-lex mismatch at offset %d: expected (%s %q), got (%s %q)", |
| 163 | expected.Span.Start.Offset, expected.Type, expected.Literal, tok.Type, tok.Literal) |
| 164 | } |
| 165 | } |
| 166 | }) |
| 167 | } |