17 files changed,
339 insertions(+),
40 deletions(-)
Author:
Oleksandr Smirnov
olexsmir@gmail.com
Committed at:
2026-05-23 17:52:23 +0300
Authored at:
2026-05-18 17:31:59 +0300
Change ID:
vzlwtmvkrklttywvonxvywlywrwskupl
Parent:
0c4384e
jump to
A
journal/journal_test.go
··· 1 +package journal_test 2 + 3 +import ( 4 + "path/filepath" 5 + "testing" 6 + 7 + "olexsmir.xyz/clerk/journal" 8 +) 9 + 10 +type test struct { 11 + desc string 12 + err bool // file has intentional syntax errors, parser should report them 13 +} 14 + 15 +var tests = map[string]test{ 16 + "actual-1ktxns-100accts.journal": {desc: "hledger: stress test: 1000 transactions, 100 accounts, number-only account names"}, 17 + "actual-accounttypes.journal": {desc: "hledger: account type annotations (type:A, type:L) via comments"}, // todo: tags are not supported yet 18 + "actual-alias.journal": {desc: "hledger: account alias directives for renaming"}, 19 + "actual-borrowing.journal": {desc: "hledger: borrowing/lending example with liabilities"}, 20 + "actual-business.journal": {desc: "hledger: simple business transactions with commodities"}, 21 + "actual-goal-budget.journal": {desc: "hledger: goal budget using periodic transactions"}, 22 + "actual-i18n-en.journal": {desc: "hledger: internationalization with account types in English"}, 23 + "actual-ledger-baseline-opt-lots-basis.dat": {desc: "ledger: G/S prefixes"}, 24 + "actual-ledger-input-divzero.dat": {desc: "ledger: fuzz corpus, designed to cause divide-by-zero"}, 25 + "actual-ledger-input-parsing.dat": {desc: "ledger: fuzz corpus, tests EOF without newline"}, 26 + "actual-ledger-input-sample.dat": {desc: "ledger: fuzz corpus, default commodity directive"}, 27 + "actual-ledger-input-standard.dat": {desc: "ledger: fuzz corpus, standard ledger format"}, 28 + "actual-ledger-input-transfer.dat": {desc: "ledger: fuzz corpus, byte quantity (non-monetary)"}, 29 + "actual-ledger-input-wow.dat": {desc: "ledger-cli: fuzz corpus, World of Warcraft currency (1G=100s)"}, 30 + "actual-multicurrency.journal": {desc: "hledger: multi-currency transactions with HRK/EUR"}, 31 + "actual-personal.journal": {desc: "hledger: simple personal finance example"}, 32 + "actual-quickstart.journal": {desc: "hledger: quickstart guide with commodity directive"}, 33 + "actual-sample.journal": {desc: "hledger: comprehensive sample with account tree"}, 34 + "actual-sample2.journal": {desc: "hledger: sample2 with balance assertions and account directives"}, 35 + "actual-status.journal": {desc: "hledger: tests all transaction statuses (unmarked, pending, cleared)"}, 36 + "actual-templates.journal": {desc: "hledger: entry template examples with comments"}, 37 + "actual-unicode.journal": {desc: "hledger: unicode in descriptions, account names, currency"}, 38 + "actual-vat.journal": {desc: "hledger: VAT tracking example"}, 39 + "apply-tag-block.dat": {desc: "ledger: apply-tag block directive"}, 40 + "automated-posting-rule.dat": {desc: "ledger: automated posting rules (= /^Expenses/)"}, 41 + "basic-ledger.dat": {desc: "ledger: basic income/expense transaction"}, 42 + "basic.journal": {desc: "hledger: minimal setup with account/commodity directives"}, 43 + "broken-double-at.journal": {err: true, desc: "synthetic: intentionally broken (@@) syntax"}, 44 + "broken-rparen.journal": {err: true, desc: "synthetic: intentionally broken unmatched )"}, 45 + "broken-unknown-directive.journal": {err: true, desc: "synthetic: intentionally broken unknown directive (??)"}, 46 + "code-note.dat": {desc: "ledger: transaction with code, payee, comments"}, 47 + "commodity-space.dat": {desc: "ledger: commodity with space before amount ($ 10.00)"}, 48 + "cost-balance-assertion.dat": {desc: "ledger: cost notation and balance assertion (@ 1 USD = 20.00 UAH)"}, 49 + "directives-supported.journal": {desc: "mixed: tests account, commodity, include, alias directives"}, 50 + "ext-hledger-i18n-no.journal": {desc: "hledger i18n example: uppercase directive values currently mis-tokenized"}, 51 + "ext-hledger-self-tracking-d.dat": {desc: "hledger self-tracking example with date+time transaction headers"}, 52 + "ext-hledger-status.journal": {desc: "hledger status example: ! (virtual:posting)"}, 53 + "ext-ledger-parsing.dat": {desc: "ledger parsing corpus: -$ amount form"}, 54 + "header-comments.journal": {desc: "hledger: transaction with header comment"}, 55 + "inclusive-balance-star.journal": {desc: "hledger: inclusive balance with ==*"}, 56 + "multicurrency-supported.journal": {desc: "hledger: working multi-currency with EUR exchange"}, 57 + "periodic-basic.journal": {desc: "hledger: periodic transaction (~ monthly)"}, 58 + "secondary-date-note.journal": {desc: "hledger: secondary date and transaction note"}, 59 + "status-basic.journal": {desc: "hledger: transaction status (pending with !)"}, 60 + "unicode-cjk-emoji.journal": {desc: "synthetic: unicode CJK and emoji in transaction descriptions"}, 61 + "unicode-cyrillic.journal": {desc: "synthetic: unicode Cyrillic in descriptions and account names"}, 62 + "unicode-mixed-languages.journal": {desc: "synthetic: mixed latin/cyrillic/cjk in descriptions and account names"}, 63 + "virtual-posting.dat": {desc: "ledger: virtual/balanced postings with [brackets]"}, 64 +} 65 + 66 +func TestParserOnRealJournals(t *testing.T) { 67 + for tname, tt := range tests { 68 + t.Run(tname, func(t *testing.T) { 69 + loader := journal.NewLoader() 70 + pf, err := loader.Load(filepath.Join("testdata/journals", tname)) 71 + if err != nil { 72 + t.Fatalf("load err: %v", err) 73 + } 74 + 75 + if tt.err { 76 + if len(pf.Errors)+len(pf.FileErrors) == 0 { 77 + t.Errorf("expected parse errors but got none") 78 + } 79 + return 80 + } 81 + 82 + for _, e := range pf.Errors { 83 + t.Errorf("parse error: %s", e.Message) 84 + } 85 + for _, e := range pf.FileErrors { 86 + t.Errorf("file error [%s]: %s", e.Path, e.Message) 87 + } 88 + }) 89 + } 90 +}
M
journal/lexer/lexer.go
··· 121 121 return l.lexSingle(token.MINUS) 122 122 case l.ch == '.': 123 123 return l.lexSingle(token.TEXT) 124 + case l.ch == '!': 125 + return l.lexSingle(token.BANG) 126 + case l.ch == '@': 127 + return l.lexSingle(token.AT) 124 128 case l.isAlpha(): 125 129 return l.lexKeyword() 126 130 case l.isDigit(): ··· 191 195 return l.lexSingle(token.MINUS) 192 196 case '=': 193 197 return l.lexEquals() 198 + case '"', '\'': 199 + return l.lexString() 194 200 default: // description / payee 195 201 if l.isDate() { // secondsry date after = 196 202 return l.lexDate() ··· 317 323 return l.lexSingle(token.MINUS) 318 324 case '.': 319 325 return l.lexSingle(token.TEXT) 326 + case '"', '\'': 327 + return l.lexString() 320 328 default: 321 329 if l.isCommodityStart() { 322 330 return l.lexCommodityMark() ··· 444 452 445 453 func (l *Lexer) lexNumber() token.Token { 446 454 s := l.save() 447 - for l.isDigit() || l.ch == '.' || l.ch == ',' || l.ch == '_' { 448 - l.advance() 455 + for { 456 + if l.isDigit() || l.ch == '.' || l.ch == ',' || l.ch == '_' || l.ch == '\'' { 457 + l.advance() 458 + } else if l.ch == ' ' && (l.peek() >= '0' && l.peek() <= '9') { 459 + l.advance() 460 + } else { 461 + break 462 + } 449 463 } 450 464 lit := string(l.input[s.offset:l.pos]) 451 465 kind := token.INT 452 - if strings.ContainsAny(lit, ".,") { 466 + if strings.ContainsAny(lit, "., ") { 453 467 kind = token.DECIMAL 454 468 } 455 469 return token.Token{Type: kind, Literal: lit, Span: l.span(s)} ··· 478 492 return token.Token{Type: token.DATE, Literal: string(l.input[s.offset:l.pos]), Span: l.span(s)} 479 493 } 480 494 495 +func isSymbolChar(r rune) bool { 496 + return r == '$' || unicode.In(r, unicode.Sc) 497 +} 498 + 499 +func (l *Lexer) lexString() token.Token { 500 + s := l.save() 501 + quote := l.ch 502 + l.advance() // consume the quote character 503 + for l.ch != quote && l.ch != '\n' && l.ch != 0 { 504 + l.advance() 505 + } 506 + if l.ch == quote { 507 + l.advance() 508 + } 509 + return token.Token{Type: token.STRING, Literal: string(l.input[s.offset:l.pos]), Span: l.span(s)} 510 +} 511 + 481 512 func (l *Lexer) lexCommodityMark() token.Token { 482 513 s := l.save() 483 514 ··· 494 525 495 526 if unicode.IsLetter(l.ch) { 496 527 for unicode.IsLetter(l.ch) || unicode.IsDigit(l.ch) { 528 + l.advance() 529 + } 530 + return token.Token{Type: token.COMMODITYMARK, Literal: string(l.input[s.offset:l.pos]), Span: l.span(s)} 531 + } 532 + 533 + if isSymbolChar(l.ch) { 534 + for isSymbolChar(l.ch) { 497 535 l.advance() 498 536 } 499 537 return token.Token{Type: token.COMMODITYMARK, Literal: string(l.input[s.offset:l.pos]), Span: l.span(s)}
M
journal/parser/parser.go
··· 36 36 return f 37 37 } 38 38 39 +func isDirectiveKeyword(t token.Type) bool { 40 + switch t { 41 + case token.COMMENTKW, token.ACCOUNT, token.COMMODITY, token.INCLUDE, 42 + token.ALIAS, token.PAYEE, token.TAG, token.APPLY, token.END, 43 + token.YEAR, token.DECIMALMARK, token.D, token.P, token.N: 44 + return true 45 + } 46 + return false 47 +} 48 + 39 49 func (p *Parser) parseEntry() ast.Entry { 50 + if p.got(token.BANG) || p.got(token.AT) { 51 + if isDirectiveKeyword(p.peek.Type) { 52 + p.advance() // consume prefix 53 + } 54 + } 40 55 switch p.cur.Type { 41 56 case token.ILLEGAL: 42 57 p.errorf("illegal character %q", p.cur.Literal) ··· 122 137 } 123 138 124 139 // optional payee | note 125 - if p.got(token.TEXT) { 140 + if p.got(token.TEXT) || p.got(token.STRING) { 126 141 tx.Payee = p.parsePayee() 127 142 128 143 // check for | separator ··· 164 179 return tx 165 180 } 166 181 182 +func unquote(s string) string { 183 + if len(s) >= 2 && ((s[0] == '"' && s[len(s)-1] == '"') || (s[0] == '\'' && s[len(s)-1] == '\'')) { 184 + return s[1 : len(s)-1] 185 + } 186 + return s 187 +} 188 + 167 189 func (p *Parser) parsePayee() *ast.Payee { 168 190 s := p.cur.Span 191 + 192 + if p.got(token.STRING) { 193 + name := unquote(p.cur.Literal) 194 + p.advance() 195 + return &ast.Payee{Name: name, Span: p.span(s)} 196 + } 169 197 170 198 // keep spaces/tags between text tokens; stop before trailing whitespace 171 199 var name strings.Builder 172 - for p.got(token.TEXT) || (p.got(token.WHITESPACE) && p.willGet(token.TEXT)) { 200 + for p.got(token.TEXT) || p.got(token.INT) || p.got(token.DECIMAL) || (p.got(token.WHITESPACE) && (p.willGet(token.TEXT) || p.willGet(token.INT) || p.willGet(token.DECIMAL))) { 173 201 _, _ = name.WriteString(p.cur.Literal) 174 202 p.advance() 175 203 } 176 - return &ast.Payee{Name: name.String(), Span: p.span(s)} 204 + return &ast.Payee{Name: unquote(name.String()), Span: p.span(s)} 177 205 } 178 206 179 207 func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction { ··· 322 350 account := p.parseAccount() 323 351 comment := p.parseOptInlineComment() 324 352 p.expectNewline() 353 + 354 + for p.got(token.INDENT) { 355 + p.advance() 356 + for !p.got(token.NEWLINE) && !p.got(token.EOF) { 357 + p.advance() 358 + } 359 + p.expectNewline() 360 + } 361 + 325 362 return &ast.AccountDirective{ 326 363 Account: account, 327 364 Comment: comment, ··· 363 400 comment := p.parseOptInlineComment() 364 401 p.expectNewline() 365 402 403 + for p.got(token.INDENT) { 404 + p.advance() 405 + p.skipWhitespace() 406 + if p.got(token.COMMODITYMARK) && p.cur.Literal == "format" { 407 + p.advance() 408 + p.skipWhitespace() 409 + format = p.parseAmount() 410 + } else { 411 + for !p.got(token.NEWLINE) && !p.got(token.EOF) { 412 + p.advance() 413 + } 414 + } 415 + p.expectNewline() 416 + } 417 + 366 418 cd := &ast.CommodityDirective{ 367 419 Commodity: commodity, 368 420 Comment: comment, ··· 420 472 p.skipWhitespace() 421 473 422 474 name := "" 423 - if p.got(token.TEXT) { 475 + if p.got(token.TEXT) || p.got(token.STRING) { 424 476 name = p.parsePayee().Name 425 477 } 426 478 ··· 1194 1246 } 1195 1247 1196 1248 func detectFormat(lit string) ast.QuantityFormat { 1197 - // find all separator positions 1198 1249 var separators []int 1199 1250 for i, ch := range []byte(lit) { 1200 - if ch == '.' || ch == ',' { 1251 + if ch == '.' || ch == ',' || ch == ' ' || ch == '_' || ch == '\'' { 1201 1252 separators = append(separators, i) 1202 1253 } 1203 1254 } 1204 1255 1205 1256 if len(separators) == 0 { 1206 - // "1000" — no separators, integer 1207 1257 return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0} 1208 1258 } 1209 1259 ··· 1212 1262 precision := 0 1213 1263 1214 1264 if len(separators) == 1 { 1215 - // "10.00" or "10,00" — single separator is the decimal mark 1216 1265 pos := separators[0] 1217 - decimal = lit[pos] 1218 - precision = len(lit) - pos - 1 1266 + sepChar := lit[pos] 1267 + if sepChar == ' ' || sepChar == '_' || sepChar == '\'' { 1268 + thousands = sepChar 1269 + decimal = '.' // default 1270 + precision = 0 1271 + } else { 1272 + decimal = sepChar 1273 + precision = len(lit) - pos - 1 1274 + } 1219 1275 } else { 1220 - // "1,000.00" or "1.000,00" — last separator is decimal, first is thousands 1221 1276 last := separators[len(separators)-1] 1222 1277 decimal = lit[last] 1223 1278 thousands = lit[separators[0]]
M
journal/parser/parser_test.go
··· 43 43 `}, 44 44 {"account directive", "account expenses:food\n"}, 45 45 {"account directive with comment", "account expenses:food ; my account\n"}, 46 + {"account with subdirectives", `account expenses:food 47 + note some note 48 + alias food ; this gets ignored 49 +`}, 46 50 {"comodity directive", "commodity $\n"}, 47 51 {"comodity directive word", "commodity UAH\n"}, 48 52 {"comodity directive no space", "commodity $1000.00\n"}, 49 53 {"commodity quantity first", "commodity 1,000.00 UAH\n"}, 50 54 {"commodity quantity after", "commodity UAH 1,000.00\n"}, 51 - {"payee directive with spaces", "payee grocery store\n"}, 55 + {"commodity with subdirectives", `commodity UAH 56 + format 1 000.00 UAH 57 + note Божествена Гривня ; this gets ignored 58 +`}, 59 + {"payee directive", `payee grocery store 60 +payee 'grocery store 3' 61 +payee "grocery store 2" 62 +payee grocery store 1 63 +`}, 52 64 {"transaction", "2024/01/01\n"}, 53 65 {"automated transaction", `= ^income 54 66 (liabilities:tax) *.33 ··· 109 121 expenses:food ₹700.00 110 122 assets:checking 111 123 `}, 124 + {"transaction with strange commodity symbols", `2024-01-01 groceries 125 +2026-05-20 126 + asdf 123 $€£ 127 + asdf2 128 + 129 +2026-05-20 130 + asdf 123 bytes 131 + asdf2 132 +`}, 133 + 112 134 {"transaction with tabs", `2024-01-01 groceries 113 135 expenses:food $10.00 114 136 assets:checking ··· 229 251 @bad2 230 252 @bad3 231 253 assets:checking 254 +`}, 255 + {"quoted payee names", `2024-01-01 "groceries store" 256 + expenses:food $10 257 + assets:checking 258 +`}, 259 + {"digit group marks", `2024-01-01 groceries 260 + expenses:food 1 000.00 UAH 261 + expenses:supplies 1'000.00 USD 262 + assets:checking -2_000.00 USD 263 +`}, 264 + {"directive prefixes", `!account expenses:food 265 +@commodity USD 232 266 `}, 233 267 {"bad between good", `2024/01/01 groceries 234 268 expenses:food $10
A
journal/parser/testdata/golden/Parser_ParseFile__account_with_subdirectives.golden
··· 1 +Journal 2 + AccountDirective j:1:1-4:1 3 + Account "expenses:food" j:1:9-2:0
A
journal/parser/testdata/golden/Parser_ParseFile__commodity_with_subdirectives.golden
··· 1 +Journal 2 + CommodityDirective j:1:1-4:1 3 + Commodity: "UAH"
A
journal/parser/testdata/golden/Parser_ParseFile__digit_group_marks.golden
··· 1 +Journal 2 + Transaction j:1:1-5:1 3 + Date: 2024-01-01 4 + Payee: "groceries" j:1:12-2:0 5 + Posting j:2:1-3:1 6 + Account "expenses:food" j:2:3-2:16 7 + Amount j:2:18-2:18 8 + Quantity: 1000 9 + Commodity: "UAH" 10 + CommodityPos: After 11 + HasSpace: true 12 + Precision: 2 13 + Decimal: "." 14 + Thousands: " " 15 + Posting j:3:1-4:1 16 + Account "expenses:supplies" j:3:3-3:20 17 + Amount j:3:22-3:22 18 + Quantity: 1000 19 + Commodity: "USD" 20 + CommodityPos: After 21 + HasSpace: true 22 + Precision: 2 23 + Decimal: "." 24 + Thousands: "'" 25 + Posting j:4:1-5:1 26 + Account "assets:checking" j:4:3-4:18 27 + Amount j:4:20-4:20 28 + Quantity: -2000 29 + Commodity: "USD" 30 + CommodityPos: After 31 + HasSpace: true 32 + Precision: 2 33 + Decimal: "." 34 + Thousands: "_"
M
journal/parser/testdata/golden/Parser_ParseFile__illegal_between_transactions.golden
··· 14 14 Posting j:3:1-4:1 15 15 Account "assets:checking" j:3:5-4:0 16 16 Amount: <elided> 17 - BlankLine j:5:0-5:1 18 17 Transaction j:5:1-8:1 19 18 Date: 2024/01/02 20 19 Payee: "salary" j:5:12-6:0 ··· 31 30 Account "assets:checking" j:7:5-8:0 32 31 Amount: <elided> 33 32 Errors 34 - j:4:1-4:2: illegal character "@" 35 - j:4:2-4:3: illegal character "@" 36 - j:4:3-5:0: illegal character "@" 33 + j:4:1-4:2: unexpected token AT
A
journal/parser/testdata/golden/Parser_ParseFile__payee_directive.golden
··· 1 +Journal 2 + PayeeDirective j:1:1-2:1 3 + Name: "grocery store" 4 + PayeeDirective j:2:1-3:1 5 + Name: "grocery store 3" 6 + PayeeDirective j:3:1-4:1 7 + Name: "grocery store 2" 8 + PayeeDirective j:4:1-5:1 9 + Name: "grocery store 1"
D
journal/parser/testdata/golden/Parser_ParseFile__payee_directive_with_spaces.golden
··· 1 -Journal 2 - PayeeDirective j:1:1-2:1 3 - Name: "grocery store"
A
journal/parser/testdata/golden/Parser_ParseFile__quoted_payee_names.golden
··· 1 +Journal 2 + Transaction j:1:1-4:1 3 + Date: 2024-01-01 4 + Payee: "groceries store" j:1:12-2:0 5 + Posting j:2:1-3:1 6 + Account "expenses:food" j:2:3-2:16 7 + Amount j:2:18-2:18 8 + Quantity: 10 9 + Commodity: "$" 10 + CommodityPos: Before 11 + HasSpace: false 12 + Precision: 0 13 + Decimal: "." 14 + Posting j:3:1-4:1 15 + Account "assets:checking" j:3:3-4:0 16 + Amount: <elided>
A
journal/parser/testdata/golden/Parser_ParseFile__transaction_with_strange_commodity_symbols.golden
··· 1 +Journal 2 + Transaction j:1:1-2:1 3 + Date: 2024-01-01 4 + Payee: "groceries" j:1:12-2:0 5 + Transaction j:2:1-6:0 6 + Date: 2026-05-20 7 + Posting j:3:1-4:1 8 + Account "asdf" j:3:3-3:7 9 + Amount j:3:9-3:9 10 + Quantity: 123 11 + Commodity: "$€£" 12 + CommodityPos: After 13 + HasSpace: true 14 + Precision: 0 15 + Decimal: "." 16 + Posting j:4:1-6:0 17 + Account "asdf2" j:4:3-5:0 18 + Amount: <elided> 19 + BlankLine j:6:0-6:1 20 + Transaction j:6:1-9:1 21 + Date: 2026-05-20 22 + Posting j:7:1-8:1 23 + Account "asdf" j:7:3-7:7 24 + Amount j:7:9-7:9 25 + Quantity: 123 26 + Commodity: "bytes" 27 + CommodityPos: After 28 + HasSpace: true 29 + Precision: 0 30 + Decimal: "." 31 + Posting j:8:1-9:1 32 + Account "asdf2" j:8:3-9:0 33 + Amount: <elided>