all repos

clerk @ e586ae2516f828afd5a62156055812e727a17d1c

missing tooling for ledger/hledger

clerk/journal/parser/parser.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
lexer & parser & ast..., 13 days ago
1
package parser
2
3
import (
4
	"fmt"
5
	"strconv"
6
	"strings"
7
8
	"github.com/olexsmir/ledger-tools/journal/ast"
9
	"github.com/olexsmir/ledger-tools/journal/lexer"
10
	"github.com/olexsmir/ledger-tools/journal/token"
11
	"github.com/shopspring/decimal"
12
)
13
14
type Parser struct {
15
	lexer  *lexer.Lexer
16
	errors []*ast.ParseError
17
	cur    token.Token
18
	peek   token.Token
19
}
20
21
// New returns a Parser that reads tokens from lex. The parser is primed
// before it is returned, so both the current and the lookahead token are
// already loaded.
func New(lex *lexer.Lexer) *Parser {
	parser := &Parser{lexer: lex}
	// Two reads are required: the first one fills peek, the second one shifts
	// that token into cur and refills peek.
	for i := 0; i < 2; i++ {
		parser.advance()
	}
	return parser
}
27
28
// ParseJournal consumes tokens until EOF and returns the resulting journal.
// Entries that could not be parsed are left out of Entries; the errors
// recorded while parsing are attached to the returned journal as Errors.
func (p *Parser) ParseJournal() *ast.Journal {
	journal := &ast.Journal{}
	for !p.got(token.EOF) {
		entry := p.parseEntry()
		if entry == nil {
			// parseEntry already recorded the problem and skipped ahead.
			continue
		}
		journal.Entries = append(journal.Entries, entry)
	}
	journal.Errors = p.errors
	return journal
}
38
39
func (p *Parser) parseEntry() ast.Entry {
40
	switch p.cur.Type {
41
	case token.ILLEGAL:
42
		p.errorf("illegal character %q", p.cur.Literal)
43
		p.advance()
44
		return nil
45
	case token.INDENT:
46
		p.errorf("unexpected indent")
47
		p.syncToNextline()
48
		return nil
49
	case token.DATE:
50
		return p.parseTransaction()
51
	case token.TILDE:
52
		return p.parsePeriodicTransaction()
53
	case token.EQ:
54
		return p.parseAutomatedTransaction()
55
	case token.NEWLINE:
56
		return p.parseBlankLine()
57
	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:
58
		return p.parseComment()
59
	case token.ACCOUNT:
60
		return p.parseAccountDirective()
61
	case token.COMMODITY:
62
		return p.parseCommodityDirective()
63
	case token.INCLUDE:
64
		return p.parseIncludeDirective()
65
	case token.ALIAS:
66
		return p.parseAliasDirective()
67
	case token.PAYEE:
68
		return p.parsePayeeDirective()
69
	case token.TAG:
70
		return p.parseTagDirective()
71
	case token.YEAR:
72
		return p.parseYearDirective()
73
	case token.DECIMALMARK:
74
		return p.parseDecimalMarkDirective()
75
	case token.D:
76
		return p.parseDefaultCommodityDirective()
77
	case token.P:
78
		return p.parseMarketPriceDirective()
79
	case token.N:
80
		return p.parseIgnoredDirective()
81
	case token.APPLY:
82
		return p.parseApplyDirective()
83
	case token.END:
84
		return p.parseEndDirective()
85
	case token.COMMENTKW:
86
		return p.parseCommentBlockDirective()
87
	default:
88
		p.errorf("unexpected token %s", p.cur.Type)
89
		p.sync()
90
		return nil
91
	}
92
}
93
94
// parseTransaction parses a dated transaction: the header line
// (date, optional "=secondary date", optional status, optional "(code)",
// optional "payee | note", optional inline comment), then any indented
// ";" header comments, then the indented postings.
func (p *Parser) parseTransaction() *ast.Transaction {
	s := p.cur.Span
	tx := &ast.Transaction{}

	tx.Date = p.parseDate()

	// optional secondary date
	if p.got(token.EQ) {
		p.advance()
		d := p.parseDate()
		tx.SecondDate = &d
	}

	p.skipWhitespace()

	// optional status
	tx.Status = p.parseStatus()

	// optional code
	if p.got(token.LPAREN) {
		p.advance()
		var code strings.Builder
		// Stop at the end of the line as well, so an unterminated code can
		// neither swallow the following lines nor loop forever at EOF.
		for !p.got(token.RPAREN) && !p.got(token.NEWLINE) && !p.got(token.EOF) {
			_, _ = code.WriteString(p.cur.Literal)
			p.advance()
		}
		if p.got(token.RPAREN) {
			// Consume the closing parenthesis; leaving it in place would make
			// the rest of the header fail to parse.
			p.advance()
		} else {
			p.errorf("unterminated transaction code: expected %s, got %s", token.RPAREN, p.cur.Type)
		}
		codeText := code.String()
		tx.Code = &codeText
		p.skipWhitespace()
	}

	// optional payee | note
	if p.got(token.TEXT) {
		tx.Payee = p.parsePayee()
		p.skipWhitespace()

		if p.got(token.PIPE) {
			p.advance()
			p.skipWhitespace()

			// Same rule as parsePayee: whitespace is kept only between text
			// tokens, so the note does not carry trailing blanks.
			var note strings.Builder
			for p.got(token.TEXT) || (p.got(token.WHITESPACE) && p.willGet(token.TEXT)) {
				_, _ = note.WriteString(p.cur.Literal)
				p.advance()
			}
			noteText := note.String()
			tx.Note = &noteText
		}
	}

	tx.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments: indented ";" lines before the first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		c := p.parseComment()
		tx.HeaderComments = append(tx.HeaderComments, *c)
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			tx.Postings = append(tx.Postings, posting)
		}
	}

	tx.Span = p.span(s)
	return tx
}
166
167
// parsePayee collects the payee name from consecutive text tokens. Whitespace
// is included only when another text token follows it, so the name never ends
// in trailing whitespace.
func (p *Parser) parsePayee() *ast.Payee {
	start := p.cur.Span

	var b strings.Builder
	for {
		isText := p.got(token.TEXT)
		isInnerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !isText && !isInnerSpace {
			break
		}
		_, _ = b.WriteString(p.cur.Literal)
		p.advance()
	}

	return &ast.Payee{Name: b.String(), Span: p.span(start)}
}
178
179
// parsePeriodicTransaction parses a "~ period  description" rule: the header
// line, any indented ";" header comments and the indented postings.
//
// Header comments and postings are handled the same way as in
// parseTransaction and parseAutomatedTransaction: an indented ";" line is a
// header comment, and an unindented ";" line ends the rule so it is parsed as
// a top-level comment instead of being attached here.
func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {
	s := p.cur.Span
	p.expect(token.TILDE)
	p.skipWhitespace()

	pt := &ast.PeriodicTransaction{}
	pt.Period = p.parsePeriod()

	if desc := p.parseOptPeriodicDescription(); desc != "" {
		pt.Description = &desc
	}

	pt.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments: indented ";" lines before the first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			pt.Postings = append(pt.Postings, posting)
		}
	}

	// Computed last so the span covers the whole rule, not just the "~".
	pt.Span = p.span(s)
	return pt
}
215
216
// parseAutomatedTransaction parses an "= expr" rule: the header line, any
// indented ";" header comments and the indented postings.
func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {
	s := p.cur.Span
	p.expect(token.EQ)
	p.skipWhitespace()

	at := &ast.AutomatedTransaction{}
	at.Expr = p.parseDirectiveExpr()
	at.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		at.HeaderComments = append(at.HeaderComments, p.parseComment())
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			at.Postings = append(at.Postings, posting)
		}
	}

	// Computed last so the span covers the whole rule, not just the "=".
	at.Span = p.span(s)
	return at
}
243
244
func (p *Parser) parsePeriod() *ast.Period {
245
	s := p.cur.Span
246
247
	var periodBuf strings.Builder
248
249
	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&
250
		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {
251
252
		if p.got(token.WHITESPACE) {
253
			if len(p.cur.Literal) >= 2 {
254
				break
255
			}
256
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||
257
				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||
258
				p.willGet(token.PERCENT) || p.willGet(token.STAR) {
259
				p.advance()
260
				continue
261
			}
262
		}
263
264
		periodBuf.WriteString(p.cur.Literal)
265
		p.advance()
266
	}
267
268
	str := periodBuf.String()
269
	period := &ast.Period{Raw: str, Span: p.span(s)}
270
271
	if _, after, ok := strings.Cut(str, " from "); ok {
272
		end := strings.Index(after, " ")
273
		dateStr := after
274
		if end >= 0 {
275
			dateStr = after[:end]
276
		}
277
		if d := parseSimpleDate(dateStr); d.Year > 0 {
278
			period.From = &d
279
			rest := after
280
			if end >= 0 {
281
				rest = after[end:]
282
			}
283
			if _, toAfter, ok := strings.Cut(rest, " to "); ok {
284
				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {
285
					toAfter = toAfter[:toEnd]
286
				}
287
				if d := parseSimpleDate(toAfter); d.Year > 0 {
288
					period.To = &d
289
				}
290
			}
291
		}
292
	}
293
	return period
294
}
295
296
func (p *Parser) parseComment() *ast.Comment {
297
	s := p.cur.Span
298
	marker := p.cur.Literal[0]
299
	p.advance()
300
	p.skipWhitespace()
301
302
	var text string
303
	if p.got(token.TEXT) {
304
		text = p.cur.Literal
305
		p.advance()
306
	}
307
308
	p.expectNewline()
309
310
	return &ast.Comment{
311
		Marker: marker,
312
		Text:   text,
313
		Span:   p.span(s),
314
	}
315
}
316
317
func (p *Parser) parseAccountDirective() *ast.AccountDirective {
318
	s := p.cur.Span
319
	p.expect(token.ACCOUNT)
320
	p.skipWhitespace()
321
322
	account := p.parseAccount()
323
	comment := p.parseOptInlineComment()
324
	p.expectNewline()
325
	return &ast.AccountDirective{
326
		Account: account,
327
		Comment: comment,
328
		Span:    p.span(s),
329
	}
330
}
331
332
func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {
333
	s := p.cur.Span
334
	p.expect(token.COMMODITY)
335
	p.skipWhitespace()
336
337
	var commodity string
338
	var format *ast.Amount
339
340
	switch p.cur.Type {
341
	case token.TEXT, token.INT, token.DECIMAL:
342
		format = p.parseAmount()
343
		commodity = format.Commodity
344
	case token.COMMODITYMARK:
345
		commodity = p.cur.Literal
346
		p.advance()
347
		hadSpace := p.got(token.WHITESPACE)
348
		p.skipWhitespace()
349
		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {
350
			format = p.parseAmount()
351
			format.Commodity = commodity
352
			format.CommodityPos = ast.CommodityBefore
353
			format.HasSpace = hadSpace
354
		}
355
	default:
356
		p.errorf("expected commodity name or amount, got %s", p.cur.Type)
357
	}
358
359
	if commodity == "" {
360
		p.errorf("expected commodity name, got %s", p.cur.Type)
361
	}
362
363
	comment := p.parseOptInlineComment()
364
	p.expectNewline()
365
366
	return &ast.CommodityDirective{
367
		Commodity: commodity,
368
		Format:    format,
369
		Comment:   comment,
370
		Span:      p.span(s),
371
	}
372
}
373
374
func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {
375
	s := p.cur.Span
376
	p.expect(token.INCLUDE)
377
	p.skipWhitespace()
378
379
	path := ""
380
	if p.got(token.TEXT) {
381
		path = p.cur.Literal
382
		p.advance()
383
	} else {
384
		p.errorf("expected file path, got %s", p.cur.Type)
385
	}
386
387
	comment := p.parseOptInlineComment()
388
	p.expectNewline()
389
390
	return &ast.IncludeDirective{
391
		Path:    path,
392
		Comment: comment,
393
		Span:    p.span(s),
394
	}
395
}
396
397
func (p *Parser) parseAliasDirective() *ast.AliasDirective {
398
	s := p.cur.Span
399
	p.expect(token.ALIAS)
400
	p.skipWhitespace()
401
402
	from := p.parseAccount().Name
403
	p.skipWhitespace()
404
	p.expect(token.EQ)
405
	p.skipWhitespace()
406
	to := p.parseAccount().Name
407
	p.skipWhitespace()
408
409
	p.expectNewline()
410
	return &ast.AliasDirective{
411
		From: from,
412
		To:   to,
413
		Span: p.span(s),
414
	}
415
}
416
417
func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {
418
	s := p.cur.Span
419
	p.expect(token.PAYEE)
420
	p.skipWhitespace()
421
422
	name := ""
423
	if p.got(token.TEXT) {
424
		name = p.parsePayee().Name
425
	}
426
427
	comment := p.parseOptInlineComment()
428
	p.expectNewline()
429
430
	return &ast.PayeeDirective{
431
		Name:    name,
432
		Comment: comment,
433
		Span:    p.span(s),
434
	}
435
}
436
437
func (p *Parser) parseTagDirective() *ast.TagDirective {
438
	s := p.cur.Span
439
	p.expect(token.TAG)
440
	p.skipWhitespace()
441
442
	name := ""
443
	if p.got(token.TEXT) {
444
		name = p.cur.Literal
445
		p.advance()
446
	}
447
448
	comment := p.parseOptInlineComment()
449
	p.expectNewline()
450
451
	return &ast.TagDirective{
452
		Name:    name,
453
		Comment: comment,
454
		Span:    p.span(s),
455
	}
456
}
457
458
func (p *Parser) parseYearDirective() *ast.YearDirective {
459
	s := p.cur.Span
460
	p.expect(token.YEAR)
461
	p.skipWhitespace()
462
463
	year := 0
464
	if p.got(token.INT) {
465
		_, _ = fmt.Sscanf(p.cur.Literal, "%d", &year)
466
		p.advance()
467
	} else {
468
		p.errorf("expected year, got %s", p.cur.Type)
469
	}
470
471
	p.expectNewline()
472
	return &ast.YearDirective{
473
		Year: year,
474
		Span: p.span(s),
475
	}
476
}
477
478
func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {
479
	s := p.cur.Span
480
	p.expect(token.DECIMALMARK)
481
	p.skipWhitespace()
482
483
	mark := byte('.')
484
	if p.got(token.TEXT) {
485
		if len(p.cur.Literal) > 0 {
486
			mark = p.cur.Literal[0]
487
		}
488
		p.advance()
489
	}
490
491
	p.expectNewline()
492
	return &ast.DecimalMarkDirective{
493
		Mark: mark,
494
		Span: p.span(s),
495
	}
496
}
497
498
func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {
499
	s := p.cur.Span
500
	p.expect(token.D)
501
	p.skipWhitespace()
502
503
	amt := p.parseAmount()
504
	p.expectNewline()
505
506
	return &ast.DefaultCommodityDirective{
507
		Amount: *amt,
508
		Span:   p.span(s),
509
	}
510
}
511
512
// parseIgnoredDirective parses an "N ..." line. Its argument (one text or
// commodity token) and any inline comment are consumed and thrown away; only
// the span is kept.
func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {
	start := p.cur.Span
	p.expect(token.N)
	p.skipWhitespace()

	switch p.cur.Type {
	case token.TEXT, token.COMMODITYMARK:
		p.advance()
	}

	_ = p.parseOptInlineComment() // the comment is not stored on this node
	p.expectNewline()

	return &ast.IgnoredDirective{Span: p.span(start)}
}
523
524
func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {
525
	s := p.cur.Span
526
	p.expect(token.P)
527
	p.skipWhitespace()
528
529
	date := p.parseDate()
530
	p.skipWhitespace()
531
532
	var t *ast.Time
533
	if p.got(token.TIME) {
534
		tm := p.parseTime()
535
		t = &tm
536
		p.skipWhitespace()
537
	}
538
539
	tok, _ := p.expect(token.COMMODITYMARK)
540
	commodity := tok.Literal
541
	p.advance()
542
	p.skipWhitespace()
543
544
	amt := p.parseAmount()
545
	p.expectNewline()
546
547
	return &ast.MarketPriceDirective{
548
		DateTime:  ast.DateTime{Date: date, Time: t},
549
		Commodity: commodity,
550
		Amount:    *amt,
551
		Span:      p.span(s),
552
	}
553
}
554
555
func (p *Parser) parseTime() ast.Time {
556
	s := p.cur.Span
557
	tok, _ := p.expect(token.TIME)
558
	lit := tok.Literal
559
560
	parts := strings.Split(lit, ":")
561
	if len(parts) < 2 {
562
		p.errorf("invalid time format: %q", lit)
563
		return ast.Time{Span: p.span(s)}
564
	}
565
566
	hour, _ := strconv.Atoi(parts[0])
567
	minute, _ := strconv.Atoi(parts[1])
568
	second := 0
569
	if len(parts) > 2 {
570
		second, _ = strconv.Atoi(parts[2])
571
	}
572
573
	if hour < 0 || hour > 23 {
574
		p.errorf("invalid hour %d in time %q", hour, lit)
575
	}
576
	if minute < 0 || minute > 59 {
577
		p.errorf("invalid minute %d in time %q", minute, lit)
578
	}
579
	if second < 0 || second > 59 {
580
		p.errorf("invalid second %d in time %q", second, lit)
581
	}
582
583
	return ast.Time{
584
		Hour:   hour,
585
		Minute: minute,
586
		Second: second,
587
		Span:   p.span(s),
588
	}
589
}
590
591
func (p *Parser) parseApplyDirective() *ast.ApplyDirective {
592
	s := p.cur.Span
593
	p.expect(token.APPLY)
594
	p.skipWhitespace()
595
596
	expr := p.parseDirectiveExpr()
597
	comment := p.parseOptInlineComment()
598
	p.expectNewline()
599
600
	return &ast.ApplyDirective{
601
		Expr:    expr,
602
		Comment: comment,
603
		Span:    p.span(s),
604
	}
605
}
606
607
func (p *Parser) parseEndDirective() *ast.EndDirective {
608
	s := p.cur.Span
609
	p.expect(token.END)
610
	p.skipWhitespace()
611
612
	expr := p.parseDirectiveExpr()
613
	comment := p.parseOptInlineComment()
614
	p.expectNewline()
615
616
	return &ast.EndDirective{
617
		Expr:    expr,
618
		Comment: comment,
619
		Span:    p.span(s),
620
	}
621
}
622
623
func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {
624
	start := p.cur.Span
625
	p.expect(token.COMMENTKW)
626
	p.skipWhitespace()
627
628
	header := p.parseDirectiveExpr()
629
	comment := p.parseOptInlineComment()
630
	p.expectNewline()
631
632
	var content strings.Builder
633
	for p.cur.Type != token.EOF {
634
		if p.got(token.END) {
635
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {
636
				p.advance()
637
				p.expectNewline()
638
				break
639
			}
640
			if p.willGet(token.WHITESPACE) {
641
				endTok := p.cur
642
				p.advance()
643
				wsTok := p.cur
644
				p.advance()
645
				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token
646
					p.advance()
647
					p.parseDirectiveExpr()
648
					p.parseOptInlineComment()
649
					p.expectNewline()
650
					break
651
				}
652
				content.WriteString(endTok.Literal)
653
				content.WriteString(wsTok.Literal)
654
				continue
655
			}
656
		}
657
		content.WriteString(p.cur.Literal)
658
		p.advance()
659
	}
660
661
	return &ast.CommentBlockDirective{
662
		Header:  header,
663
		Content: content.String(),
664
		Comment: comment,
665
		Span:    p.span(start),
666
	}
667
}
668
669
// parseStatus consumes an optional status mark and the whitespace after it.
// A literal starting with '*' means cleared, anything else pending. It
// returns nil, consuming nothing, when the current token is neither STAR nor
// BANG.
func (p *Parser) parseStatus() *ast.Status {
	if !p.got(token.STAR) && !p.got(token.BANG) {
		return nil
	}

	value := ast.StatusPending
	if p.cur.Literal[0] == '*' {
		value = ast.StatusCleared
	}
	status := &ast.Status{Value: value, Span: p.cur.Span}

	p.advance()
	p.skipWhitespace()
	return status
}
682
683
func (p *Parser) isAmountStart() bool {
684
	switch p.cur.Type {
685
	default:
686
		return false
687
	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:
688
		return true
689
	case token.TEXT:
690
		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'
691
	}
692
}
693
694
func (p *Parser) parseAmount() *ast.Amount {
695
	s := p.cur.Span
696
	amt := &ast.Amount{
697
		QuantityFmt: ast.QuantityFormat{Decimal: '.'},
698
		Span:        p.span(s),
699
	}
700
701
	// commodity before quantity: $10.00
702
	if p.got(token.COMMODITYMARK) {
703
		amt.Commodity = p.cur.Literal
704
		amt.CommodityPos = ast.CommodityBefore
705
		p.advance()
706
		if p.got(token.WHITESPACE) {
707
			amt.HasSpace = true
708
			p.skipWhitespace()
709
		}
710
		switch p.cur.Type {
711
		case token.MINUS:
712
			amt.IsNegative = true
713
			p.advance()
714
		case token.PLUS:
715
			p.advance()
716
		}
717
		p.parseQuantityInto(amt)
718
	} else {
719
		// optional sign
720
		switch p.cur.Type {
721
		case token.MINUS:
722
			amt.IsNegative = true
723
			p.advance()
724
		case token.PLUS:
725
			p.advance()
726
		}
727
728
		// commodity before quantity: -$120:
729
		if p.got(token.COMMODITYMARK) {
730
			amt.Commodity = p.cur.Literal
731
			amt.CommodityPos = ast.CommodityBefore
732
			p.advance()
733
			if p.got(token.WHITESPACE) {
734
				amt.HasSpace = true
735
				p.skipWhitespace()
736
			}
737
		}
738
739
		p.parseQuantityInto(amt)
740
741
		// commodity after quantity: 10.00 UAH (only if not set)
742
		if amt.Commodity == "" {
743
			switch p.cur.Type {
744
			case token.WHITESPACE:
745
				p.skipWhitespace()
746
				if p.got(token.COMMODITYMARK) {
747
					amt.HasSpace = true
748
					amt.Commodity = p.cur.Literal
749
					amt.CommodityPos = ast.CommodityAfter
750
					p.advance()
751
				}
752
			case token.COMMODITYMARK:
753
				amt.Commodity = p.cur.Literal
754
				amt.CommodityPos = ast.CommodityAfter
755
				p.advance()
756
			}
757
		}
758
	}
759
760
	return amt
761
}
762
763
func (p *Parser) parseAmountWithOptExpr() *ast.Amount {
764
	if p.got(token.STAR) {
765
		p.advance()
766
		p.skipWhitespace()
767
		amt := p.parseAmount()
768
		if amt != nil {
769
			amt.IsExpr = true
770
		}
771
		return amt
772
	}
773
	if p.got(token.PARENEXPR) {
774
		lit := p.cur.Literal
775
		amt := &ast.Amount{
776
			IsExpr:      true,
777
			QuantityFmt: ast.QuantityFormat{Decimal: '.'},
778
		}
779
		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {
780
			inner := lit[1 : len(lit)-1]
781
			i := 0
782
			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {
783
				i++
784
			}
785
			j := len(inner)
786
			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {
787
				j--
788
			}
789
			amt.Expr = inner[i:j]
790
		}
791
		amt.Span = p.cur.Span
792
		p.advance()
793
		return amt
794
	}
795
	return p.parseAmount()
796
}
797
798
func (p *Parser) parsePosting() *ast.Posting {
799
	s := p.cur.Span
800
	posting := &ast.Posting{}
801
	p.expect(token.INDENT)
802
803
	// exit if it's empty line
804
	if p.got(token.NEWLINE) || p.got(token.EOF) {
805
		p.syncToNextline()
806
		return nil
807
	}
808
809
	// optional status, outside of brackets, '! (account)'
810
	posting.Status = p.parseStatus()
811
812
	// detect virtual posting brackets
813
	switch p.cur.Type {
814
	case token.LPAREN:
815
		posting.Type = ast.PostingVirtualUnbalanced
816
		p.advance()
817
	case token.LBRACKET:
818
		posting.Type = ast.PostingVirtualBalanced
819
		p.advance()
820
	}
821
822
	// optional status, inside of brackets, '(* account)'
823
	if p.got(token.STAR) || p.got(token.BANG) {
824
		posting.Status = p.parseStatus()
825
	}
826
827
	// validate, must be account text
828
	if p.cur.Type != token.TEXT {
829
		p.errorf("expected account name, got %s", p.cur.Type)
830
		p.syncToNextline()
831
		return nil
832
	}
833
834
	posting.Account = p.parseAccount()
835
836
	// consume closing bracket
837
	switch p.cur.Type {
838
	case token.RPAREN:
839
		p.advance()
840
	case token.RBRACKET:
841
		p.advance()
842
	}
843
844
	// optional amount - after two spaces
845
	if p.got(token.WHITESPACE) {
846
		p.skipWhitespace()
847
		if p.isAmountStart() || p.got(token.STAR) {
848
			posting.Amount = p.parseAmountWithOptExpr()
849
		}
850
	}
851
852
	// optional cost '@' or '@@'
853
	if p.got(token.WHITESPACE) {
854
		p.skipWhitespace()
855
	}
856
	if p.got(token.AT) || p.got(token.ATAT) {
857
		posting.Cost = p.parseCost()
858
	}
859
860
	// optional balance assertion
861
	if p.got(token.WHITESPACE) {
862
		p.skipWhitespace()
863
	}
864
	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {
865
		posting.Balance = p.parseBalanceAssertion()
866
		p.skipWhitespace()
867
		if p.got(token.AT) || p.got(token.ATAT) {
868
			p.parseCost()
869
		}
870
	}
871
872
	posting.Comment = p.parseOptInlineComment()
873
	p.expectNewline()
874
875
	// continuation comments
876
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
877
		p.advance()
878
		c := p.parseComment()
879
		posting.Comments = append(posting.Comments, *c)
880
	}
881
882
	posting.Span = p.span(s)
883
	return posting
884
}
885
886
func (p *Parser) parseCost() *ast.Cost {
887
	s := p.cur.Span
888
	isTotal := p.got(token.ATAT)
889
	p.advance() // consume '@' '@@'
890
	p.skipWhitespace()
891
	return &ast.Cost{
892
		IsTotal: isTotal,
893
		Amount:  p.parseAmount(),
894
		Span:    p.span(s),
895
	}
896
}
897
898
func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {
899
	s := p.cur.Span
900
901
	ba := &ast.BalanceAssertion{}
902
	switch p.cur.Type {
903
	case token.EQ: // basic assertion
904
	case token.EQEQ:
905
		ba.IsStrict = true
906
	case token.EQEQEQ:
907
		ba.IsStrict = true
908
		ba.IsInclusive = true
909
	}
910
	p.advance()
911
	p.skipWhitespace()
912
913
	ba.Amount = *p.parseAmount()
914
	ba.Span = p.span(s)
915
	return ba
916
}
917
918
func (p *Parser) parseAccount() ast.Account {
919
	s := p.cur.Span
920
	var name strings.Builder
921
922
	switch p.cur.Type {
923
	case token.TEXT:
924
		_, _ = name.WriteString(p.cur.Literal)
925
		p.advance()
926
		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {
927
			_, _ = name.WriteString(" ")
928
			p.advance()
929
			_, _ = name.WriteString(p.cur.Literal)
930
			p.advance()
931
		}
932
	case token.COMMODITYMARK:
933
		_, _ = name.WriteString(p.cur.Literal)
934
		p.advance()
935
		for p.got(token.TEXT) {
936
			_, _ = name.WriteString(p.cur.Literal)
937
			p.advance()
938
		}
939
	}
940
	return ast.Account{Name: name.String(), Span: p.span(s)}
941
}
942
943
func (p *Parser) parseDate() ast.Date {
944
	s := p.cur.Span
945
	tok, ok := p.expect(token.DATE)
946
	if !ok {
947
		return ast.Date{Span: p.span(s)}
948
	}
949
950
	sep := byte(0)
951
	lit := tok.Literal
952
	for i := 0; i < len(lit); i++ {
953
		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {
954
			sep = lit[i]
955
			break
956
		}
957
	}
958
	if sep == 0 {
959
		p.errorf("invalid date format: %q", lit)
960
		return ast.Date{Span: p.span(s)}
961
	}
962
963
	parts := strings.Split(lit, string(sep))
964
965
	// M/D or MM/DD (year inferred)
966
	if len(parts) == 2 {
967
		month, err := strconv.Atoi(parts[0])
968
		day, err2 := strconv.Atoi(parts[1])
969
		if err != nil || err2 != nil {
970
			p.errorf("invalid date literal: %q", lit)
971
			return ast.Date{Span: p.span(s)}
972
		}
973
		if month < 1 || month > 12 {
974
			p.errorf("invalid month %d in %q", month, lit)
975
			return ast.Date{Span: p.span(s)}
976
		}
977
		if day < 1 || day > 31 {
978
			p.errorf("invalid day %d in %q", day, lit)
979
			return ast.Date{Span: p.span(s)}
980
		}
981
		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}
982
	}
983
984
	if len(parts) != 3 {
985
		p.errorf("invalid date format: %q", lit)
986
		return ast.Date{Span: p.span(s)}
987
	}
988
989
	year, err := strconv.Atoi(parts[0])
990
	month, err2 := strconv.Atoi(parts[1])
991
	day, err3 := strconv.Atoi(parts[2])
992
	if err != nil || err2 != nil || err3 != nil {
993
		p.errorf("invalid date literal: %q", lit)
994
		return ast.Date{Span: p.span(s)}
995
	}
996
	if month < 1 || month > 12 {
997
		p.errorf("invalid month %d in %q", month, lit)
998
		return ast.Date{Span: p.span(s)}
999
	}
1000
	if day < 1 || day > 31 {
1001
		p.errorf("invalid day %d in %q", day, lit)
1002
		return ast.Date{Span: p.span(s)}
1003
	}
1004
1005
	return ast.Date{
1006
		Year:  year,
1007
		Month: month,
1008
		Day:   day,
1009
		Sep:   sep,
1010
		Span:  p.span(s),
1011
	}
1012
}
1013
1014
func (p *Parser) parseOptInlineComment() *ast.Comment {
1015
	p.skipWhitespace() // todo:
1016
	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {
1017
		return nil
1018
	}
1019
1020
	s := p.cur.Span
1021
	marker := p.cur.Literal[0]
1022
	p.advance() // consume marker
1023
	p.skipWhitespace()
1024
1025
	text := ""
1026
	if p.got(token.TEXT) {
1027
		text = p.cur.Literal
1028
		p.advance()
1029
	}
1030
1031
	return &ast.Comment{
1032
		Marker: marker,
1033
		Text:   text,
1034
		Span:   p.span(s),
1035
	}
1036
}
1037
1038
// parseOptPeriodicDescription parses the description of a periodic
// transaction. A description is only recognised when the current token is
// whitespace of at least two characters and text follows it; otherwise the
// result is "".
func (p *Parser) parseOptPeriodicDescription() string {
	wideGap := p.got(token.WHITESPACE) && len(p.cur.Literal) >= 2
	if !wideGap {
		return ""
	}

	p.skipWhitespace()

	if !p.got(token.TEXT) {
		return ""
	}
	return p.parseDescription()
}
1051
1052
// parseDescription concatenates consecutive text tokens, keeping whitespace
// only where more text follows it, and returns the result.
func (p *Parser) parseDescription() string {
	var b strings.Builder
	for {
		isText := p.got(token.TEXT)
		isInnerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !isText && !isInnerSpace {
			break
		}
		_, _ = b.WriteString(p.cur.Literal)
		p.advance()
	}
	return b.String()
}
1060
1061
// parseDirectiveExpr returns the literals of all tokens up to, but not
// including, the next NEWLINE, EOF or SEMICOLON token, joined verbatim.
func (p *Parser) parseDirectiveExpr() string {
	var expr strings.Builder
	for {
		switch p.cur.Type {
		case token.NEWLINE, token.EOF, token.SEMICOLON:
			return expr.String()
		}
		_, _ = expr.WriteString(p.cur.Literal)
		p.advance()
	}
}
1069
1070
// parseQuantityInto consumes one number-like token (INT, DECIMAL or TEXT) and
// stores its detected format and its value in amt. The value is negated when
// amt.IsNegative is already set. An unexpected token is reported without
// being consumed; an unparsable literal is reported after it was consumed.
func (p *Parser) parseQuantityInto(amt *ast.Amount) {
	switch p.cur.Type {
	case token.INT, token.DECIMAL, token.TEXT:
		// acceptable start of a quantity
	default:
		p.errorf("expected quantity, got %s", p.cur.Type)
		return
	}

	lit := p.advance().Literal

	// The format has to be read off the literal as written, before the
	// separators are rewritten below.
	format := detectFormat(lit)
	amt.QuantityFmt = format

	// decimal.NewFromString wants no thousands separators and '.' as the
	// decimal mark.
	plain := normalizeLiteral(lit, format.Thousands, format.Decimal)

	qty, err := decimal.NewFromString(plain)
	if err != nil {
		p.errorf("invalid quantity %q: %v", lit, err)
		return
	}
	if amt.IsNegative {
		qty = qty.Neg()
	}
	amt.Quantity = qty
}
1097
1098
// parseBlankLine consumes the line end at the current position and returns a
// BlankLine whose span is that of the token it started on.
func (p *Parser) parseBlankLine() *ast.BlankLine {
	line := &ast.BlankLine{Span: p.cur.Span}
	p.expectNewline()
	return line
}
1103
1104
// expectNewline requires the current token to end the line. A NEWLINE is
// consumed, EOF is accepted and left in place, and anything else is reported
// without being consumed.
func (p *Parser) expectNewline() {
	switch p.cur.Type {
	case token.NEWLINE:
		p.advance()
	case token.EOF:
		// end of input also ends the line; nothing to consume
	default:
		p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)
	}
}
1113
1114
// advance moves the parser forward by one token: the lookahead becomes the
// current token and a new lookahead is read from the lexer. It returns the
// token that was current before the call.
func (p *Parser) advance() token.Token {
	var consumed token.Token
	consumed, p.cur = p.cur, p.peek
	p.peek = p.lexer.Next()
	return consumed
}
1120
1121
func (p *Parser) got(kind token.Type) bool     { return p.cur.Type == kind }
1122
func (p *Parser) willGet(kind token.Type) bool { return p.peek.Type == kind }
1123
1124
// expect consumes the current token if it is of the given kind and returns
// it together with true. Otherwise it records an error, leaves the token in
// place and returns it together with false.
func (p *Parser) expect(kind token.Type) (token.Token, bool) {
	if !p.got(kind) {
		p.errorf("expected %s, got %s", kind, p.cur.Type)
		return p.cur, false
	}
	return p.advance(), true
}
1131
1132
func (p *Parser) errorf(format string, args ...any) {
1133
	p.errors = append(p.errors, &ast.ParseError{
1134
		Span:    p.cur.Span,
1135
		Message: fmt.Sprintf(format, args...),
1136
	})
1137
}
1138
1139
func (p *Parser) sync() {
1140
	for {
1141
		switch p.cur.Type {
1142
		case token.EOF:
1143
			return
1144
		case token.NEWLINE:
1145
			p.advance()
1146
			switch p.cur.Type {
1147
			case token.DATE, token.ACCOUNT, token.COMMODITY,
1148
				token.INCLUDE, token.ALIAS, token.PAYEE,
1149
				token.TAG, token.YEAR, token.D, token.P,
1150
				token.APPLY, token.END, token.COMMENTKW,
1151
				token.DECIMALMARK, token.TILDE, token.N, token.EQ:
1152
				return
1153
			}
1154
		default:
1155
			p.advance()
1156
		}
1157
	}
1158
}
1159
1160
// syncToNextline discards the rest of the current line, including its
// NEWLINE token. It stops without consuming anything at EOF.
func (p *Parser) syncToNextline() {
	for {
		switch p.cur.Type {
		case token.EOF:
			return
		case token.NEWLINE:
			p.advance()
			return
		}
		p.advance()
	}
}
1168
1169
// skipWhitespace consumes every WHITESPACE token at the current position.
func (p *Parser) skipWhitespace() {
	for p.cur.Type == token.WHITESPACE {
		p.advance()
	}
}
1174
1175
// span builds the span of a node that began at s and ends where the current
// (not yet consumed) token starts.
func (p *Parser) span(s token.Span) token.Span {
	result := token.Span{Start: s.Start}
	result.End = p.cur.Span.Start
	return result
}
1178
1179
// normalizeLiteral rewrites a number literal so that it uses '.' as the
// decimal mark and has no thousands separators.
//
// Every byte equal to thousands is dropped (unless thousands is 0, meaning
// "no separator"), every byte equal to decimalMark becomes '.', and all other
// bytes are copied unchanged. The thousands check takes precedence when both
// marks are the same byte.
func normalizeLiteral(lit string, thousands, decimalMark byte) string {
	// The parameter is named decimalMark so that it does not shadow the
	// imported decimal package.
	var b strings.Builder
	b.Grow(len(lit)) // the result is never longer than the input
	for i := 0; i < len(lit); i++ {
		switch ch := lit[i]; {
		case thousands != 0 && ch == thousands:
			// skip thousands separator
		case ch == decimalMark:
			b.WriteByte('.')
		default:
			b.WriteByte(ch)
		}
	}
	return b.String()
}
1193
1194
func detectFormat(lit string) ast.QuantityFormat {
1195
	// find all separator positions
1196
	var separators []int
1197
	for i, ch := range []byte(lit) {
1198
		if ch == '.' || ch == ',' {
1199
			separators = append(separators, i)
1200
		}
1201
	}
1202
1203
	if len(separators) == 0 {
1204
		// "1000" — no separators, integer
1205
		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}
1206
	}
1207
1208
	var decimal byte
1209
	thousands := byte(0)
1210
	precision := 0
1211
1212
	if len(separators) == 1 {
1213
		// "10.00" or "10,00" — single separator is the decimal mark
1214
		pos := separators[0]
1215
		decimal = lit[pos]
1216
		precision = len(lit) - pos - 1
1217
	} else {
1218
		// "1,000.00" or "1.000,00" — last separator is decimal, first is thousands
1219
		last := separators[len(separators)-1]
1220
		decimal = lit[last]
1221
		thousands = lit[separators[0]]
1222
		precision = len(lit) - last - 1
1223
	}
1224
1225
	return ast.QuantityFormat{
1226
		Decimal:   decimal,
1227
		Thousands: thousands,
1228
		Precision: precision,
1229
	}
1230
}
1231
1232
// parseSimpleDate parses a "Y-M-D", "Y/M/D" or "Y.M.D" date without any
// parser state. It returns the zero Date (Year == 0) when s is shorter than
// eight bytes, does not split into exactly three parts, has a part that is
// not a number, or has a month outside 1-12 or a day outside 1-31.
// parsePeriod treats Year == 0 as "not a date".
func parseSimpleDate(s string) ast.Date {
	if len(s) < 8 {
		return ast.Date{}
	}

	// '/' is preferred over '.', and '-' is the fallback.
	sep := byte('-')
	switch {
	case strings.Contains(s, "/"):
		sep = '/'
	case strings.Contains(s, "."):
		sep = '.'
	}

	parts := strings.Split(s, string(sep))
	if len(parts) != 3 {
		return ast.Date{}
	}

	year, errYear := strconv.Atoi(parts[0])
	month, errMonth := strconv.Atoi(parts[1])
	day, errDay := strconv.Atoi(parts[2])
	// Reject non-numeric parts instead of reading them as 0, and apply the
	// same range limits as Parser.parseDate.
	if errYear != nil || errMonth != nil || errDay != nil {
		return ast.Date{}
	}
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return ast.Date{}
	}

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}
}