all repos

clerk @ 7136c82

missing tooling for ledger/hledger

clerk/journal/parser/parser.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
parser: some directives were missing inline comment & improve memory layout of some directives, 14 days ago
1
package parser
2
3
import (
4
	"fmt"
5
	"strconv"
6
	"strings"
7
8
	"github.com/shopspring/decimal"
9
10
	"olexsmir.xyz/clerk/journal/ast"
11
	"olexsmir.xyz/clerk/journal/lexer"
12
	"olexsmir.xyz/clerk/journal/token"
13
)
14
15
type Parser struct {
16
	lexer  *lexer.Lexer
17
	errors []*ast.ParseError
18
	cur    token.Token
19
	peek   token.Token
20
}
21
22
// New returns a Parser that reads from lex, with both the current and the
// lookahead token already loaded.
func New(lex *lexer.Lexer) *Parser {
	parser := &Parser{lexer: lex}
	// Two reads prime the window: the first fills peek, the second shifts it
	// into cur and refills peek.
	for i := 0; i < 2; i++ {
		parser.advance()
	}
	return parser
}
28
29
// ParseJournal consumes tokens until EOF and returns the resulting journal.
// Entries that could not be parsed are left out; the errors recorded while
// parsing are attached to the returned journal.
func (p *Parser) ParseJournal() *ast.Journal {
	journal := &ast.Journal{}
	for !p.got(token.EOF) {
		entry := p.parseEntry()
		if entry == nil {
			continue
		}
		journal.Entries = append(journal.Entries, entry)
	}
	journal.Errors = p.errors
	return journal
}
39
40
func (p *Parser) parseEntry() ast.Entry {
41
	switch p.cur.Type {
42
	case token.ILLEGAL:
43
		p.errorf("illegal character %q", p.cur.Literal)
44
		p.advance()
45
		return nil
46
	case token.INDENT:
47
		p.errorf("unexpected indent")
48
		p.syncToNextline()
49
		return nil
50
	case token.DATE:
51
		return p.parseTransaction()
52
	case token.TILDE:
53
		return p.parsePeriodicTransaction()
54
	case token.EQ:
55
		return p.parseAutomatedTransaction()
56
	case token.NEWLINE:
57
		return p.parseBlankLine()
58
	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:
59
		return p.parseComment()
60
	case token.ACCOUNT:
61
		return p.parseAccountDirective()
62
	case token.COMMODITY:
63
		return p.parseCommodityDirective()
64
	case token.INCLUDE:
65
		return p.parseIncludeDirective()
66
	case token.ALIAS:
67
		return p.parseAliasDirective()
68
	case token.PAYEE:
69
		return p.parsePayeeDirective()
70
	case token.TAG:
71
		return p.parseTagDirective()
72
	case token.YEAR:
73
		return p.parseYearDirective()
74
	case token.DECIMALMARK:
75
		return p.parseDecimalMarkDirective()
76
	case token.D:
77
		return p.parseDefaultCommodityDirective()
78
	case token.P:
79
		return p.parseMarketPriceDirective()
80
	case token.N:
81
		return p.parseIgnoredDirective()
82
	case token.APPLY:
83
		return p.parseApplyDirective()
84
	case token.END:
85
		return p.parseEndDirective()
86
	case token.COMMENTKW:
87
		return p.parseCommentBlockDirective()
88
	default:
89
		p.errorf("unexpected token %s", p.cur.Type)
90
		p.sync()
91
		return nil
92
	}
93
}
94
95
// parseTransaction parses a dated transaction: the header line (date,
// optional "=date" secondary date, optional status, optional "(code)",
// optional payee with "| note", optional inline comment), then the indented
// ";" comment lines directly below the header, then every following indented
// line as a posting.
func (p *Parser) parseTransaction() *ast.Transaction {
	s := p.cur.Span
	tx := &ast.Transaction{}

	tx.Date = p.parseDate()

	// optional secondary date
	if p.got(token.EQ) {
		p.advance()
		d := p.parseDate()
		tx.SecondDate = &d
	}

	p.skipWhitespace()

	// optional status
	tx.Status = p.parseStatus()

	// optional code
	if p.got(token.LPAREN) {
		p.advance()
		var code strings.Builder
		// Also stop at the end of the line: without that an unterminated
		// "(" would never leave this loop once EOF is reached.
		for !p.got(token.RPAREN) && !p.got(token.NEWLINE) && !p.got(token.EOF) {
			_, _ = code.WriteString(p.cur.Literal)
			p.advance()
		}
		// Consume the closing ")" (or report that it is missing) so the
		// rest of the header can be parsed.
		p.expect(token.RPAREN)
		codeText := code.String()
		tx.Code = &codeText
		p.skipWhitespace()
	}

	// optional payee | note
	if p.got(token.TEXT) {
		tx.Payee = p.parsePayee()

		// parsePayee stops before trailing whitespace; skip it to look for "|"
		p.skipWhitespace()

		if p.got(token.PIPE) {
			p.advance()
			p.skipWhitespace()

			var note strings.Builder
			for p.got(token.TEXT) || p.got(token.WHITESPACE) {
				_, _ = note.WriteString(p.cur.Literal)
				p.advance()
			}
			noteText := note.String()
			tx.Note = &noteText
		}
	}

	tx.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments — indented ; lines before first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		c := p.parseComment()
		tx.HeaderComments = append(tx.HeaderComments, *c)
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			tx.Postings = append(tx.Postings, posting)
		}
	}

	tx.Span = p.span(s)
	return tx
}
167
168
// parsePayee collects consecutive TEXT tokens into a payee name. Whitespace
// is kept only when another TEXT token follows it, so trailing whitespace is
// left unconsumed.
func (p *Parser) parsePayee() *ast.Payee {
	start := p.cur.Span

	var buf strings.Builder
	for {
		innerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !p.got(token.TEXT) && !innerSpace {
			break
		}
		_, _ = buf.WriteString(p.advance().Literal)
	}
	return &ast.Payee{Name: buf.String(), Span: p.span(start)}
}
179
180
// parsePeriodicTransaction parses a "~" periodic transaction: the period
// expression, an optional description, an optional inline comment, then the
// indented ";" header comments and the indented postings.
//
// Header comments and postings are recognised the same way as in
// parseTransaction and parseAutomatedTransaction: a comment belongs to the
// header only when it is indented, and an unindented ";" line is left for the
// top-level entry loop instead of being absorbed here.
func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {
	s := p.cur.Span
	p.expect(token.TILDE)
	p.skipWhitespace()

	pt := &ast.PeriodicTransaction{}

	// NOTE(review): the span is taken before the period is parsed, so it
	// covers only the marker and the whitespace after it. Kept as in the
	// original; confirm whether the whole entry was intended.
	pt.Span = p.span(s)
	pt.Period = p.parsePeriod()

	if desc := p.parseOptPeriodicDescription(); desc != "" {
		pt.Description = &desc
	}

	pt.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments — indented ; lines before the first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			pt.Postings = append(pt.Postings, posting)
		}
	}

	return pt
}
216
217
// parseAutomatedTransaction parses an "=" automated transaction: the rule
// expression, an optional inline comment, indented ";" header comments and
// the indented postings that follow.
func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {
	start := p.cur.Span
	p.expect(token.EQ)
	p.skipWhitespace()

	// NOTE(review): the span is computed here, before the expression is read,
	// so it covers only the marker and the whitespace after it.
	rule := &ast.AutomatedTransaction{Span: p.span(start)}

	rule.Expr = p.parseDirectiveExpr()
	rule.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// indented ";" lines directly under the header
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // drop the indent
		comment := p.parseComment()
		rule.HeaderComments = append(rule.HeaderComments, comment)
	}

	// every remaining indented line is a posting
	for p.got(token.INDENT) {
		posting := p.parsePosting()
		if posting == nil {
			continue
		}
		rule.Postings = append(rule.Postings, posting)
	}

	return rule
}
244
245
func (p *Parser) parsePeriod() ast.Period {
246
	s := p.cur.Span
247
248
	var periodBuf strings.Builder
249
250
	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&
251
		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {
252
253
		if p.got(token.WHITESPACE) {
254
			if len(p.cur.Literal) >= 2 {
255
				break
256
			}
257
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||
258
				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||
259
				p.willGet(token.PERCENT) || p.willGet(token.STAR) {
260
				p.advance()
261
				continue
262
			}
263
		}
264
265
		periodBuf.WriteString(p.cur.Literal)
266
		p.advance()
267
	}
268
269
	str := periodBuf.String()
270
	period := ast.Period{Raw: str, Span: p.span(s)}
271
272
	if _, after, ok := strings.Cut(str, " from "); ok {
273
		end := strings.Index(after, " ")
274
		dateStr := after
275
		if end >= 0 {
276
			dateStr = after[:end]
277
		}
278
		if d := parseSimpleDate(dateStr); d.Year > 0 {
279
			period.From = &d
280
			rest := after
281
			if end >= 0 {
282
				rest = after[end:]
283
			}
284
			if _, toAfter, ok := strings.Cut(rest, " to "); ok {
285
				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {
286
					toAfter = toAfter[:toEnd]
287
				}
288
				if d := parseSimpleDate(toAfter); d.Year > 0 {
289
					period.To = &d
290
				}
291
			}
292
		}
293
	}
294
	return period
295
}
296
297
func (p *Parser) parseComment() *ast.Comment {
298
	s := p.cur.Span
299
	marker := p.cur.Literal[0]
300
	p.advance()
301
	p.skipWhitespace()
302
303
	var text string
304
	if p.got(token.TEXT) {
305
		text = p.cur.Literal
306
		p.advance()
307
	}
308
309
	p.expectNewline()
310
311
	return &ast.Comment{
312
		Marker: marker,
313
		Text:   text,
314
		Span:   p.span(s),
315
	}
316
}
317
318
func (p *Parser) parseAccountDirective() *ast.AccountDirective {
319
	s := p.cur.Span
320
	p.expect(token.ACCOUNT)
321
	p.skipWhitespace()
322
323
	account := p.parseAccount()
324
	comment := p.parseOptInlineComment()
325
	p.expectNewline()
326
	return &ast.AccountDirective{
327
		Account: account,
328
		Comment: comment,
329
		Span:    p.span(s),
330
	}
331
}
332
333
func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {
334
	s := p.cur.Span
335
	p.expect(token.COMMODITY)
336
	p.skipWhitespace()
337
338
	var commodity string
339
	var format *ast.Amount
340
341
	switch p.cur.Type {
342
	case token.TEXT, token.INT, token.DECIMAL:
343
		format = p.parseAmount()
344
		commodity = format.Commodity
345
	case token.COMMODITYMARK:
346
		commodity = p.cur.Literal
347
		p.advance()
348
		hadSpace := p.got(token.WHITESPACE)
349
		p.skipWhitespace()
350
		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {
351
			format = p.parseAmount()
352
			format.Commodity = commodity
353
			format.CommodityPos = ast.CommodityBefore
354
			format.HasSpace = hadSpace
355
		}
356
	default:
357
		p.errorf("expected commodity name or amount, got %s", p.cur.Type)
358
	}
359
360
	if commodity == "" {
361
		p.errorf("expected commodity name, got %s", p.cur.Type)
362
	}
363
364
	comment := p.parseOptInlineComment()
365
	p.expectNewline()
366
367
	return &ast.CommodityDirective{
368
		Commodity: commodity,
369
		Format:    *format,
370
		Comment:   comment,
371
		Span:      p.span(s),
372
	}
373
}
374
375
func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {
376
	s := p.cur.Span
377
	p.expect(token.INCLUDE)
378
	p.skipWhitespace()
379
380
	path := ""
381
	if p.got(token.TEXT) {
382
		path = p.cur.Literal
383
		p.advance()
384
	} else {
385
		p.errorf("expected file path, got %s", p.cur.Type)
386
	}
387
388
	comment := p.parseOptInlineComment()
389
	p.expectNewline()
390
391
	return &ast.IncludeDirective{
392
		Path:    path,
393
		Comment: comment,
394
		Span:    p.span(s),
395
	}
396
}
397
398
// parseAliasDirective parses "alias <from> = <to>" with an optional inline
// comment. Only the account names are kept.
func (p *Parser) parseAliasDirective() *ast.AliasDirective {
	start := p.cur.Span
	p.expect(token.ALIAS)
	p.skipWhitespace()

	from := p.parseAccount().Name
	p.skipWhitespace()
	p.expect(token.EQ)
	p.skipWhitespace()

	to := p.parseAccount().Name
	p.skipWhitespace()

	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.AliasDirective{
		From:    from,
		To:      to,
		Comment: comment,
		Span:    p.span(start),
	}
}
414
415
func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {
416
	s := p.cur.Span
417
	p.expect(token.PAYEE)
418
	p.skipWhitespace()
419
420
	name := ""
421
	if p.got(token.TEXT) {
422
		name = p.parsePayee().Name
423
	}
424
425
	comment := p.parseOptInlineComment()
426
	p.expectNewline()
427
428
	return &ast.PayeeDirective{
429
		Name:    name,
430
		Comment: comment,
431
		Span:    p.span(s),
432
	}
433
}
434
435
func (p *Parser) parseTagDirective() *ast.TagDirective {
436
	s := p.cur.Span
437
	p.expect(token.TAG)
438
	p.skipWhitespace()
439
440
	name := ""
441
	if p.got(token.TEXT) {
442
		name = p.cur.Literal
443
		p.advance()
444
	}
445
446
	comment := p.parseOptInlineComment()
447
	p.expectNewline()
448
449
	return &ast.TagDirective{
450
		Name:    name,
451
		Comment: comment,
452
		Span:    p.span(s),
453
	}
454
}
455
456
// parseYearDirective parses the year directive: the keyword followed by an
// INT token and an optional inline comment. A missing INT is reported and
// leaves Year at zero.
func (p *Parser) parseYearDirective() *ast.YearDirective {
	start := p.cur.Span
	p.expect(token.YEAR)
	p.skipWhitespace()

	value := 0
	switch {
	case p.got(token.INT):
		// conversion errors are ignored, as before; value is whatever Atoi yields
		value, _ = strconv.Atoi(p.cur.Literal)
		p.advance()
	default:
		p.errorf("expected year, got %s", p.cur.Type)
	}

	p.skipWhitespace()
	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.YearDirective{
		Year:    value,
		Comment: comment,
		Span:    p.span(start),
	}
}
475
476
// parseDecimalMarkDirective parses the decimal-mark directive. Mark is the
// first byte of the TEXT token after the keyword, and '.' when there is no
// such token or it is empty.
func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {
	start := p.cur.Span
	p.expect(token.DECIMALMARK)
	p.skipWhitespace()

	symbol := byte('.')
	if p.got(token.TEXT) {
		if lit := p.cur.Literal; len(lit) > 0 {
			symbol = lit[0]
		}
		p.advance()
	}

	p.skipWhitespace()
	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.DecimalMarkDirective{
		Mark:    symbol,
		Comment: comment,
		Span:    p.span(start),
	}
}
496
497
// parseDefaultCommodityDirective parses "D <amount>" with an optional inline
// comment.
func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {
	start := p.cur.Span
	p.expect(token.D)
	p.skipWhitespace()

	amount := p.parseAmount()
	p.skipWhitespace()
	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.DefaultCommodityDirective{
		Amount:  *amount,
		Comment: comment,
		Span:    p.span(start),
	}
}
509
510
func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {
511
	s := p.cur.Span
512
	p.expect(token.N)
513
	p.skipWhitespace()
514
	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {
515
		p.advance()
516
	}
517
	p.skipWhitespace()
518
	comment := p.parseOptInlineComment()
519
	p.expectNewline()
520
	return &ast.IgnoredDirective{
521
		Comment: comment,
522
		Span:    p.span(s),
523
	}
524
}
525
526
// parseMarketPriceDirective parses "P <date> [<time>] <commodity> <amount>"
// with an optional inline comment.
func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {
	s := p.cur.Span
	p.expect(token.P)
	p.skipWhitespace()

	mp := &ast.MarketPriceDirective{}
	mp.DateTime.Date = p.parseDate()
	p.skipWhitespace()

	if p.got(token.TIME) {
		t := p.parseTime()
		mp.DateTime.Time = &t
		p.skipWhitespace()
	}

	// expect already consumes the commodity token on success. Advancing again
	// unconditionally would drop whatever follows it, which is harmless only
	// when that happens to be whitespace.
	tok, ok := p.expect(token.COMMODITYMARK)
	mp.Commodity = tok.Literal
	if !ok && !p.got(token.NEWLINE) && !p.got(token.EOF) {
		// recovery: step over the unexpected token so the amount can still be
		// parsed, but never across the end of the line
		p.advance()
	}
	p.skipWhitespace()

	mp.Amount = *p.parseAmount()

	p.skipWhitespace()
	mp.Comment = p.parseOptInlineComment()

	p.expectNewline()
	mp.Span = p.span(s)
	return mp
}
554
555
func (p *Parser) parseTime() ast.Time {
556
	s := p.cur.Span
557
	tok, _ := p.expect(token.TIME)
558
	lit := tok.Literal
559
560
	parts := strings.Split(lit, ":")
561
	if len(parts) < 2 {
562
		p.errorf("invalid time format: %q", lit)
563
		return ast.Time{Span: p.span(s)}
564
	}
565
566
	hour, _ := strconv.Atoi(parts[0])
567
	minute, _ := strconv.Atoi(parts[1])
568
	second := 0
569
	if len(parts) > 2 {
570
		second, _ = strconv.Atoi(parts[2])
571
	}
572
573
	if hour < 0 || hour > 23 {
574
		p.errorf("invalid hour %d in time %q", hour, lit)
575
	}
576
	if minute < 0 || minute > 59 {
577
		p.errorf("invalid minute %d in time %q", minute, lit)
578
	}
579
	if second < 0 || second > 59 {
580
		p.errorf("invalid second %d in time %q", second, lit)
581
	}
582
583
	return ast.Time{
584
		Hour:   hour,
585
		Minute: minute,
586
		Second: second,
587
		Span:   p.span(s),
588
	}
589
}
590
591
func (p *Parser) parseApplyDirective() *ast.ApplyDirective {
592
	s := p.cur.Span
593
	p.expect(token.APPLY)
594
	p.skipWhitespace()
595
596
	expr := p.parseDirectiveExpr()
597
	comment := p.parseOptInlineComment()
598
	p.expectNewline()
599
600
	return &ast.ApplyDirective{
601
		Expr:    expr,
602
		Comment: comment,
603
		Span:    p.span(s),
604
	}
605
}
606
607
func (p *Parser) parseEndDirective() *ast.EndDirective {
608
	s := p.cur.Span
609
	p.expect(token.END)
610
	p.skipWhitespace()
611
612
	expr := p.parseDirectiveExpr()
613
	comment := p.parseOptInlineComment()
614
	p.expectNewline()
615
616
	return &ast.EndDirective{
617
		Expr:    expr,
618
		Comment: comment,
619
		Span:    p.span(s),
620
	}
621
}
622
623
func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {
624
	start := p.cur.Span
625
	p.expect(token.COMMENTKW)
626
	p.skipWhitespace()
627
628
	header := p.parseDirectiveExpr()
629
	comment := p.parseOptInlineComment()
630
	p.expectNewline()
631
632
	var content strings.Builder
633
	for p.cur.Type != token.EOF {
634
		if p.got(token.END) {
635
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {
636
				p.advance()
637
				p.expectNewline()
638
				break
639
			}
640
			if p.willGet(token.WHITESPACE) {
641
				endTok := p.cur
642
				p.advance()
643
				wsTok := p.cur
644
				p.advance()
645
				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token
646
					p.advance()
647
					p.parseDirectiveExpr()
648
					p.parseOptInlineComment()
649
					p.expectNewline()
650
					break
651
				}
652
				content.WriteString(endTok.Literal)
653
				content.WriteString(wsTok.Literal)
654
				continue
655
			}
656
		}
657
		content.WriteString(p.cur.Literal)
658
		p.advance()
659
	}
660
661
	return &ast.CommentBlockDirective{
662
		Header:  header,
663
		Content: content.String(),
664
		Comment: comment,
665
		Span:    p.span(start),
666
	}
667
}
668
669
// parseStatus parses an optional status mark. A STAR or BANG token yields a
// status (cleared when the literal starts with '*', pending otherwise) and
// the whitespace after it is skipped; any other token yields nil and nothing
// is consumed.
func (p *Parser) parseStatus() *ast.Status {
	if !p.got(token.STAR) && !p.got(token.BANG) {
		return nil
	}

	st := &ast.Status{Value: ast.StatusPending, Span: p.cur.Span}
	if p.cur.Literal[0] == '*' {
		st.Value = ast.StatusCleared
	}
	p.advance()
	p.skipWhitespace()
	return st
}
682
683
func (p *Parser) isAmountStart() bool {
684
	switch p.cur.Type {
685
	default:
686
		return false
687
	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:
688
		return true
689
	case token.TEXT:
690
		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'
691
	}
692
}
693
694
func (p *Parser) parseAmount() *ast.Amount {
695
	s := p.cur.Span
696
	amt := &ast.Amount{
697
		QuantityFmt: ast.QuantityFormat{Decimal: '.'},
698
		Span:        p.span(s),
699
	}
700
701
	// commodity before quantity: $10.00
702
	if p.got(token.COMMODITYMARK) {
703
		amt.Commodity = p.cur.Literal
704
		amt.CommodityPos = ast.CommodityBefore
705
		p.advance()
706
		if p.got(token.WHITESPACE) {
707
			amt.HasSpace = true
708
			p.skipWhitespace()
709
		}
710
		switch p.cur.Type {
711
		case token.MINUS:
712
			amt.IsNegative = true
713
			p.advance()
714
		case token.PLUS:
715
			p.advance()
716
		}
717
		p.parseQuantityInto(amt)
718
	} else {
719
		// optional sign
720
		switch p.cur.Type {
721
		case token.MINUS:
722
			amt.IsNegative = true
723
			p.advance()
724
		case token.PLUS:
725
			p.advance()
726
		}
727
728
		// commodity before quantity: -$120:
729
		if p.got(token.COMMODITYMARK) {
730
			amt.Commodity = p.cur.Literal
731
			amt.CommodityPos = ast.CommodityBefore
732
			p.advance()
733
			if p.got(token.WHITESPACE) {
734
				amt.HasSpace = true
735
				p.skipWhitespace()
736
			}
737
		}
738
739
		p.parseQuantityInto(amt)
740
741
		// commodity after quantity: 10.00 UAH (only if not set)
742
		if amt.Commodity == "" {
743
			switch p.cur.Type {
744
			case token.WHITESPACE:
745
				p.skipWhitespace()
746
				if p.got(token.COMMODITYMARK) {
747
					amt.HasSpace = true
748
					amt.Commodity = p.cur.Literal
749
					amt.CommodityPos = ast.CommodityAfter
750
					p.advance()
751
				}
752
			case token.COMMODITYMARK:
753
				amt.Commodity = p.cur.Literal
754
				amt.CommodityPos = ast.CommodityAfter
755
				p.advance()
756
			}
757
		}
758
	}
759
760
	return amt
761
}
762
763
func (p *Parser) parseAmountWithOptExpr() *ast.Amount {
764
	if p.got(token.STAR) {
765
		p.advance()
766
		p.skipWhitespace()
767
		amt := p.parseAmount()
768
		if amt != nil {
769
			amt.IsExpr = true
770
		}
771
		return amt
772
	}
773
	if p.got(token.PARENEXPR) {
774
		lit := p.cur.Literal
775
		amt := &ast.Amount{
776
			IsExpr:      true,
777
			QuantityFmt: ast.QuantityFormat{Decimal: '.'},
778
		}
779
		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {
780
			inner := lit[1 : len(lit)-1]
781
			i := 0
782
			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {
783
				i++
784
			}
785
			j := len(inner)
786
			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {
787
				j--
788
			}
789
			amt.Expr = inner[i:j]
790
		}
791
		amt.Span = p.cur.Span
792
		p.advance()
793
		return amt
794
	}
795
	return p.parseAmount()
796
}
797
798
func (p *Parser) parsePosting() *ast.Posting {
799
	s := p.cur.Span
800
	posting := &ast.Posting{}
801
	p.expect(token.INDENT)
802
803
	// exit if it's empty line
804
	if p.got(token.NEWLINE) || p.got(token.EOF) {
805
		p.syncToNextline()
806
		return nil
807
	}
808
809
	// optional status, outside of brackets, '! (account)'
810
	posting.Status = p.parseStatus()
811
812
	// detect virtual posting brackets
813
	switch p.cur.Type {
814
	case token.LPAREN:
815
		posting.Type = ast.PostingVirtualUnbalanced
816
		p.advance()
817
	case token.LBRACKET:
818
		posting.Type = ast.PostingVirtualBalanced
819
		p.advance()
820
	}
821
822
	// optional status, inside of brackets, '(* account)'
823
	if p.got(token.STAR) || p.got(token.BANG) {
824
		posting.Status = p.parseStatus()
825
	}
826
827
	// validate, must be account text
828
	if p.cur.Type != token.TEXT {
829
		p.errorf("expected account name, got %s", p.cur.Type)
830
		p.syncToNextline()
831
		return nil
832
	}
833
834
	posting.Account = p.parseAccount()
835
836
	// consume closing bracket
837
	switch p.cur.Type {
838
	case token.RPAREN:
839
		p.advance()
840
	case token.RBRACKET:
841
		p.advance()
842
	}
843
844
	// optional amount - after two spaces
845
	if p.got(token.WHITESPACE) {
846
		p.skipWhitespace()
847
		if p.isAmountStart() || p.got(token.STAR) {
848
			posting.Amount = p.parseAmountWithOptExpr()
849
		}
850
	}
851
852
	// optional cost '@' or '@@'
853
	if p.got(token.WHITESPACE) {
854
		p.skipWhitespace()
855
	}
856
	if p.got(token.AT) || p.got(token.ATAT) {
857
		posting.Cost = p.parseCost()
858
	}
859
860
	// optional balance assertion
861
	if p.got(token.WHITESPACE) {
862
		p.skipWhitespace()
863
	}
864
	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {
865
		posting.Balance = p.parseBalanceAssertion()
866
		p.skipWhitespace()
867
		if p.got(token.AT) || p.got(token.ATAT) {
868
			p.parseCost()
869
		}
870
	}
871
872
	posting.Comment = p.parseOptInlineComment()
873
	p.expectNewline()
874
875
	// continuation comments
876
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
877
		p.advance()
878
		c := p.parseComment()
879
		posting.Comments = append(posting.Comments, *c)
880
	}
881
882
	posting.Span = p.span(s)
883
	return posting
884
}
885
886
func (p *Parser) parseCost() *ast.Cost {
887
	s := p.cur.Span
888
	isTotal := p.got(token.ATAT)
889
	p.advance() // consume '@' '@@'
890
	p.skipWhitespace()
891
	return &ast.Cost{
892
		IsTotal: isTotal,
893
		Amount:  *p.parseAmount(),
894
		Span:    p.span(s),
895
	}
896
}
897
898
func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {
899
	s := p.cur.Span
900
901
	ba := &ast.BalanceAssertion{}
902
	switch p.cur.Type {
903
	case token.EQ: // basic assertion
904
	case token.EQEQ:
905
		ba.IsStrict = true
906
	case token.EQEQEQ:
907
		ba.IsStrict = true
908
		ba.IsInclusive = true
909
	}
910
	p.advance()
911
	p.skipWhitespace()
912
913
	ba.Amount = *p.parseAmount()
914
	ba.Span = p.span(s)
915
	return ba
916
}
917
918
func (p *Parser) parseAccount() ast.Account {
919
	s := p.cur.Span
920
	var name strings.Builder
921
922
	switch p.cur.Type {
923
	case token.TEXT:
924
		_, _ = name.WriteString(p.cur.Literal)
925
		p.advance()
926
		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {
927
			_, _ = name.WriteString(" ")
928
			p.advance()
929
			_, _ = name.WriteString(p.cur.Literal)
930
			p.advance()
931
		}
932
	case token.COMMODITYMARK:
933
		_, _ = name.WriteString(p.cur.Literal)
934
		p.advance()
935
		for p.got(token.TEXT) {
936
			_, _ = name.WriteString(p.cur.Literal)
937
			p.advance()
938
		}
939
	}
940
	return ast.Account{Name: name.String(), Span: p.span(s)}
941
}
942
943
func (p *Parser) parseDate() ast.Date {
944
	s := p.cur.Span
945
	tok, ok := p.expect(token.DATE)
946
	if !ok {
947
		return ast.Date{Span: p.span(s)}
948
	}
949
950
	sep := byte(0)
951
	lit := tok.Literal
952
	for i := 0; i < len(lit); i++ {
953
		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {
954
			sep = lit[i]
955
			break
956
		}
957
	}
958
	if sep == 0 {
959
		p.errorf("invalid date format: %q", lit)
960
		return ast.Date{Span: p.span(s)}
961
	}
962
963
	parts := strings.Split(lit, string(sep))
964
965
	// M/D or MM/DD (year inferred)
966
	if len(parts) == 2 {
967
		month, err := strconv.Atoi(parts[0])
968
		day, err2 := strconv.Atoi(parts[1])
969
		if err != nil || err2 != nil {
970
			p.errorf("invalid date literal: %q", lit)
971
			return ast.Date{Span: p.span(s)}
972
		}
973
		if month < 1 || month > 12 {
974
			p.errorf("invalid month %d in %q", month, lit)
975
			return ast.Date{Span: p.span(s)}
976
		}
977
		if day < 1 || day > 31 {
978
			p.errorf("invalid day %d in %q", day, lit)
979
			return ast.Date{Span: p.span(s)}
980
		}
981
		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}
982
	}
983
984
	if len(parts) != 3 {
985
		p.errorf("invalid date format: %q", lit)
986
		return ast.Date{Span: p.span(s)}
987
	}
988
989
	year, err := strconv.Atoi(parts[0])
990
	month, err2 := strconv.Atoi(parts[1])
991
	day, err3 := strconv.Atoi(parts[2])
992
	if err != nil || err2 != nil || err3 != nil {
993
		p.errorf("invalid date literal: %q", lit)
994
		return ast.Date{Span: p.span(s)}
995
	}
996
	if month < 1 || month > 12 {
997
		p.errorf("invalid month %d in %q", month, lit)
998
		return ast.Date{Span: p.span(s)}
999
	}
1000
	if day < 1 || day > 31 {
1001
		p.errorf("invalid day %d in %q", day, lit)
1002
		return ast.Date{Span: p.span(s)}
1003
	}
1004
1005
	return ast.Date{
1006
		Year:  year,
1007
		Month: month,
1008
		Day:   day,
1009
		Sep:   sep,
1010
		Span:  p.span(s),
1011
	}
1012
}
1013
1014
func (p *Parser) parseOptInlineComment() *ast.Comment {
1015
	p.skipWhitespace() // todo:
1016
	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {
1017
		return nil
1018
	}
1019
1020
	s := p.cur.Span
1021
	marker := p.cur.Literal[0]
1022
	p.advance() // consume marker
1023
	p.skipWhitespace()
1024
1025
	text := ""
1026
	if p.got(token.TEXT) {
1027
		text = p.cur.Literal
1028
		p.advance()
1029
	}
1030
1031
	return &ast.Comment{
1032
		Marker: marker,
1033
		Text:   text,
1034
		Span:   p.span(s),
1035
	}
1036
}
1037
1038
// parseOptPeriodicDescription parses the description of a periodic
// transaction, which must be separated from the period by at least two
// whitespace characters. It returns "" when that separator or the
// description text is missing.
func (p *Parser) parseOptPeriodicDescription() string {
	wideGap := p.got(token.WHITESPACE) && len(p.cur.Literal) >= 2
	if !wideGap {
		return ""
	}

	p.skipWhitespace()

	if !p.got(token.TEXT) {
		return ""
	}
	return p.parseDescription()
}
1051
1052
// parseDescription concatenates consecutive TEXT tokens, keeping whitespace
// only when another TEXT token follows it.
func (p *Parser) parseDescription() string {
	var buf strings.Builder
	for {
		innerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !p.got(token.TEXT) && !innerSpace {
			return buf.String()
		}
		_, _ = buf.WriteString(p.advance().Literal)
	}
}
1060
1061
// parseDirectiveExpr concatenates the literals of every token up to, but not
// including, the next NEWLINE, EOF or SEMICOLON token.
func (p *Parser) parseDirectiveExpr() string {
	var expr strings.Builder
	for {
		switch p.cur.Type {
		case token.NEWLINE, token.EOF, token.SEMICOLON:
			return expr.String()
		}
		_, _ = expr.WriteString(p.advance().Literal)
	}
}
1069
1070
// parseQuantityInto parses the numeric part of an amount into amt. It records
// the detected number format in amt.QuantityFmt and stores the value, negated
// when amt.IsNegative is set, in amt.Quantity. On a non-numeric token nothing
// is consumed; on an unparsable literal the token is consumed but Quantity is
// left untouched. Both cases record an error.
func (p *Parser) parseQuantityInto(amt *ast.Amount) {
	switch p.cur.Type {
	case token.INT, token.DECIMAL, token.TEXT:
		// acceptable quantity token
	default:
		p.errorf("expected quantity, got %s", p.cur.Type)
		return
	}

	lit := p.advance().Literal

	// capture the formatting before it is normalised away
	format := detectFormat(lit)
	amt.QuantityFmt = format

	// decimal.NewFromString wants no grouping marks and '.' as decimal mark
	plain := normalizeLiteral(lit, format.Thousands, format.Decimal)
	quantity, err := decimal.NewFromString(plain)
	if err != nil {
		p.errorf("invalid quantity %q: %v", lit, err)
		return
	}

	if amt.IsNegative {
		quantity = quantity.Neg()
	}
	amt.Quantity = quantity
}
1097
1098
// parseBlankLine consumes a line break and returns a BlankLine spanning the
// token the parser was on.
func (p *Parser) parseBlankLine() *ast.BlankLine {
	line := &ast.BlankLine{Span: p.cur.Span}
	p.expectNewline()
	return line
}
1103
1104
// expectNewline requires the line to end here. A NEWLINE token is consumed,
// EOF is accepted as is, and anything else is reported without being
// consumed.
func (p *Parser) expectNewline() {
	switch p.cur.Type {
	case token.NEWLINE:
		p.advance()
	case token.EOF:
		// end of input also ends the line; nothing to consume
	default:
		p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)
	}
}
1113
1114
// advance shifts the lookahead token into cur, reads a new lookahead from the
// lexer, and returns the token that was current before the call.
func (p *Parser) advance() token.Token {
	consumed := p.cur
	p.cur, p.peek = p.peek, p.lexer.Next()
	return consumed
}
1120
1121
func (p *Parser) got(kind token.Type) bool     { return p.cur.Type == kind }
1122
func (p *Parser) willGet(kind token.Type) bool { return p.peek.Type == kind }
1123
1124
// expect consumes the current token when it has the given type and returns it
// with true. Otherwise it records an error and returns the unconsumed current
// token with false.
func (p *Parser) expect(kind token.Type) (token.Token, bool) {
	if !p.got(kind) {
		p.errorf("expected %s, got %s", kind, p.cur.Type)
		return p.cur, false
	}
	return p.advance(), true
}
1131
1132
func (p *Parser) errorf(format string, args ...any) {
1133
	p.errors = append(p.errors, &ast.ParseError{
1134
		Span:    p.cur.Span,
1135
		Message: fmt.Sprintf(format, args...),
1136
	})
1137
}
1138
1139
func (p *Parser) sync() {
1140
	for {
1141
		switch p.cur.Type {
1142
		case token.EOF:
1143
			return
1144
		case token.NEWLINE:
1145
			p.advance()
1146
			switch p.cur.Type {
1147
			case token.DATE, token.ACCOUNT, token.COMMODITY,
1148
				token.INCLUDE, token.ALIAS, token.PAYEE,
1149
				token.TAG, token.YEAR, token.D, token.P,
1150
				token.APPLY, token.END, token.COMMENTKW,
1151
				token.DECIMALMARK, token.TILDE, token.N, token.EQ:
1152
				return
1153
			}
1154
		default:
1155
			p.advance()
1156
		}
1157
	}
1158
}
1159
1160
// syncToNextline discards the rest of the current line, including its
// NEWLINE token if there is one.
func (p *Parser) syncToNextline() {
	for !p.got(token.NEWLINE) && !p.got(token.EOF) {
		p.advance()
	}
	if p.got(token.EOF) {
		return
	}
	p.advance() // the line break itself
}
1168
1169
// skipWhitespace consumes consecutive WHITESPACE tokens, if any.
func (p *Parser) skipWhitespace() {
	for {
		if !p.got(token.WHITESPACE) {
			return
		}
		p.advance()
	}
}
1174
1175
// span returns the range that begins where s begins and ends where the
// current (not yet consumed) token begins.
func (p *Parser) span(s token.Span) token.Span {
	result := token.Span{Start: s.Start}
	result.End = p.cur.Span.Start
	return result
}
1178
1179
// normalizeLiteral rewrites a number literal into plain form: every byte
// equal to thousands is dropped (unless thousands is 0) and every byte equal
// to decimal becomes '.'. The thousands check comes first, so when both marks
// are the same byte they are all removed.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var b strings.Builder
	// the result is never longer than the input, so one allocation suffices
	b.Grow(len(lit))
	for i := 0; i < len(lit); i++ {
		ch := lit[i]
		switch {
		case thousands != 0 && ch == thousands:
			// grouping mark: skip it
		case ch == decimal:
			b.WriteByte('.')
		default:
			b.WriteByte(ch)
		}
	}
	return b.String()
}
1193
1194
func detectFormat(lit string) ast.QuantityFormat {
1195
	// find all separator positions
1196
	var separators []int
1197
	for i, ch := range []byte(lit) {
1198
		if ch == '.' || ch == ',' {
1199
			separators = append(separators, i)
1200
		}
1201
	}
1202
1203
	if len(separators) == 0 {
1204
		// "1000" — no separators, integer
1205
		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}
1206
	}
1207
1208
	var decimal byte
1209
	thousands := byte(0)
1210
	precision := 0
1211
1212
	if len(separators) == 1 {
1213
		// "10.00" or "10,00" — single separator is the decimal mark
1214
		pos := separators[0]
1215
		decimal = lit[pos]
1216
		precision = len(lit) - pos - 1
1217
	} else {
1218
		// "1,000.00" or "1.000,00" — last separator is decimal, first is thousands
1219
		last := separators[len(separators)-1]
1220
		decimal = lit[last]
1221
		thousands = lit[separators[0]]
1222
		precision = len(lit) - last - 1
1223
	}
1224
1225
	return ast.QuantityFormat{
1226
		Decimal:   decimal,
1227
		Thousands: thousands,
1228
		Precision: precision,
1229
	}
1230
}
1231
1232
// parseSimpleDate parses a full "Y<sep>M<sep>D" date out of plain text, where
// <sep> is '/' if the text contains one, otherwise '.' if it contains one,
// otherwise '-'. It returns the zero Date (Year 0) when the text is shorter
// than 8 bytes, does not split into three parts, has a non-numeric part, or
// has a month outside 1-12 or a day outside 1-31. The range checks match
// those in parseDate; the result carries no span.
func parseSimpleDate(s string) ast.Date {
	if len(s) < 8 {
		return ast.Date{}
	}

	sep := byte('-')
	if strings.Contains(s, "/") {
		sep = byte('/')
	} else if strings.Contains(s, ".") {
		sep = byte('.')
	}

	parts := strings.Split(s, string(sep))
	if len(parts) != 3 {
		return ast.Date{}
	}

	year, errYear := strconv.Atoi(parts[0])
	month, errMonth := strconv.Atoi(parts[1])
	day, errDay := strconv.Atoi(parts[2])
	if errYear != nil || errMonth != nil || errDay != nil {
		// previously a bad component silently became 0 and the date was kept
		return ast.Date{}
	}
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return ast.Date{}
	}

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}
}