all repos

clerk @ 0c4384e55357eeaf877e7181621a0af2330468a9

missing tooling for ledger/hledger

clerk/journal/parser/parser.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
remove decimal dependency, 13 days ago
1
package parser
2
3
import (
4
	"fmt"
5
	"strconv"
6
	"strings"
7
8
	"olexsmir.xyz/clerk/internal/decimal"
9
	"olexsmir.xyz/clerk/journal/ast"
10
	"olexsmir.xyz/clerk/journal/lexer"
11
	"olexsmir.xyz/clerk/journal/token"
12
)
13
14
// Parser turns the token stream produced by a lexer.Lexer into journal AST
// nodes. It keeps a two-token window (cur and peek) and collects parse errors
// instead of stopping at the first one.
type Parser struct {
15
	lexer  *lexer.Lexer // token source; advance pulls from it via Next
16
	errors []*ast.ParseError // errors recorded by errorf, handed to the Journal by ParseJournal
17
	cur    token.Token // token currently being examined
18
	peek   token.Token // one-token look-ahead, read by willGet
19
}
20
21
// New returns a Parser that reads tokens from lex, with both the current and
// the look-ahead token already loaded.
func New(lex *lexer.Lexer) *Parser {
	parser := &Parser{lexer: lex}
	// Two advances prime the window: the first fills peek, the second moves
	// that token into cur and refills peek.
	for i := 0; i < 2; i++ {
		parser.advance()
	}
	return parser
}
27
28
// ParseJournal parses entries until EOF and returns the resulting journal.
// Entries that fail to parse are skipped; the errors recorded along the way
// are attached to the returned journal.
func (p *Parser) ParseJournal() *ast.Journal {
	journal := &ast.Journal{}
	for !p.got(token.EOF) {
		entry := p.parseEntry()
		if entry == nil {
			continue
		}
		journal.Entries = append(journal.Entries, entry)
	}
	journal.Errors = p.errors
	return journal
}
38
39
func (p *Parser) parseEntry() ast.Entry {
40
	switch p.cur.Type {
41
	case token.ILLEGAL:
42
		p.errorf("illegal character %q", p.cur.Literal)
43
		p.advance()
44
		return nil
45
	case token.INDENT:
46
		p.errorf("unexpected indent")
47
		p.syncToNextline()
48
		return nil
49
	case token.DATE:
50
		return p.parseTransaction()
51
	case token.TILDE:
52
		return p.parsePeriodicTransaction()
53
	case token.EQ:
54
		return p.parseAutomatedTransaction()
55
	case token.NEWLINE:
56
		return p.parseBlankLine()
57
	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:
58
		return p.parseComment()
59
	case token.ACCOUNT:
60
		return p.parseAccountDirective()
61
	case token.COMMODITY:
62
		return p.parseCommodityDirective()
63
	case token.INCLUDE:
64
		return p.parseIncludeDirective()
65
	case token.ALIAS:
66
		return p.parseAliasDirective()
67
	case token.PAYEE:
68
		return p.parsePayeeDirective()
69
	case token.TAG:
70
		return p.parseTagDirective()
71
	case token.YEAR:
72
		return p.parseYearDirective()
73
	case token.DECIMALMARK:
74
		return p.parseDecimalMarkDirective()
75
	case token.D:
76
		return p.parseDefaultCommodityDirective()
77
	case token.P:
78
		return p.parseMarketPriceDirective()
79
	case token.N:
80
		return p.parseIgnoredDirective()
81
	case token.APPLY:
82
		return p.parseApplyDirective()
83
	case token.END:
84
		return p.parseEndDirective()
85
	case token.COMMENTKW:
86
		return p.parseCommentBlockDirective()
87
	default:
88
		p.errorf("unexpected token %s", p.cur.Type)
89
		p.sync()
90
		return nil
91
	}
92
}
93
94
// parseTransaction parses a dated transaction: the header line
// (date, optional "=secondary date", status, "(code)", "payee | note" and an
// inline comment), then indented header comments, then postings.
func (p *Parser) parseTransaction() *ast.Transaction {
	s := p.cur.Span
	tx := &ast.Transaction{}

	tx.Date = p.parseDate()

	// optional secondary date
	if p.got(token.EQ) {
		p.advance()
		d := p.parseDate()
		tx.SecondDate = &d
	}

	p.skipWhitespace()

	// optional status
	tx.Status = p.parseStatus()

	// optional code
	if p.got(token.LPAREN) {
		p.advance()
		var code strings.Builder
		// Stop at the end of the line as well as at ')': an unterminated code
		// must not swallow the rest of the file or spin forever at EOF.
		for !p.got(token.RPAREN) && !p.got(token.NEWLINE) && !p.got(token.EOF) {
			_, _ = code.WriteString(p.cur.Literal)
			p.advance()
		}
		// Consume the closing ')'; expect records an error when it is missing.
		p.expect(token.RPAREN)
		codeText := code.String()
		tx.Code = &codeText
		p.skipWhitespace()
	}

	// optional payee | note
	if p.got(token.TEXT) {
		tx.Payee = p.parsePayee()

		// check for | separator
		p.skipWhitespace()
		if p.got(token.PIPE) {
			p.advance()
			p.skipWhitespace()

			var note strings.Builder
			for p.got(token.TEXT) || p.got(token.WHITESPACE) {
				_, _ = note.WriteString(p.cur.Literal)
				p.advance()
			}
			noteText := note.String()
			tx.Note = &noteText
		}
	}

	tx.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments: indented ';' lines before the first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		c := p.parseComment()
		tx.HeaderComments = append(tx.HeaderComments, *c)
	}

	// postings
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			tx.Postings = append(tx.Postings, posting)
		}
	}

	tx.Span = p.span(s)
	return tx
}
166
167
// parsePayee collects consecutive TEXT tokens, plus any WHITESPACE token that
// is directly followed by TEXT, into a payee name. Trailing whitespace is left
// unconsumed.
func (p *Parser) parsePayee() *ast.Payee {
	start := p.cur.Span

	var buf strings.Builder
	for {
		isText := p.got(token.TEXT)
		isInnerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !isText && !isInnerSpace {
			break
		}
		_, _ = buf.WriteString(p.cur.Literal)
		p.advance()
	}

	payee := &ast.Payee{Name: buf.String()}
	payee.Span = p.span(start)
	return payee
}
178
179
// parsePeriodicTransaction parses a "~ period  description" header followed by
// its header comments and postings.
func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {
	s := p.cur.Span
	p.expect(token.TILDE)
	p.skipWhitespace()

	pt := &ast.PeriodicTransaction{}

	// NOTE(review): the span is taken before the period is parsed, so it only
	// covers the '~' marker and following whitespace. Kept as in the original;
	// confirm whether it should extend over the whole entry.
	pt.Span = p.span(s)
	pt.Period = p.parsePeriod()

	if desc := p.parseOptPeriodicDescription(); desc != "" {
		pt.Description = &desc
	}

	pt.Comment = p.parseOptInlineComment()
	p.expectNewline()

	for p.got(token.INDENT) || p.got(token.SEMICOLON) {
		switch {
		case p.got(token.SEMICOLON):
			// Unindented ';' line directly below the entry (original behaviour).
			pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
		case p.willGet(token.SEMICOLON):
			// Indented ';' line: a header comment, handled the same way as in
			// dated and automated transactions, rather than being passed to
			// parsePosting and reported as a missing account name.
			p.advance() // consume indent
			pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
		default:
			if posting := p.parsePosting(); posting != nil {
				pt.Postings = append(pt.Postings, posting)
			}
		}
	}

	return pt
}
215
216
// parseAutomatedTransaction parses an "= expr" header followed by indented
// header comments and postings.
func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {
	start := p.cur.Span
	p.expect(token.EQ)
	p.skipWhitespace()

	// The span is captured right after the marker, before the expression.
	auto := &ast.AutomatedTransaction{Span: p.span(start)}

	auto.Expr = p.parseDirectiveExpr()
	auto.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// Indented ';' lines ahead of the first posting are header comments.
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance()
		comment := p.parseComment()
		auto.HeaderComments = append(auto.HeaderComments, comment)
	}

	// Every remaining indented line is a posting candidate.
	for p.got(token.INDENT) {
		posting := p.parsePosting()
		if posting == nil {
			continue
		}
		auto.Postings = append(auto.Postings, posting)
	}

	return auto
}
243
244
func (p *Parser) parsePeriod() ast.Period {
245
	s := p.cur.Span
246
247
	var periodBuf strings.Builder
248
249
	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&
250
		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {
251
252
		if p.got(token.WHITESPACE) {
253
			if len(p.cur.Literal) >= 2 {
254
				break
255
			}
256
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||
257
				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||
258
				p.willGet(token.PERCENT) || p.willGet(token.STAR) {
259
				p.advance()
260
				continue
261
			}
262
		}
263
264
		periodBuf.WriteString(p.cur.Literal)
265
		p.advance()
266
	}
267
268
	str := periodBuf.String()
269
	period := ast.Period{Raw: str, Span: p.span(s)}
270
271
	if _, after, ok := strings.Cut(str, " from "); ok {
272
		end := strings.Index(after, " ")
273
		dateStr := after
274
		if end >= 0 {
275
			dateStr = after[:end]
276
		}
277
		if d := parseSimpleDate(dateStr); d.Year > 0 {
278
			period.From = &d
279
			rest := after
280
			if end >= 0 {
281
				rest = after[end:]
282
			}
283
			if _, toAfter, ok := strings.Cut(rest, " to "); ok {
284
				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {
285
					toAfter = toAfter[:toEnd]
286
				}
287
				if d := parseSimpleDate(toAfter); d.Year > 0 {
288
					period.To = &d
289
				}
290
			}
291
		}
292
	}
293
	return period
294
}
295
296
func (p *Parser) parseComment() *ast.Comment {
297
	s := p.cur.Span
298
	marker := p.cur.Literal[0]
299
	p.advance()
300
	p.skipWhitespace()
301
302
	var text string
303
	if p.got(token.TEXT) {
304
		text = p.cur.Literal
305
		p.advance()
306
	}
307
308
	p.expectNewline()
309
310
	return &ast.Comment{
311
		Marker: marker,
312
		Text:   text,
313
		Span:   p.span(s),
314
	}
315
}
316
317
func (p *Parser) parseAccountDirective() *ast.AccountDirective {
318
	s := p.cur.Span
319
	p.expect(token.ACCOUNT)
320
	p.skipWhitespace()
321
322
	account := p.parseAccount()
323
	comment := p.parseOptInlineComment()
324
	p.expectNewline()
325
	return &ast.AccountDirective{
326
		Account: account,
327
		Comment: comment,
328
		Span:    p.span(s),
329
	}
330
}
331
332
func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {
333
	s := p.cur.Span
334
	p.expect(token.COMMODITY)
335
	p.skipWhitespace()
336
337
	var commodity string
338
	var format *ast.Amount
339
340
	switch p.cur.Type {
341
	case token.TEXT, token.INT, token.DECIMAL:
342
		format = p.parseAmount()
343
		commodity = format.Commodity
344
	case token.COMMODITYMARK:
345
		commodity = p.cur.Literal
346
		p.advance()
347
		hadSpace := p.got(token.WHITESPACE)
348
		p.skipWhitespace()
349
		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {
350
			format = p.parseAmount()
351
			format.Commodity = commodity
352
			format.CommodityPos = ast.CommodityBefore
353
			format.HasSpace = hadSpace
354
		}
355
	default:
356
		p.errorf("expected commodity name or amount, got %s", p.cur.Type)
357
	}
358
359
	if commodity == "" {
360
		p.errorf("expected commodity name, got %s", p.cur.Type)
361
	}
362
363
	comment := p.parseOptInlineComment()
364
	p.expectNewline()
365
366
	cd := &ast.CommodityDirective{
367
		Commodity: commodity,
368
		Comment:   comment,
369
		Span:      p.span(s),
370
	}
371
	if format != nil {
372
		cd.Format = *format
373
	}
374
	return cd
375
}
376
377
func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {
378
	s := p.cur.Span
379
	p.expect(token.INCLUDE)
380
	p.skipWhitespace()
381
382
	path := ""
383
	if p.got(token.TEXT) {
384
		path = p.cur.Literal
385
		p.advance()
386
	} else {
387
		p.errorf("expected file path, got %s", p.cur.Type)
388
	}
389
390
	comment := p.parseOptInlineComment()
391
	p.expectNewline()
392
393
	return &ast.IncludeDirective{
394
		Path:    path,
395
		Comment: comment,
396
		Span:    p.span(s),
397
	}
398
}
399
400
// parseAliasDirective parses "alias <from> = <to>" with an optional inline
// comment.
func (p *Parser) parseAliasDirective() *ast.AliasDirective {
	start := p.cur.Span
	p.expect(token.ALIAS)
	p.skipWhitespace()

	from := p.parseAccount()
	p.skipWhitespace()
	p.expect(token.EQ)
	p.skipWhitespace()
	to := p.parseAccount()
	p.skipWhitespace()

	dir := &ast.AliasDirective{From: from.Name, To: to.Name}
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()
	dir.Span = p.span(start)
	return dir
}
416
417
func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {
418
	s := p.cur.Span
419
	p.expect(token.PAYEE)
420
	p.skipWhitespace()
421
422
	name := ""
423
	if p.got(token.TEXT) {
424
		name = p.parsePayee().Name
425
	}
426
427
	comment := p.parseOptInlineComment()
428
	p.expectNewline()
429
430
	return &ast.PayeeDirective{
431
		Name:    name,
432
		Comment: comment,
433
		Span:    p.span(s),
434
	}
435
}
436
437
func (p *Parser) parseTagDirective() *ast.TagDirective {
438
	s := p.cur.Span
439
	p.expect(token.TAG)
440
	p.skipWhitespace()
441
442
	name := ""
443
	if p.got(token.TEXT) {
444
		name = p.cur.Literal
445
		p.advance()
446
	}
447
448
	comment := p.parseOptInlineComment()
449
	p.expectNewline()
450
451
	return &ast.TagDirective{
452
		Name:    name,
453
		Comment: comment,
454
		Span:    p.span(s),
455
	}
456
}
457
458
// parseYearDirective parses the year directive: the keyword, an integer year
// and an optional inline comment. A missing or unparsable year is reported and
// leaves Year at zero.
func (p *Parser) parseYearDirective() *ast.YearDirective {
	s := p.cur.Span
	year := &ast.YearDirective{}
	p.expect(token.YEAR)
	p.skipWhitespace()

	if p.got(token.INT) {
		// Atoi can still fail on an INT token (for example on overflow);
		// report it instead of silently storing a clamped value.
		if y, err := strconv.Atoi(p.cur.Literal); err != nil {
			p.errorf("invalid year %q: %v", p.cur.Literal, err)
		} else {
			year.Year = y
		}
		p.advance()
	} else {
		p.errorf("expected year, got %s", p.cur.Type)
	}

	p.skipWhitespace()
	year.Comment = p.parseOptInlineComment()
	p.expectNewline()
	year.Span = p.span(s)
	return year
}
477
478
// parseDecimalMarkDirective parses the decimal-mark directive. The mark
// defaults to '.' and is replaced by the first byte of a following non-empty
// TEXT token.
func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {
	start := p.cur.Span
	p.expect(token.DECIMALMARK)
	p.skipWhitespace()

	dir := &ast.DecimalMarkDirective{Mark: byte('.')}
	if p.got(token.TEXT) {
		if lit := p.cur.Literal; len(lit) > 0 {
			dir.Mark = lit[0]
		}
		p.advance()
	}

	p.skipWhitespace()
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()
	dir.Span = p.span(start)
	return dir
}
498
499
// parseDefaultCommodityDirective parses "D <amount>" with an optional inline
// comment.
func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {
	start := p.cur.Span
	p.expect(token.D)
	p.skipWhitespace()

	amount := p.parseAmount()
	p.skipWhitespace()

	dir := &ast.DefaultCommodityDirective{Amount: *amount}
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()
	dir.Span = p.span(start)
	return dir
}
511
512
func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {
513
	s := p.cur.Span
514
	p.expect(token.N)
515
	p.skipWhitespace()
516
	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {
517
		p.advance()
518
	}
519
	p.skipWhitespace()
520
	comment := p.parseOptInlineComment()
521
	p.expectNewline()
522
	return &ast.IgnoredDirective{
523
		Comment: comment,
524
		Span:    p.span(s),
525
	}
526
}
527
528
// parseMarketPriceDirective parses "P <date> [<time>] <commodity> <amount>"
// with an optional inline comment.
func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {
	s := p.cur.Span
	p.expect(token.P)
	p.skipWhitespace()

	mp := &ast.MarketPriceDirective{}
	mp.DateTime.Date = p.parseDate()
	p.skipWhitespace()

	if p.got(token.TIME) {
		t := p.parseTime()
		mp.DateTime.Time = &t
		p.skipWhitespace()
	}

	// expect already consumes the commodity token on success. No further
	// advance is needed: an extra one would eat the first token of the price,
	// or, when the commodity is missing, the NEWLINE, so that the next line
	// would be parsed as this directive's amount.
	tok, _ := p.expect(token.COMMODITYMARK)
	mp.Commodity = tok.Literal
	p.skipWhitespace()

	mp.Amount = *p.parseAmount()

	p.skipWhitespace()
	mp.Comment = p.parseOptInlineComment()

	p.expectNewline()
	mp.Span = p.span(s)
	return mp
}
556
557
func (p *Parser) parseTime() ast.Time {
558
	s := p.cur.Span
559
	tok, _ := p.expect(token.TIME)
560
	lit := tok.Literal
561
562
	parts := strings.Split(lit, ":")
563
	if len(parts) < 2 {
564
		p.errorf("invalid time format: %q", lit)
565
		return ast.Time{Span: p.span(s)}
566
	}
567
568
	hour, _ := strconv.Atoi(parts[0])
569
	minute, _ := strconv.Atoi(parts[1])
570
	second := 0
571
	if len(parts) > 2 {
572
		second, _ = strconv.Atoi(parts[2])
573
	}
574
575
	if hour < 0 || hour > 23 {
576
		p.errorf("invalid hour %d in time %q", hour, lit)
577
	}
578
	if minute < 0 || minute > 59 {
579
		p.errorf("invalid minute %d in time %q", minute, lit)
580
	}
581
	if second < 0 || second > 59 {
582
		p.errorf("invalid second %d in time %q", second, lit)
583
	}
584
585
	return ast.Time{
586
		Hour:   hour,
587
		Minute: minute,
588
		Second: second,
589
		Span:   p.span(s),
590
	}
591
}
592
593
func (p *Parser) parseApplyDirective() *ast.ApplyDirective {
594
	s := p.cur.Span
595
	p.expect(token.APPLY)
596
	p.skipWhitespace()
597
598
	expr := p.parseDirectiveExpr()
599
	comment := p.parseOptInlineComment()
600
	p.expectNewline()
601
602
	return &ast.ApplyDirective{
603
		Expr:    expr,
604
		Comment: comment,
605
		Span:    p.span(s),
606
	}
607
}
608
609
func (p *Parser) parseEndDirective() *ast.EndDirective {
610
	s := p.cur.Span
611
	p.expect(token.END)
612
	p.skipWhitespace()
613
614
	expr := p.parseDirectiveExpr()
615
	comment := p.parseOptInlineComment()
616
	p.expectNewline()
617
618
	return &ast.EndDirective{
619
		Expr:    expr,
620
		Comment: comment,
621
		Span:    p.span(s),
622
	}
623
}
624
625
func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {
626
	start := p.cur.Span
627
	p.expect(token.COMMENTKW)
628
	p.skipWhitespace()
629
630
	header := p.parseDirectiveExpr()
631
	comment := p.parseOptInlineComment()
632
	p.expectNewline()
633
634
	var content strings.Builder
635
	for p.cur.Type != token.EOF {
636
		if p.got(token.END) {
637
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {
638
				p.advance()
639
				p.expectNewline()
640
				break
641
			}
642
			if p.willGet(token.WHITESPACE) {
643
				endTok := p.cur
644
				p.advance()
645
				wsTok := p.cur
646
				p.advance()
647
				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token
648
					p.advance()
649
					p.parseDirectiveExpr()
650
					p.parseOptInlineComment()
651
					p.expectNewline()
652
					break
653
				}
654
				content.WriteString(endTok.Literal)
655
				content.WriteString(wsTok.Literal)
656
				continue
657
			}
658
		}
659
		content.WriteString(p.cur.Literal)
660
		p.advance()
661
	}
662
663
	return &ast.CommentBlockDirective{
664
		Header:  header,
665
		Content: content.String(),
666
		Comment: comment,
667
		Span:    p.span(start),
668
	}
669
}
670
671
// parseStatus consumes an optional status marker ('*' or '!') together with
// the whitespace after it. It returns nil, consuming nothing, when the current
// token is not a status marker.
func (p *Parser) parseStatus() *ast.Status {
	if !p.got(token.STAR) && !p.got(token.BANG) {
		return nil
	}

	st := &ast.Status{Value: ast.StatusPending, Span: p.cur.Span}
	if p.cur.Literal[0] == '*' {
		st.Value = ast.StatusCleared
	}
	p.advance()
	p.skipWhitespace()
	return st
}
684
685
func (p *Parser) isAmountStart() bool {
686
	switch p.cur.Type {
687
	default:
688
		return false
689
	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:
690
		return true
691
	case token.TEXT:
692
		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'
693
	}
694
}
695
696
func (p *Parser) parseAmount() *ast.Amount {
697
	s := p.cur.Span
698
	amt := &ast.Amount{
699
		QuantityFmt: ast.QuantityFormat{Decimal: '.'},
700
		Span:        p.span(s),
701
	}
702
703
	// commodity before quantity: $10.00
704
	if p.got(token.COMMODITYMARK) {
705
		amt.Commodity = p.cur.Literal
706
		amt.CommodityPos = ast.CommodityBefore
707
		p.advance()
708
		if p.got(token.WHITESPACE) {
709
			amt.HasSpace = true
710
			p.skipWhitespace()
711
		}
712
		switch p.cur.Type {
713
		case token.MINUS:
714
			amt.IsNegative = true
715
			p.advance()
716
		case token.PLUS:
717
			p.advance()
718
		}
719
		p.parseQuantityInto(amt)
720
	} else {
721
		// optional sign
722
		switch p.cur.Type {
723
		case token.MINUS:
724
			amt.IsNegative = true
725
			p.advance()
726
		case token.PLUS:
727
			p.advance()
728
		}
729
730
		// commodity before quantity: -$120:
731
		if p.got(token.COMMODITYMARK) {
732
			amt.Commodity = p.cur.Literal
733
			amt.CommodityPos = ast.CommodityBefore
734
			p.advance()
735
			if p.got(token.WHITESPACE) {
736
				amt.HasSpace = true
737
				p.skipWhitespace()
738
			}
739
		}
740
741
		p.parseQuantityInto(amt)
742
743
		// commodity after quantity: 10.00 UAH (only if not set)
744
		if amt.Commodity == "" {
745
			switch p.cur.Type {
746
			case token.WHITESPACE:
747
				p.skipWhitespace()
748
				if p.got(token.COMMODITYMARK) {
749
					amt.HasSpace = true
750
					amt.Commodity = p.cur.Literal
751
					amt.CommodityPos = ast.CommodityAfter
752
					p.advance()
753
				}
754
			case token.COMMODITYMARK:
755
				amt.Commodity = p.cur.Literal
756
				amt.CommodityPos = ast.CommodityAfter
757
				p.advance()
758
			}
759
		}
760
	}
761
762
	return amt
763
}
764
765
func (p *Parser) parseAmountWithOptExpr() *ast.Amount {
766
	if p.got(token.STAR) {
767
		p.advance()
768
		p.skipWhitespace()
769
		amt := p.parseAmount()
770
		if amt != nil {
771
			amt.IsExpr = true
772
		}
773
		return amt
774
	}
775
	if p.got(token.PARENEXPR) {
776
		lit := p.cur.Literal
777
		amt := &ast.Amount{
778
			IsExpr:      true,
779
			QuantityFmt: ast.QuantityFormat{Decimal: '.'},
780
		}
781
		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {
782
			inner := lit[1 : len(lit)-1]
783
			i := 0
784
			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {
785
				i++
786
			}
787
			j := len(inner)
788
			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {
789
				j--
790
			}
791
			amt.Expr = inner[i:j]
792
		}
793
		amt.Span = p.cur.Span
794
		p.advance()
795
		return amt
796
	}
797
	return p.parseAmount()
798
}
799
800
func (p *Parser) parsePosting() *ast.Posting {
801
	s := p.cur.Span
802
	posting := &ast.Posting{}
803
	p.expect(token.INDENT)
804
805
	// exit if it's empty line
806
	if p.got(token.NEWLINE) || p.got(token.EOF) {
807
		p.syncToNextline()
808
		return nil
809
	}
810
811
	// optional status, outside of brackets, '! (account)'
812
	posting.Status = p.parseStatus()
813
814
	// detect virtual posting brackets
815
	switch p.cur.Type {
816
	case token.LPAREN:
817
		posting.Type = ast.PostingVirtualUnbalanced
818
		p.advance()
819
	case token.LBRACKET:
820
		posting.Type = ast.PostingVirtualBalanced
821
		p.advance()
822
	}
823
824
	// optional status, inside of brackets, '(* account)'
825
	if p.got(token.STAR) || p.got(token.BANG) {
826
		posting.Status = p.parseStatus()
827
	}
828
829
	// validate, must be account text
830
	if p.cur.Type != token.TEXT {
831
		p.errorf("expected account name, got %s", p.cur.Type)
832
		p.syncToNextline()
833
		return nil
834
	}
835
836
	posting.Account = p.parseAccount()
837
838
	// consume closing bracket
839
	switch p.cur.Type {
840
	case token.RPAREN:
841
		p.advance()
842
	case token.RBRACKET:
843
		p.advance()
844
	}
845
846
	// optional amount - after two spaces
847
	if p.got(token.WHITESPACE) {
848
		p.skipWhitespace()
849
		if p.isAmountStart() || p.got(token.STAR) {
850
			posting.Amount = p.parseAmountWithOptExpr()
851
		}
852
	}
853
854
	// optional cost '@' or '@@'
855
	if p.got(token.WHITESPACE) {
856
		p.skipWhitespace()
857
	}
858
	if p.got(token.AT) || p.got(token.ATAT) {
859
		posting.Cost = p.parseCost()
860
	}
861
862
	// optional balance assertion
863
	if p.got(token.WHITESPACE) {
864
		p.skipWhitespace()
865
	}
866
	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {
867
		posting.Balance = p.parseBalanceAssertion()
868
		p.skipWhitespace()
869
		if p.got(token.AT) || p.got(token.ATAT) {
870
			p.parseCost()
871
		}
872
	}
873
874
	posting.Comment = p.parseOptInlineComment()
875
	p.expectNewline()
876
877
	// continuation comments
878
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
879
		p.advance()
880
		c := p.parseComment()
881
		posting.Comments = append(posting.Comments, *c)
882
	}
883
884
	posting.Span = p.span(s)
885
	return posting
886
}
887
888
func (p *Parser) parseCost() *ast.Cost {
889
	s := p.cur.Span
890
	isTotal := p.got(token.ATAT)
891
	p.advance() // consume '@' '@@'
892
	p.skipWhitespace()
893
	return &ast.Cost{
894
		IsTotal: isTotal,
895
		Amount:  *p.parseAmount(),
896
		Span:    p.span(s),
897
	}
898
}
899
900
func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {
901
	s := p.cur.Span
902
903
	ba := &ast.BalanceAssertion{}
904
	switch p.cur.Type {
905
	case token.EQ: // basic assertion
906
	case token.EQEQ:
907
		ba.IsStrict = true
908
	case token.EQEQEQ:
909
		ba.IsStrict = true
910
		ba.IsInclusive = true
911
	}
912
	p.advance()
913
	p.skipWhitespace()
914
915
	ba.Amount = *p.parseAmount()
916
	ba.Span = p.span(s)
917
	return ba
918
}
919
920
func (p *Parser) parseAccount() ast.Account {
921
	s := p.cur.Span
922
	var name strings.Builder
923
924
	switch p.cur.Type {
925
	case token.TEXT:
926
		_, _ = name.WriteString(p.cur.Literal)
927
		p.advance()
928
		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {
929
			_, _ = name.WriteString(" ")
930
			p.advance()
931
			_, _ = name.WriteString(p.cur.Literal)
932
			p.advance()
933
		}
934
	case token.COMMODITYMARK:
935
		_, _ = name.WriteString(p.cur.Literal)
936
		p.advance()
937
		for p.got(token.TEXT) {
938
			_, _ = name.WriteString(p.cur.Literal)
939
			p.advance()
940
		}
941
	}
942
	return ast.Account{Name: name.String(), Span: p.span(s)}
943
}
944
945
func (p *Parser) parseDate() ast.Date {
946
	s := p.cur.Span
947
	tok, ok := p.expect(token.DATE)
948
	if !ok {
949
		return ast.Date{Span: p.span(s)}
950
	}
951
952
	sep := byte(0)
953
	lit := tok.Literal
954
	for i := 0; i < len(lit); i++ {
955
		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {
956
			sep = lit[i]
957
			break
958
		}
959
	}
960
	if sep == 0 {
961
		p.errorf("invalid date format: %q", lit)
962
		return ast.Date{Span: p.span(s)}
963
	}
964
965
	parts := strings.Split(lit, string(sep))
966
967
	// M/D or MM/DD (year inferred)
968
	if len(parts) == 2 {
969
		month, err := strconv.Atoi(parts[0])
970
		day, err2 := strconv.Atoi(parts[1])
971
		if err != nil || err2 != nil {
972
			p.errorf("invalid date literal: %q", lit)
973
			return ast.Date{Span: p.span(s)}
974
		}
975
		if month < 1 || month > 12 {
976
			p.errorf("invalid month %d in %q", month, lit)
977
			return ast.Date{Span: p.span(s)}
978
		}
979
		if day < 1 || day > 31 {
980
			p.errorf("invalid day %d in %q", day, lit)
981
			return ast.Date{Span: p.span(s)}
982
		}
983
		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}
984
	}
985
986
	if len(parts) != 3 {
987
		p.errorf("invalid date format: %q", lit)
988
		return ast.Date{Span: p.span(s)}
989
	}
990
991
	year, err := strconv.Atoi(parts[0])
992
	month, err2 := strconv.Atoi(parts[1])
993
	day, err3 := strconv.Atoi(parts[2])
994
	if err != nil || err2 != nil || err3 != nil {
995
		p.errorf("invalid date literal: %q", lit)
996
		return ast.Date{Span: p.span(s)}
997
	}
998
	if month < 1 || month > 12 {
999
		p.errorf("invalid month %d in %q", month, lit)
1000
		return ast.Date{Span: p.span(s)}
1001
	}
1002
	if day < 1 || day > 31 {
1003
		p.errorf("invalid day %d in %q", day, lit)
1004
		return ast.Date{Span: p.span(s)}
1005
	}
1006
1007
	return ast.Date{
1008
		Year:  year,
1009
		Month: month,
1010
		Day:   day,
1011
		Sep:   sep,
1012
		Span:  p.span(s),
1013
	}
1014
}
1015
1016
func (p *Parser) parseOptInlineComment() *ast.Comment {
1017
	p.skipWhitespace() // todo:
1018
	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {
1019
		return nil
1020
	}
1021
1022
	s := p.cur.Span
1023
	marker := p.cur.Literal[0]
1024
	p.advance() // consume marker
1025
	p.skipWhitespace()
1026
1027
	text := ""
1028
	if p.got(token.TEXT) {
1029
		text = p.cur.Literal
1030
		p.advance()
1031
	}
1032
1033
	return &ast.Comment{
1034
		Marker: marker,
1035
		Text:   text,
1036
		Span:   p.span(s),
1037
	}
1038
}
1039
1040
// parseOptPeriodicDescription returns the description that follows a period
// expression. A description is only recognized after a WHITESPACE token of at
// least two characters followed by TEXT; otherwise "" is returned.
func (p *Parser) parseOptPeriodicDescription() string {
	if !p.got(token.WHITESPACE) || len(p.cur.Literal) < 2 {
		return ""
	}

	p.skipWhitespace()

	if p.got(token.TEXT) {
		return p.parseDescription()
	}
	return ""
}
1053
1054
// parseDescription concatenates consecutive TEXT tokens, plus any WHITESPACE
// token directly followed by TEXT, and returns the result. Trailing
// whitespace is left unconsumed.
func (p *Parser) parseDescription() string {
	var text strings.Builder
	for {
		isText := p.got(token.TEXT)
		isInnerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !isText && !isInnerSpace {
			break
		}
		_, _ = text.WriteString(p.cur.Literal)
		p.advance()
	}
	return text.String()
}
1062
1063
// parseDirectiveExpr returns the concatenated literals of all tokens up to,
// but not including, the next NEWLINE, EOF or SEMICOLON.
func (p *Parser) parseDirectiveExpr() string {
	var expr strings.Builder
	for {
		switch p.cur.Type {
		case token.NEWLINE, token.EOF, token.SEMICOLON:
			return expr.String()
		}
		_, _ = expr.WriteString(p.cur.Literal)
		p.advance()
	}
}
1071
1072
// parseQuantityInto consumes one INT, DECIMAL or TEXT token and stores its
// numeric value and detected number format in amt. The value is negated when
// amt.IsNegative is already set. Errors are recorded on the parser and leave
// amt.Quantity untouched.
func (p *Parser) parseQuantityInto(amt *ast.Amount) {
	switch p.cur.Type {
	case token.INT, token.DECIMAL, token.TEXT:
		// acceptable quantity token
	default:
		p.errorf("expected quantity, got %s", p.cur.Type)
		return
	}

	raw := p.cur.Literal
	p.advance()

	// Record the format as written before the literal is normalized.
	format := detectFormat(raw)
	amt.QuantityFmt = format

	// Drop thousands separators and turn the decimal mark into '.' so the
	// decimal package can read the literal.
	quantity, err := decimal.FromString(normalizeLiteral(raw, format.Thousands, format.Decimal))
	if err != nil {
		p.errorf("invalid quantity %q: %v", raw, err)
		return
	}

	if amt.IsNegative {
		quantity = quantity.Neg()
	}
	amt.Quantity = quantity
}
1099
1100
// parseBlankLine consumes the line end and returns a BlankLine whose span is
// that of the token it started on.
func (p *Parser) parseBlankLine() *ast.BlankLine {
	blank := &ast.BlankLine{Span: p.cur.Span}
	p.expectNewline()
	return blank
}
1105
1106
// expectNewline requires the line to end here. A NEWLINE is consumed, EOF is
// accepted without advancing, and anything else records an error and leaves
// the current token in place.
func (p *Parser) expectNewline() {
	switch p.cur.Type {
	case token.NEWLINE:
		p.advance()
	case token.EOF:
		// end of input also ends the line; nothing to consume
	default:
		p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)
	}
}
1115
1116
// advance shifts the token window by one: peek becomes cur and a new token is
// read from the lexer into peek. It returns the token that was current before
// the shift.
func (p *Parser) advance() token.Token {
	consumed := p.cur
	p.cur, p.peek = p.peek, p.lexer.Next()
	return consumed
}
1122
1123
func (p *Parser) got(kind token.Type) bool     { return p.cur.Type == kind }
1124
func (p *Parser) willGet(kind token.Type) bool { return p.peek.Type == kind }
1125
1126
// expect consumes the current token when it has the given type and returns it
// with true. Otherwise it records an error and returns the unconsumed current
// token with false.
func (p *Parser) expect(kind token.Type) (token.Token, bool) {
	if !p.got(kind) {
		p.errorf("expected %s, got %s", kind, p.cur.Type)
		return p.cur, false
	}
	return p.advance(), true
}
1133
1134
func (p *Parser) errorf(format string, args ...any) {
1135
	p.errors = append(p.errors, &ast.ParseError{
1136
		Span:    p.cur.Span,
1137
		Message: fmt.Sprintf(format, args...),
1138
	})
1139
}
1140
1141
func (p *Parser) sync() {
1142
	for {
1143
		switch p.cur.Type {
1144
		case token.EOF:
1145
			return
1146
		case token.NEWLINE:
1147
			p.advance()
1148
			switch p.cur.Type {
1149
			case token.DATE, token.ACCOUNT, token.COMMODITY,
1150
				token.INCLUDE, token.ALIAS, token.PAYEE,
1151
				token.TAG, token.YEAR, token.D, token.P,
1152
				token.APPLY, token.END, token.COMMENTKW,
1153
				token.DECIMALMARK, token.TILDE, token.N, token.EQ:
1154
				return
1155
			}
1156
		default:
1157
			p.advance()
1158
		}
1159
	}
1160
}
1161
1162
// syncToNextline discards the rest of the current line, including its
// NEWLINE. It stops without advancing at EOF.
func (p *Parser) syncToNextline() {
	for {
		switch p.cur.Type {
		case token.EOF:
			return
		case token.NEWLINE:
			p.advance()
			return
		}
		p.advance()
	}
}
1170
1171
// skipWhitespace consumes consecutive WHITESPACE tokens, if any.
func (p *Parser) skipWhitespace() {
	for p.cur.Type == token.WHITESPACE {
		p.advance()
	}
}
1176
1177
// span returns a span that begins where s begins and ends at the start of the
// current token.
func (p *Parser) span(s token.Span) token.Span {
	var result token.Span
	result.Start = s.Start
	result.End = p.cur.Span.Start
	return result
}
1180
1181
// normalizeLiteral rewrites a numeric literal into plain form: every byte
// equal to thousands is dropped (when thousands is non-zero), every byte equal
// to decimal becomes '.', and all other bytes are copied unchanged. The
// thousands check is made first.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var out strings.Builder
	for i := 0; i < len(lit); i++ {
		c := lit[i]
		switch {
		case thousands != 0 && c == thousands:
			// thousands separator: dropped
		case c == decimal:
			out.WriteByte('.')
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}
1195
1196
func detectFormat(lit string) ast.QuantityFormat {
1197
	// find all separator positions
1198
	var separators []int
1199
	for i, ch := range []byte(lit) {
1200
		if ch == '.' || ch == ',' {
1201
			separators = append(separators, i)
1202
		}
1203
	}
1204
1205
	if len(separators) == 0 {
1206
		// "1000" — no separators, integer
1207
		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}
1208
	}
1209
1210
	var decimal byte
1211
	thousands := byte(0)
1212
	precision := 0
1213
1214
	if len(separators) == 1 {
1215
		// "10.00" or "10,00" — single separator is the decimal mark
1216
		pos := separators[0]
1217
		decimal = lit[pos]
1218
		precision = len(lit) - pos - 1
1219
	} else {
1220
		// "1,000.00" or "1.000,00" — last separator is decimal, first is thousands
1221
		last := separators[len(separators)-1]
1222
		decimal = lit[last]
1223
		thousands = lit[separators[0]]
1224
		precision = len(lit) - last - 1
1225
	}
1226
1227
	return ast.QuantityFormat{
1228
		Decimal:   decimal,
1229
		Thousands: thousands,
1230
		Precision: precision,
1231
	}
1232
}
1233
1234
// parseSimpleDate parses a full Y<sep>M<sep>D date where <sep> is '/', '.' or
// '-' ('/' is preferred when present, then '.', otherwise '-'). It returns the
// zero Date when s is shorter than 8 bytes, does not split into exactly three
// numeric fields, or has a month outside 1-12 or a day outside 1-31. Callers
// detect failure by checking that Year is zero.
func parseSimpleDate(s string) ast.Date {
	if len(s) < 8 {
		return ast.Date{}
	}
	sep := byte('-')
	if strings.Contains(s, "/") {
		sep = byte('/')
	} else if strings.Contains(s, ".") {
		sep = byte('.')
	}
	parts := strings.Split(s, string(sep))
	if len(parts) != 3 {
		return ast.Date{}
	}

	// Reject non-numeric fields instead of silently reading them as zero.
	var nums [3]int
	for i, part := range parts {
		n, err := strconv.Atoi(part)
		if err != nil {
			return ast.Date{}
		}
		nums[i] = n
	}
	year, month, day := nums[0], nums[1], nums[2]

	// Same range checks as parseDate applies to DATE tokens.
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return ast.Date{}
	}
	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}
}