all repos

clerk @ 6fdb9097048e212574439fb0da84d0c94aa7e01b

missing tooling for ledger/hledger

clerk/journal/parser/parser.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
formatter, 17 hours ago
1
package parser
2
3
import (
4
	"fmt"
5
	"strconv"
6
	"strings"
7
8
	"olexsmir.xyz/clerk/internal/decimal"
9
	"olexsmir.xyz/clerk/journal/ast"
10
	"olexsmir.xyz/clerk/journal/lexer"
11
	"olexsmir.xyz/clerk/journal/token"
12
)
13
14
type Parser struct {
15
	lexer  *lexer.Lexer
16
	errors []*ast.ParseError
17
	cur    token.Token
18
	peek   token.Token
19
}
20
21
// New returns a Parser that reads from lex, with both the current token and
// the lookahead token already loaded.
func New(lex *lexer.Lexer) *Parser {
	parser := &Parser{lexer: lex}
	// Two advances prime the parser: the first fills the lookahead slot, the
	// second shifts it into the current slot and refills the lookahead.
	for range 2 {
		parser.advance()
	}
	return parser
}
27
28
// ParseJournal consumes tokens until EOF and returns the resulting journal.
// Entries that failed to parse are left out; the errors recorded along the
// way are attached to the returned journal.
func (p *Parser) ParseJournal() *ast.Journal {
	journal := &ast.Journal{}
	for !p.got(token.EOF) {
		entry := p.parseEntry()
		if entry == nil {
			continue
		}
		journal.Entries = append(journal.Entries, entry)
	}
	journal.Errors = p.errors
	return journal
}
38
39
func isDirectiveKeyword(t token.Type) bool {
40
	switch t {
41
	case token.COMMENTKW, token.ACCOUNT, token.COMMODITY, token.INCLUDE,
42
		token.ALIAS, token.PAYEE, token.TAG, token.APPLY, token.END,
43
		token.YEAR, token.DECIMALMARK, token.D, token.P, token.N, token.C:
44
		return true
45
	}
46
	return false
47
}
48
49
func (p *Parser) parseEntry() ast.Entry {
50
	if p.got(token.BANG) || p.got(token.AT) {
51
		if isDirectiveKeyword(p.peek.Type) {
52
			p.advance() // consume prefix
53
		}
54
	}
55
	switch p.cur.Type {
56
	case token.ILLEGAL:
57
		p.errorf("illegal character %q", p.cur.Literal)
58
		p.advance()
59
		return nil
60
	case token.INDENT:
61
		p.errorf("unexpected indent")
62
		p.syncToNextline()
63
		return nil
64
	case token.DATE:
65
		return p.parseTransaction()
66
	case token.TILDE:
67
		return p.parsePeriodicTransaction()
68
	case token.EQ:
69
		return p.parseAutomatedTransaction()
70
	case token.NEWLINE:
71
		return p.parseBlankLine()
72
	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:
73
		return p.parseComment()
74
	case token.ACCOUNT:
75
		return p.parseAccountDirective()
76
	case token.COMMODITY:
77
		return p.parseCommodityDirective()
78
	case token.INCLUDE:
79
		return p.parseIncludeDirective()
80
	case token.ALIAS:
81
		return p.parseAliasDirective()
82
	case token.PAYEE:
83
		return p.parsePayeeDirective()
84
	case token.TAG:
85
		return p.parseTagDirective()
86
	case token.YEAR:
87
		return p.parseYearDirective()
88
	case token.DECIMALMARK:
89
		return p.parseDecimalMarkDirective()
90
	case token.D:
91
		return p.parseDefaultCommodityDirective()
92
	case token.P:
93
		return p.parseMarketPriceDirective()
94
	case token.N:
95
		return p.parseIgnoredDirective()
96
	case token.C:
97
		return p.parseConversionDirective()
98
	case token.APPLY:
99
		return p.parseApplyDirective()
100
	case token.END:
101
		return p.parseEndDirective()
102
	case token.COMMENTKW:
103
		return p.parseCommentBlockDirective()
104
	default:
105
		p.errorf("unexpected token %s", p.cur.Type)
106
		p.sync()
107
		return nil
108
	}
109
}
110
111
// parseTransaction parses a dated transaction: the header line
// (date, optional "=date2", status, "(code)", payee, "| note", inline
// comment), then indented ';' header comments, then the postings.
//
// The current token must be the DATE that starts the header.
func (p *Parser) parseTransaction() *ast.Transaction {
	s := p.cur.Span
	tx := &ast.Transaction{}

	tx.Date = p.parseDate()

	// optional secondary date
	if p.got(token.EQ) {
		p.advance()
		d := p.parseDate()
		tx.SecondDate = &d
	}

	p.skipWhitespace()

	// optional status
	tx.Status = p.parseStatus()

	// optional code
	if p.got(token.LPAREN) {
		p.advance() // consume '('
		var code strings.Builder
		// Stop at the end of the line as well: without that bound an
		// unterminated "(" would spin forever once EOF is reached.
		for !p.got(token.RPAREN) && !p.got(token.NEWLINE) && !p.got(token.EOF) {
			_, _ = code.WriteString(p.cur.Literal)
			p.advance()
		}
		if p.got(token.RPAREN) {
			// The closing paren belongs to the code; leaving it in the
			// stream would make the rest of the header unparseable.
			p.advance()
		} else {
			p.errorf("expected %s, got %s", token.RPAREN, p.cur.Type)
		}
		codeText := code.String()
		tx.Code = &codeText
		p.skipWhitespace()
	}

	// optional payee | note
	if p.got(token.TEXT) || p.got(token.STRING) {
		tx.Payee = p.parsePayee()

		// whitespace may separate the payee from the '|'
		p.skipWhitespace()

		if p.got(token.PIPE) {
			p.advance()
			p.skipWhitespace()

			var note strings.Builder
			for p.got(token.TEXT) || p.got(token.WHITESPACE) {
				_, _ = note.WriteString(p.cur.Literal)
				p.advance()
			}
			noteText := note.String()
			tx.Note = &noteText
		}
	}

	tx.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments — indented ; lines before first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		tx.HeaderComments = append(tx.HeaderComments, p.parseComment())
	}

	// postings; parsePosting returns nil for lines it rejected
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			tx.Postings = append(tx.Postings, posting)
		}
	}

	tx.Span = p.span(s)
	return tx
}
183
184
// unquote strips one pair of matching surrounding quotes (double or single)
// from s. Strings that are not quoted that way are returned unchanged.
func unquote(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first != last {
		return s
	}
	switch first {
	case '"', '\'':
		return s[1 : len(s)-1]
	}
	return s
}
190
191
// parsePayee reads a payee name. A STRING token is taken whole with its
// quotes removed. Otherwise TEXT/INT/DECIMAL tokens are concatenated, keeping
// whitespace only when more name tokens follow it, so trailing whitespace is
// left in the stream.
func (p *Parser) parsePayee() *ast.Payee {
	start := p.cur.Span

	if p.got(token.STRING) {
		quoted := p.advance().Literal
		return &ast.Payee{Name: unquote(quoted), Span: p.span(start)}
	}

	var name strings.Builder
	for {
		switch {
		case p.got(token.TEXT), p.got(token.INT), p.got(token.DECIMAL):
			// part of the name
		case p.got(token.WHITESPACE) &&
			(p.willGet(token.TEXT) || p.willGet(token.INT) || p.willGet(token.DECIMAL)):
			// inner whitespace: the name continues after it
		default:
			return &ast.Payee{Name: unquote(name.String()), Span: p.span(start)}
		}
		_, _ = name.WriteString(p.cur.Literal)
		p.advance()
	}
}
208
209
// parsePeriodicTransaction parses a "~ PERIOD  [description]" rule: the
// header line, any ';' comment lines, and the indented postings.
//
// The current token must be the TILDE that starts the rule.
func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {
	s := p.cur.Span
	p.expect(token.TILDE)
	p.skipWhitespace()

	pt := &ast.PeriodicTransaction{}

	// NOTE(review): the span is taken before the period is parsed, so it
	// covers only the '~' marker and the whitespace after it. Kept as is —
	// confirm whether the whole rule was intended.
	pt.Span = p.span(s)
	pt.Period = p.parsePeriod()

	if desc := p.parseOptPeriodicDescription(); desc != "" {
		pt.Description = &desc
	}

	pt.Comment = p.parseOptInlineComment()
	p.expectNewline()

	for p.got(token.INDENT) || p.got(token.SEMICOLON) {
		switch {
		case p.got(token.SEMICOLON):
			// NOTE(review): a ';' line at column 0 is still attached to the
			// rule, as before — confirm this is intended rather than ending
			// the rule.
			pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
		case p.willGet(token.SEMICOLON):
			// Indented comment line. parsePosting would reject it with
			// "expected account name", so handle it here the same way
			// parseTransaction and parseAutomatedTransaction do.
			p.advance() // consume indent
			pt.HeaderComments = append(pt.HeaderComments, p.parseComment())
		default:
			if posting := p.parsePosting(); posting != nil {
				pt.Postings = append(pt.Postings, posting)
			}
		}
	}

	return pt
}
245
246
// parseAutomatedTransaction parses an "= EXPR" rule: the header line,
// indented ';' header comments and the indented postings.
func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {
	start := p.cur.Span
	p.expect(token.EQ)
	p.skipWhitespace()

	// The span is fixed here, right after the marker and its whitespace.
	auto := &ast.AutomatedTransaction{Span: p.span(start)}
	auto.Expr = p.parseDirectiveExpr()
	auto.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// indented ';' lines directly under the header
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		comment := p.parseComment()
		auto.HeaderComments = append(auto.HeaderComments, comment)
	}

	// every further indented line is tried as a posting
	for p.got(token.INDENT) {
		posting := p.parsePosting()
		if posting == nil {
			continue
		}
		auto.Postings = append(auto.Postings, posting)
	}

	return auto
}
273
274
func (p *Parser) parsePeriod() ast.Period {
275
	s := p.cur.Span
276
277
	var periodBuf strings.Builder
278
279
	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&
280
		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {
281
282
		if p.got(token.WHITESPACE) {
283
			if len(p.cur.Literal) >= 2 {
284
				break
285
			}
286
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||
287
				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||
288
				p.willGet(token.PERCENT) || p.willGet(token.STAR) {
289
				p.advance()
290
				continue
291
			}
292
		}
293
294
		periodBuf.WriteString(p.cur.Literal)
295
		p.advance()
296
	}
297
298
	str := periodBuf.String()
299
	period := ast.Period{Raw: str, Span: p.span(s)}
300
301
	if _, after, ok := strings.Cut(str, " from "); ok {
302
		end := strings.Index(after, " ")
303
		dateStr := after
304
		if end >= 0 {
305
			dateStr = after[:end]
306
		}
307
		if d := parseSimpleDate(dateStr); d.Year > 0 {
308
			period.From = &d
309
			rest := after
310
			if end >= 0 {
311
				rest = after[end:]
312
			}
313
			if _, toAfter, ok := strings.Cut(rest, " to "); ok {
314
				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {
315
					toAfter = toAfter[:toEnd]
316
				}
317
				if d := parseSimpleDate(toAfter); d.Year > 0 {
318
					period.To = &d
319
				}
320
			}
321
		}
322
	}
323
	return period
324
}
325
326
func (p *Parser) parseComment() *ast.Comment {
327
	s := p.cur.Span
328
	marker := p.cur.Literal[0]
329
	p.advance()
330
	p.skipWhitespace()
331
332
	var text string
333
	if p.got(token.TEXT) {
334
		text = p.cur.Literal
335
		p.advance()
336
	}
337
338
	p.expectNewline()
339
340
	return &ast.Comment{
341
		Marker: marker,
342
		Text:   text,
343
		Span:   p.span(s),
344
	}
345
}
346
347
func (p *Parser) parseAccountDirective() *ast.AccountDirective {
348
	s := p.cur.Span
349
	p.expect(token.ACCOUNT)
350
	p.skipWhitespace()
351
352
	account := p.parseAccount()
353
	comment := p.parseOptInlineComment()
354
	p.expectNewline()
355
356
	for p.got(token.INDENT) {
357
		p.advance()
358
		for !p.got(token.NEWLINE) && !p.got(token.EOF) {
359
			p.advance()
360
		}
361
		p.expectNewline()
362
	}
363
364
	return &ast.AccountDirective{
365
		Account: account,
366
		Comment: comment,
367
		Span:    p.span(s),
368
	}
369
}
370
371
func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {
372
	s := p.cur.Span
373
	p.expect(token.COMMODITY)
374
	p.skipWhitespace()
375
376
	var commodity string
377
	var format *ast.Amount
378
379
	switch p.cur.Type {
380
	case token.TEXT, token.INT, token.DECIMAL:
381
		format = p.parseAmount()
382
		commodity = format.Commodity
383
	case token.COMMODITYMARK:
384
		commodity = p.cur.Literal
385
		p.advance()
386
		hadSpace := p.got(token.WHITESPACE)
387
		p.skipWhitespace()
388
		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {
389
			format = p.parseAmount()
390
			format.Commodity = commodity
391
			format.CommodityPos = ast.CommodityBefore
392
			format.HasSpace = hadSpace
393
		}
394
	default:
395
		p.errorf("expected commodity name or amount, got %s", p.cur.Type)
396
	}
397
398
	if commodity == "" {
399
		p.errorf("expected commodity name, got %s", p.cur.Type)
400
	}
401
402
	comment := p.parseOptInlineComment()
403
	p.expectNewline()
404
405
	for p.got(token.INDENT) {
406
		p.advance()
407
		p.skipWhitespace()
408
		if p.got(token.COMMODITYMARK) && p.cur.Literal == "format" {
409
			p.advance()
410
			p.skipWhitespace()
411
			format = p.parseAmount()
412
		} else {
413
			for !p.got(token.NEWLINE) && !p.got(token.EOF) {
414
				p.advance()
415
			}
416
		}
417
		p.expectNewline()
418
	}
419
420
	cd := &ast.CommodityDirective{
421
		Commodity: commodity,
422
		Comment:   comment,
423
		Span:      p.span(s),
424
	}
425
	if format != nil {
426
		cd.Format = *format
427
	}
428
	return cd
429
}
430
431
func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {
432
	s := p.cur.Span
433
	p.expect(token.INCLUDE)
434
	p.skipWhitespace()
435
436
	path := ""
437
	if p.got(token.TEXT) {
438
		path = p.cur.Literal
439
		p.advance()
440
	} else {
441
		p.errorf("expected file path, got %s", p.cur.Type)
442
	}
443
444
	comment := p.parseOptInlineComment()
445
	p.expectNewline()
446
447
	return &ast.IncludeDirective{
448
		Path:    path,
449
		Comment: comment,
450
		Span:    p.span(s),
451
	}
452
}
453
454
// parseAliasDirective parses "alias FROM = TO [; comment]", where both sides
// are read as account names.
func (p *Parser) parseAliasDirective() *ast.AliasDirective {
	start := p.cur.Span
	p.expect(token.ALIAS)
	p.skipWhitespace()

	from := p.parseAccount().Name
	p.skipWhitespace()
	p.expect(token.EQ)
	p.skipWhitespace()
	to := p.parseAccount().Name
	p.skipWhitespace()

	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.AliasDirective{
		From:    from,
		To:      to,
		Comment: comment,
		Span:    p.span(start),
	}
}
470
471
func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {
472
	s := p.cur.Span
473
	p.expect(token.PAYEE)
474
	p.skipWhitespace()
475
476
	name := ""
477
	if p.got(token.TEXT) || p.got(token.STRING) {
478
		name = p.parsePayee().Name
479
	}
480
481
	comment := p.parseOptInlineComment()
482
	p.expectNewline()
483
484
	return &ast.PayeeDirective{
485
		Name:    name,
486
		Comment: comment,
487
		Span:    p.span(s),
488
	}
489
}
490
491
func (p *Parser) parseTagDirective() *ast.TagDirective {
492
	s := p.cur.Span
493
	p.expect(token.TAG)
494
	p.skipWhitespace()
495
496
	name := ""
497
	if p.got(token.TEXT) {
498
		name = p.cur.Literal
499
		p.advance()
500
	}
501
502
	comment := p.parseOptInlineComment()
503
	p.expectNewline()
504
505
	return &ast.TagDirective{
506
		Name:    name,
507
		Comment: comment,
508
		Span:    p.span(s),
509
	}
510
}
511
512
// parseYearDirective parses the year directive: the keyword, an INT year and
// an optional inline comment. A missing year is reported as an error.
func (p *Parser) parseYearDirective() *ast.YearDirective {
	start := p.cur.Span
	p.expect(token.YEAR)
	p.skipWhitespace()

	dir := &ast.YearDirective{}
	if !p.got(token.INT) {
		p.errorf("expected year, got %s", p.cur.Type)
	} else {
		// A conversion failure is ignored and leaves the year at 0.
		dir.Year, _ = strconv.Atoi(p.cur.Literal)
		p.advance()
	}

	p.skipWhitespace()
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()

	dir.Span = p.span(start)
	return dir
}
531
532
// parseDecimalMarkDirective parses the decimal-mark directive. The mark is
// the first byte of the TEXT token after the keyword and defaults to '.'
// when no usable token is present.
func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {
	start := p.cur.Span
	p.expect(token.DECIMALMARK)
	p.skipWhitespace()

	dir := &ast.DecimalMarkDirective{Mark: byte('.')}
	if p.got(token.TEXT) {
		if lit := p.cur.Literal; len(lit) > 0 {
			dir.Mark = lit[0]
		}
		p.advance()
	}

	p.skipWhitespace()
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()

	dir.Span = p.span(start)
	return dir
}
552
553
// parseDefaultCommodityDirective parses "D AMOUNT [; comment]".
func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {
	start := p.cur.Span
	p.expect(token.D)
	p.skipWhitespace()

	dir := &ast.DefaultCommodityDirective{Amount: *p.parseAmount()}

	p.skipWhitespace()
	dir.Comment = p.parseOptInlineComment()
	p.expectNewline()

	dir.Span = p.span(start)
	return dir
}
565
566
// parseConversionDirective parses "C AMOUNT [= AMOUNT] [; comment]". Each
// side must start like an amount; otherwise an error is recorded and that
// side stays at its zero value.
func (p *Parser) parseConversionDirective() *ast.ConversionDirective {
	start := p.cur.Span
	p.expect(token.C)
	p.skipWhitespace()

	conv := &ast.ConversionDirective{}

	// readAmount stores the next amount into dst, or reports its absence.
	readAmount := func(dst *ast.Amount) {
		if !p.isAmountStart() {
			p.errorf("expected amount, got %s", p.cur.Type)
			return
		}
		*dst = *p.parseAmount()
	}

	readAmount(&conv.From)

	p.skipWhitespace()
	if p.got(token.EQ) {
		p.advance()
		p.skipWhitespace()
		readAmount(&conv.To)
	}

	p.skipWhitespace()
	conv.Comment = p.parseOptInlineComment()
	p.expectNewline()

	conv.Span = p.span(start)
	return conv
}
595
596
func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {
597
	s := p.cur.Span
598
	p.expect(token.N)
599
	p.skipWhitespace()
600
	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {
601
		p.advance()
602
	}
603
	p.skipWhitespace()
604
	comment := p.parseOptInlineComment()
605
	p.expectNewline()
606
	return &ast.IgnoredDirective{
607
		Comment: comment,
608
		Span:    p.span(s),
609
	}
610
}
611
612
// parseMarketPriceDirective parses
// "P DATE [TIME] COMMODITY AMOUNT [; comment]".
//
// The current token must be the P keyword.
func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {
	s := p.cur.Span
	p.expect(token.P)
	p.skipWhitespace()

	mp := &ast.MarketPriceDirective{}
	mp.DateTime.Date = p.parseDate()
	p.skipWhitespace()

	if p.got(token.TIME) {
		t := p.parseTime()
		mp.DateTime.Time = &t
		p.skipWhitespace()
	}

	// expect already consumes the commodity token on success. Advancing a
	// second time would drop whatever follows it; with a missing amount that
	// is the NEWLINE, which would pull the next entry into this directive.
	if tok, ok := p.expect(token.COMMODITYMARK); ok {
		mp.Commodity = tok.Literal
	} else if !p.got(token.NEWLINE) && !p.got(token.EOF) {
		// Best-effort recovery (the error is already recorded): take the
		// unexpected token as the symbol and step over it, but never step
		// over the end of the line.
		mp.Commodity = tok.Literal
		p.advance()
	}
	p.skipWhitespace()

	mp.Amount = *p.parseAmount()

	p.skipWhitespace()
	mp.Comment = p.parseOptInlineComment()

	p.expectNewline()
	mp.Span = p.span(s)
	return mp
}
640
641
func (p *Parser) parseTime() ast.Time {
642
	s := p.cur.Span
643
	tok, _ := p.expect(token.TIME)
644
	lit := tok.Literal
645
646
	parts := strings.Split(lit, ":")
647
	if len(parts) < 2 {
648
		p.errorf("invalid time format: %q", lit)
649
		return ast.Time{Span: p.span(s)}
650
	}
651
652
	hour, _ := strconv.Atoi(parts[0])
653
	minute, _ := strconv.Atoi(parts[1])
654
	second := 0
655
	if len(parts) > 2 {
656
		second, _ = strconv.Atoi(parts[2])
657
	}
658
659
	if hour < 0 || hour > 23 {
660
		p.errorf("invalid hour %d in time %q", hour, lit)
661
	}
662
	if minute < 0 || minute > 59 {
663
		p.errorf("invalid minute %d in time %q", minute, lit)
664
	}
665
	if second < 0 || second > 59 {
666
		p.errorf("invalid second %d in time %q", second, lit)
667
	}
668
669
	return ast.Time{
670
		Hour:   hour,
671
		Minute: minute,
672
		Second: second,
673
		Span:   p.span(s),
674
	}
675
}
676
677
func (p *Parser) parseApplyDirective() *ast.ApplyDirective {
678
	s := p.cur.Span
679
	p.expect(token.APPLY)
680
	p.skipWhitespace()
681
682
	expr := p.parseDirectiveExpr()
683
	comment := p.parseOptInlineComment()
684
	p.expectNewline()
685
686
	return &ast.ApplyDirective{
687
		Expr:    expr,
688
		Comment: comment,
689
		Span:    p.span(s),
690
	}
691
}
692
693
func (p *Parser) parseEndDirective() *ast.EndDirective {
694
	s := p.cur.Span
695
	p.expect(token.END)
696
	p.skipWhitespace()
697
698
	expr := p.parseDirectiveExpr()
699
	comment := p.parseOptInlineComment()
700
	p.expectNewline()
701
702
	return &ast.EndDirective{
703
		Expr:    expr,
704
		Comment: comment,
705
		Span:    p.span(s),
706
	}
707
}
708
709
func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {
710
	start := p.cur.Span
711
	p.expect(token.COMMENTKW)
712
	p.skipWhitespace()
713
714
	header := p.parseDirectiveExpr()
715
	comment := p.parseOptInlineComment()
716
	p.expectNewline()
717
718
	var content strings.Builder
719
	for p.cur.Type != token.EOF {
720
		if p.got(token.END) {
721
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {
722
				p.advance()
723
				p.expectNewline()
724
				break
725
			}
726
			if p.willGet(token.WHITESPACE) {
727
				endTok := p.cur
728
				p.advance()
729
				wsTok := p.cur
730
				p.advance()
731
				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token
732
					p.advance()
733
					p.parseDirectiveExpr()
734
					p.parseOptInlineComment()
735
					p.expectNewline()
736
					break
737
				}
738
				content.WriteString(endTok.Literal)
739
				content.WriteString(wsTok.Literal)
740
				continue
741
			}
742
		}
743
		content.WriteString(p.cur.Literal)
744
		p.advance()
745
	}
746
747
	return &ast.CommentBlockDirective{
748
		Header:  header,
749
		Content: content.String(),
750
		Comment: comment,
751
		Span:    p.span(start),
752
	}
753
}
754
755
// parseStatus parses an optional status marker and the whitespace after it.
// A marker whose literal starts with '*' means cleared, any other accepted
// marker means pending. It returns nil, consuming nothing, when the current
// token is neither STAR nor BANG.
func (p *Parser) parseStatus() *ast.Status {
	if !p.got(token.STAR) && !p.got(token.BANG) {
		return nil
	}

	value := ast.StatusPending
	if p.cur.Literal[0] == '*' {
		value = ast.StatusCleared
	}
	status := &ast.Status{Value: value, Span: p.cur.Span}

	p.advance()
	p.skipWhitespace()
	return status
}
768
769
func (p *Parser) isAmountStart() bool {
770
	switch p.cur.Type {
771
	default:
772
		return false
773
	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:
774
		return true
775
	case token.TEXT:
776
		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'
777
	}
778
}
779
780
func (p *Parser) parseAmount() *ast.Amount {
781
	s := p.cur.Span
782
	amt := &ast.Amount{
783
		QuantityFmt: ast.QuantityFormat{Decimal: '.'},
784
		Span:        p.span(s),
785
	}
786
787
	// commodity before quantity: $10.00
788
	if p.got(token.COMMODITYMARK) {
789
		amt.Commodity = p.cur.Literal
790
		amt.CommodityPos = ast.CommodityBefore
791
		p.advance()
792
		if p.got(token.WHITESPACE) {
793
			amt.HasSpace = true
794
			p.skipWhitespace()
795
		}
796
		switch p.cur.Type {
797
		case token.MINUS:
798
			amt.IsNegative = true
799
			p.advance()
800
		case token.PLUS:
801
			p.advance()
802
		}
803
		p.parseQuantityInto(amt)
804
	} else {
805
		// optional sign
806
		switch p.cur.Type {
807
		case token.MINUS:
808
			amt.IsNegative = true
809
			p.advance()
810
		case token.PLUS:
811
			p.advance()
812
		}
813
814
		// commodity before quantity: -$120:
815
		if p.got(token.COMMODITYMARK) {
816
			amt.Commodity = p.cur.Literal
817
			amt.CommodityPos = ast.CommodityBefore
818
			p.advance()
819
			if p.got(token.WHITESPACE) {
820
				amt.HasSpace = true
821
				p.skipWhitespace()
822
			}
823
		}
824
825
		p.parseQuantityInto(amt)
826
827
		// commodity after quantity: 10.00 UAH (only if not set)
828
		if amt.Commodity == "" {
829
			switch p.cur.Type {
830
			case token.WHITESPACE:
831
				p.skipWhitespace()
832
				if p.got(token.COMMODITYMARK) || p.got(token.TEXT) {
833
					amt.HasSpace = true
834
					amt.Commodity = p.cur.Literal
835
					amt.CommodityPos = ast.CommodityAfter
836
					p.advance()
837
				}
838
			case token.COMMODITYMARK, token.TEXT:
839
				amt.Commodity = p.cur.Literal
840
				amt.CommodityPos = ast.CommodityAfter
841
				p.advance()
842
			}
843
		}
844
	}
845
846
	return amt
847
}
848
849
func (p *Parser) parseAmountWithOptExpr() *ast.Amount {
850
	if p.got(token.STAR) {
851
		p.advance()
852
		p.skipWhitespace()
853
		amt := p.parseAmount()
854
		if amt != nil {
855
			amt.IsExpr = true
856
		}
857
		return amt
858
	}
859
	if p.got(token.PARENEXPR) {
860
		lit := p.cur.Literal
861
		amt := &ast.Amount{
862
			IsExpr:      true,
863
			QuantityFmt: ast.QuantityFormat{Decimal: '.'},
864
		}
865
		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {
866
			inner := lit[1 : len(lit)-1]
867
			i := 0
868
			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {
869
				i++
870
			}
871
			j := len(inner)
872
			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {
873
				j--
874
			}
875
			amt.Expr = inner[i:j]
876
		}
877
		amt.Span = p.cur.Span
878
		p.advance()
879
		return amt
880
	}
881
	return p.parseAmount()
882
}
883
884
func (p *Parser) parsePosting() *ast.Posting {
885
	s := p.cur.Span
886
	posting := &ast.Posting{}
887
	p.expect(token.INDENT)
888
889
	// exit if it's empty line
890
	if p.got(token.NEWLINE) || p.got(token.EOF) {
891
		p.syncToNextline()
892
		return nil
893
	}
894
895
	// optional status, outside of brackets, '! (account)'
896
	posting.Status = p.parseStatus()
897
898
	// detect virtual posting brackets
899
	switch p.cur.Type {
900
	case token.LPAREN:
901
		posting.Type = ast.PostingVirtualUnbalanced
902
		p.advance()
903
	case token.LBRACKET:
904
		posting.Type = ast.PostingVirtualBalanced
905
		p.advance()
906
	}
907
908
	// optional status, inside of brackets, '(* account)'
909
	if p.got(token.STAR) || p.got(token.BANG) {
910
		posting.Status = p.parseStatus()
911
	}
912
913
	// validate, must be account text
914
	if p.cur.Type != token.TEXT {
915
		p.errorf("expected account name, got %s", p.cur.Type)
916
		p.syncToNextline()
917
		return nil
918
	}
919
920
	posting.Account = p.parseAccount()
921
922
	// consume closing bracket
923
	switch p.cur.Type {
924
	case token.RPAREN:
925
		p.advance()
926
	case token.RBRACKET:
927
		p.advance()
928
	}
929
930
	// optional amount - after two spaces
931
	if p.got(token.WHITESPACE) {
932
		p.skipWhitespace()
933
		if p.isAmountStart() || p.got(token.STAR) {
934
			posting.Amount = p.parseAmountWithOptExpr()
935
		}
936
	}
937
938
	// optional cost '@' or '@@'
939
	if p.got(token.WHITESPACE) {
940
		p.skipWhitespace()
941
	}
942
	if p.got(token.AT) || p.got(token.ATAT) {
943
		posting.Cost = p.parseCost()
944
	}
945
946
	// optional balance assertion
947
	if p.got(token.WHITESPACE) {
948
		p.skipWhitespace()
949
	}
950
	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {
951
		posting.Balance = p.parseBalanceAssertion()
952
		p.skipWhitespace()
953
		if p.got(token.AT) || p.got(token.ATAT) {
954
			p.parseCost()
955
		}
956
	}
957
958
	posting.Comment = p.parseOptInlineComment()
959
	p.expectNewline()
960
961
	// continuation comments
962
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
963
		p.advance()
964
		c := p.parseComment()
965
		posting.Comments = append(posting.Comments, *c)
966
	}
967
968
	posting.Span = p.span(s)
969
	return posting
970
}
971
972
func (p *Parser) parseCost() *ast.Cost {
973
	s := p.cur.Span
974
	isTotal := p.got(token.ATAT)
975
	p.advance() // consume '@' '@@'
976
	p.skipWhitespace()
977
	return &ast.Cost{
978
		IsTotal: isTotal,
979
		Amount:  *p.parseAmount(),
980
		Span:    p.span(s),
981
	}
982
}
983
984
func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {
985
	s := p.cur.Span
986
987
	ba := &ast.BalanceAssertion{}
988
	switch p.cur.Type {
989
	case token.EQ: // basic assertion
990
	case token.EQEQ:
991
		ba.IsStrict = true
992
	case token.EQEQEQ:
993
		ba.IsStrict = true
994
		ba.IsInclusive = true
995
	}
996
	p.advance()
997
	p.skipWhitespace()
998
999
	ba.Amount = *p.parseAmount()
1000
	ba.Span = p.span(s)
1001
	return ba
1002
}
1003
1004
func (p *Parser) parseAccount() ast.Account {
1005
	s := p.cur.Span
1006
	var name strings.Builder
1007
1008
	switch p.cur.Type {
1009
	case token.TEXT:
1010
		_, _ = name.WriteString(p.cur.Literal)
1011
		p.advance()
1012
		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {
1013
			_, _ = name.WriteString(" ")
1014
			p.advance()
1015
			_, _ = name.WriteString(p.cur.Literal)
1016
			p.advance()
1017
		}
1018
	case token.COMMODITYMARK:
1019
		_, _ = name.WriteString(p.cur.Literal)
1020
		p.advance()
1021
		for p.got(token.TEXT) {
1022
			_, _ = name.WriteString(p.cur.Literal)
1023
			p.advance()
1024
		}
1025
	}
1026
	return ast.Account{Name: name.String(), Span: p.span(s)}
1027
}
1028
1029
func (p *Parser) parseDate() ast.Date {
1030
	s := p.cur.Span
1031
	tok, ok := p.expect(token.DATE)
1032
	if !ok {
1033
		return ast.Date{Span: p.span(s)}
1034
	}
1035
1036
	sep := byte(0)
1037
	lit := tok.Literal
1038
	for i := 0; i < len(lit); i++ {
1039
		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {
1040
			sep = lit[i]
1041
			break
1042
		}
1043
	}
1044
	if sep == 0 {
1045
		p.errorf("invalid date format: %q", lit)
1046
		return ast.Date{Span: p.span(s)}
1047
	}
1048
1049
	parts := strings.Split(lit, string(sep))
1050
1051
	// M/D or MM/DD (year inferred)
1052
	if len(parts) == 2 {
1053
		month, err := strconv.Atoi(parts[0])
1054
		day, err2 := strconv.Atoi(parts[1])
1055
		if err != nil || err2 != nil {
1056
			p.errorf("invalid date literal: %q", lit)
1057
			return ast.Date{Span: p.span(s)}
1058
		}
1059
		if month < 1 || month > 12 {
1060
			p.errorf("invalid month %d in %q", month, lit)
1061
			return ast.Date{Span: p.span(s)}
1062
		}
1063
		if day < 1 || day > 31 {
1064
			p.errorf("invalid day %d in %q", day, lit)
1065
			return ast.Date{Span: p.span(s)}
1066
		}
1067
		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}
1068
	}
1069
1070
	if len(parts) != 3 {
1071
		p.errorf("invalid date format: %q", lit)
1072
		return ast.Date{Span: p.span(s)}
1073
	}
1074
1075
	year, err := strconv.Atoi(parts[0])
1076
	month, err2 := strconv.Atoi(parts[1])
1077
	day, err3 := strconv.Atoi(parts[2])
1078
	if err != nil || err2 != nil || err3 != nil {
1079
		p.errorf("invalid date literal: %q", lit)
1080
		return ast.Date{Span: p.span(s)}
1081
	}
1082
	if month < 1 || month > 12 {
1083
		p.errorf("invalid month %d in %q", month, lit)
1084
		return ast.Date{Span: p.span(s)}
1085
	}
1086
	if day < 1 || day > 31 {
1087
		p.errorf("invalid day %d in %q", day, lit)
1088
		return ast.Date{Span: p.span(s)}
1089
	}
1090
1091
	return ast.Date{
1092
		Year:  year,
1093
		Month: month,
1094
		Day:   day,
1095
		Sep:   sep,
1096
		Span:  p.span(s),
1097
	}
1098
}
1099
1100
func (p *Parser) parseOptInlineComment() *ast.Comment {
1101
	p.skipWhitespace() // todo:
1102
	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {
1103
		return nil
1104
	}
1105
1106
	s := p.cur.Span
1107
	marker := p.cur.Literal[0]
1108
	p.advance() // consume marker
1109
	p.skipWhitespace()
1110
1111
	text := ""
1112
	if p.got(token.TEXT) {
1113
		text = p.cur.Literal
1114
		p.advance()
1115
	}
1116
1117
	return &ast.Comment{
1118
		Marker: marker,
1119
		Text:   text,
1120
		Span:   p.span(s),
1121
	}
1122
}
1123
1124
// parseOptPeriodicDescription reads the description of a periodic
// transaction. A description is only recognised after a whitespace token of
// at least two characters followed by TEXT; otherwise "" is returned.
func (p *Parser) parseOptPeriodicDescription() string {
	wideGap := p.got(token.WHITESPACE) && len(p.cur.Literal) >= 2
	if !wideGap {
		return ""
	}

	p.skipWhitespace()

	if !p.got(token.TEXT) {
		return ""
	}
	return p.parseDescription()
}
1137
1138
// parseDescription concatenates consecutive TEXT tokens, keeping a
// whitespace token only when another TEXT token follows it.
func (p *Parser) parseDescription() string {
	var text strings.Builder
	for {
		inner := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !p.got(token.TEXT) && !inner {
			return text.String()
		}
		_, _ = text.WriteString(p.advance().Literal)
	}
}
1146
1147
// parseDirectiveExpr returns the raw text of everything up to, but not
// including, the next NEWLINE, EOF or SEMICOLON token.
func (p *Parser) parseDirectiveExpr() string {
	var expr strings.Builder
	for {
		switch p.cur.Type {
		case token.NEWLINE, token.EOF, token.SEMICOLON:
			return expr.String()
		}
		_, _ = expr.WriteString(p.advance().Literal)
	}
}
1155
1156
// parseQuantityInto parses the numeric part of an amount into amt. It
// records the detected number format, converts the literal to a decimal, and
// applies the sign already stored in amt.IsNegative. If the current token
// cannot be a quantity, an error is recorded and nothing is consumed.
func (p *Parser) parseQuantityInto(amt *ast.Amount) {
	switch p.cur.Type {
	case token.INT, token.DECIMAL, token.TEXT:
		// acceptable quantity token
	default:
		p.errorf("expected quantity, got %s", p.cur.Type)
		return
	}

	lit := p.advance().Literal

	// The format has to be read from the raw literal, before normalising.
	format := detectFormat(lit)
	amt.QuantityFmt = format

	// Strip group marks and turn the decimal mark into '.' for the decimal
	// parser.
	plain := normalizeLiteral(lit, format.Thousands, format.Decimal)

	quantity, err := decimal.FromString(plain)
	if err != nil {
		p.errorf("invalid quantity %q: %v", lit, err)
		return
	}

	if amt.IsNegative {
		quantity = quantity.Neg()
	}
	amt.Quantity = quantity
}
1183
1184
// parseBlankLine consumes the line terminator of an empty line and returns a
// node spanning that terminator token.
func (p *Parser) parseBlankLine() *ast.BlankLine {
	blank := &ast.BlankLine{Span: p.cur.Span}
	p.expectNewline()
	return blank
}
1189
1190
// expectNewline requires the end of the line. A NEWLINE is consumed, EOF is
// accepted without consuming, and anything else records an error and leaves
// the token in place.
func (p *Parser) expectNewline() {
	switch p.cur.Type {
	case token.NEWLINE:
		p.advance()
	case token.EOF:
		// end of input also ends the line
	default:
		p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)
	}
}
1199
1200
// advance moves the parser one token forward and returns the token that was
// current before the move.
func (p *Parser) advance() token.Token {
	consumed := p.cur
	upcoming := p.peek
	p.cur = upcoming
	p.peek = p.lexer.Next()
	return consumed
}
1206
1207
func (p *Parser) got(kind token.Type) bool     { return p.cur.Type == kind }
1208
func (p *Parser) willGet(kind token.Type) bool { return p.peek.Type == kind }
1209
1210
// expect consumes the current token if it has type kind and returns it with
// true. Otherwise it records an error, consumes nothing, and returns the
// current token with false.
func (p *Parser) expect(kind token.Type) (token.Token, bool) {
	if !p.got(kind) {
		p.errorf("expected %s, got %s", kind, p.cur.Type)
		return p.cur, false
	}
	return p.advance(), true
}
1217
1218
func (p *Parser) errorf(format string, args ...any) {
1219
	p.errors = append(p.errors, &ast.ParseError{
1220
		Span:    p.cur.Span,
1221
		Message: fmt.Sprintf(format, args...),
1222
	})
1223
}
1224
1225
func (p *Parser) sync() {
1226
	for {
1227
		switch p.cur.Type {
1228
		case token.EOF:
1229
			return
1230
		case token.NEWLINE:
1231
			p.advance()
1232
			switch p.cur.Type {
1233
			case token.DATE, token.ACCOUNT, token.COMMODITY,
1234
				token.INCLUDE, token.ALIAS, token.PAYEE,
1235
				token.TAG, token.YEAR, token.D, token.P,
1236
				token.APPLY, token.END, token.COMMENTKW,
1237
				token.DECIMALMARK, token.TILDE, token.N, token.EQ:
1238
				return
1239
			}
1240
		default:
1241
			p.advance()
1242
		}
1243
	}
1244
}
1245
1246
// syncToNextline discards the rest of the current line, including its
// NEWLINE. At EOF it stops without consuming anything further.
func (p *Parser) syncToNextline() {
	for {
		switch p.cur.Type {
		case token.EOF:
			return
		case token.NEWLINE:
			p.advance()
			return
		default:
			p.advance()
		}
	}
}
1254
1255
// skipWhitespace consumes consecutive WHITESPACE tokens, if any.
func (p *Parser) skipWhitespace() {
	for p.cur.Type == token.WHITESPACE {
		p.advance()
	}
}
1260
1261
// span returns the span that begins where s begins and ends where the
// current token begins.
func (p *Parser) span(s token.Span) token.Span {
	result := token.Span{Start: s.Start}
	result.End = p.cur.Span.Start
	return result
}
1264
1265
// normalizeLiteral rewrites a number literal for decimal parsing. Every
// occurrence of the thousands byte is dropped (0 means "no thousands mark")
// and every occurrence of the decimal byte becomes '.'. The thousands check
// is made first, so a byte that is both is dropped.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var out strings.Builder
	for i := 0; i < len(lit); i++ {
		ch := lit[i]
		switch {
		case thousands != 0 && ch == thousands:
			// group mark: dropped
		case ch == decimal:
			out.WriteByte('.')
		default:
			out.WriteByte(ch)
		}
	}
	return out.String()
}
1279
1280
func detectFormat(lit string) ast.QuantityFormat {
1281
	var separators []int
1282
	for i, ch := range []byte(lit) {
1283
		if ch == '.' || ch == ',' || ch == ' ' || ch == '_' || ch == '\'' {
1284
			separators = append(separators, i)
1285
		}
1286
	}
1287
1288
	if len(separators) == 0 {
1289
		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}
1290
	}
1291
1292
	var decimal byte
1293
	thousands := byte(0)
1294
	precision := 0
1295
1296
	if len(separators) == 1 {
1297
		pos := separators[0]
1298
		sepChar := lit[pos]
1299
		if sepChar == ' ' || sepChar == '_' || sepChar == '\'' {
1300
			thousands = sepChar
1301
			decimal = '.' // default
1302
			precision = 0
1303
		} else {
1304
			decimal = sepChar
1305
			precision = len(lit) - pos - 1
1306
		}
1307
	} else {
1308
		last := separators[len(separators)-1]
1309
		decimal = lit[last]
1310
		thousands = lit[separators[0]]
1311
		precision = len(lit) - last - 1
1312
	}
1313
1314
	return ast.QuantityFormat{
1315
		Decimal:   decimal,
1316
		Thousands: thousands,
1317
		Precision: precision,
1318
	}
1319
}
1320
1321
// parseSimpleDate parses a full "Y<sep>M<sep>D" date from plain text, where
// <sep> is '/', '.' or '-' (checked in that order, '-' being the default).
//
// It returns the zero ast.Date when s is shorter than 8 bytes, does not
// split into three fields, has a non-numeric field, or has a month outside
// 1..12 or a day outside 1..31. Callers can therefore treat Year > 0 as
// "valid date"; a partially filled result is never returned.
func parseSimpleDate(s string) ast.Date {
	if len(s) < 8 {
		return ast.Date{}
	}

	sep := byte('-')
	if strings.Contains(s, "/") {
		sep = byte('/')
	} else if strings.Contains(s, ".") {
		sep = byte('.')
	}

	parts := strings.Split(s, string(sep))
	if len(parts) != 3 {
		return ast.Date{}
	}

	year, errYear := strconv.Atoi(parts[0])
	month, errMonth := strconv.Atoi(parts[1])
	day, errDay := strconv.Atoi(parts[2])
	if errYear != nil || errMonth != nil || errDay != nil {
		return ast.Date{}
	}
	// same bounds parseDate enforces
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return ast.Date{}
	}

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}
}