all repos

clerk @ cb14fd1

missing tooling for ledger/hledger

clerk/journal/parser/parser.go (view raw)

Oleksandr Smirnov Oleksandr Smirnov
olexsmir@gmail.com
parser: add more spec compliance..., 14 days ago
1
package parser
2
3
import (
4
	"fmt"
5
	"strconv"
6
	"strings"
7
8
	"olexsmir.xyz/clerk/internal/decimal"
9
	"olexsmir.xyz/clerk/journal/ast"
10
	"olexsmir.xyz/clerk/journal/lexer"
11
	"olexsmir.xyz/clerk/journal/token"
12
)
13
14
type Parser struct {
15
	lexer  *lexer.Lexer
16
	errors []*ast.ParseError
17
	cur    token.Token
18
	peek   token.Token
19
}
20
21
// New returns a Parser that reads from lex, with both the current and the
// lookahead token already loaded.
func New(lex *lexer.Lexer) *Parser {
	parser := new(Parser)
	parser.lexer = lex
	// Two advances prime the window: the first fills peek, the second moves
	// it into cur and fetches the next lookahead.
	for i := 0; i < 2; i++ {
		parser.advance()
	}
	return parser
}
27
28
// ParseJournal parses entries until EOF and returns the journal together
// with all errors recorded so far. Entries for which parseEntry yields nil
// are left out of the result.
func (p *Parser) ParseJournal() *ast.Journal {
	journal := &ast.Journal{}
	for !p.got(token.EOF) {
		entry := p.parseEntry()
		if entry == nil {
			continue
		}
		journal.Entries = append(journal.Entries, entry)
	}
	journal.Errors = p.errors
	return journal
}
38
39
func isDirectiveKeyword(t token.Type) bool {
40
	switch t {
41
	case token.COMMENTKW, token.ACCOUNT, token.COMMODITY, token.INCLUDE,
42
		token.ALIAS, token.PAYEE, token.TAG, token.APPLY, token.END,
43
		token.YEAR, token.DECIMALMARK, token.D, token.P, token.N:
44
		return true
45
	}
46
	return false
47
}
48
49
func (p *Parser) parseEntry() ast.Entry {
50
	if p.got(token.BANG) || p.got(token.AT) {
51
		if isDirectiveKeyword(p.peek.Type) {
52
			p.advance() // consume prefix
53
		}
54
	}
55
	switch p.cur.Type {
56
	case token.ILLEGAL:
57
		p.errorf("illegal character %q", p.cur.Literal)
58
		p.advance()
59
		return nil
60
	case token.INDENT:
61
		p.errorf("unexpected indent")
62
		p.syncToNextline()
63
		return nil
64
	case token.DATE:
65
		return p.parseTransaction()
66
	case token.TILDE:
67
		return p.parsePeriodicTransaction()
68
	case token.EQ:
69
		return p.parseAutomatedTransaction()
70
	case token.NEWLINE:
71
		return p.parseBlankLine()
72
	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:
73
		return p.parseComment()
74
	case token.ACCOUNT:
75
		return p.parseAccountDirective()
76
	case token.COMMODITY:
77
		return p.parseCommodityDirective()
78
	case token.INCLUDE:
79
		return p.parseIncludeDirective()
80
	case token.ALIAS:
81
		return p.parseAliasDirective()
82
	case token.PAYEE:
83
		return p.parsePayeeDirective()
84
	case token.TAG:
85
		return p.parseTagDirective()
86
	case token.YEAR:
87
		return p.parseYearDirective()
88
	case token.DECIMALMARK:
89
		return p.parseDecimalMarkDirective()
90
	case token.D:
91
		return p.parseDefaultCommodityDirective()
92
	case token.P:
93
		return p.parseMarketPriceDirective()
94
	case token.N:
95
		return p.parseIgnoredDirective()
96
	case token.APPLY:
97
		return p.parseApplyDirective()
98
	case token.END:
99
		return p.parseEndDirective()
100
	case token.COMMENTKW:
101
		return p.parseCommentBlockDirective()
102
	default:
103
		p.errorf("unexpected token %s", p.cur.Type)
104
		p.sync()
105
		return nil
106
	}
107
}
108
109
// parseTransaction parses a dated transaction: the header line (date,
// optional "=date" secondary date, status, "(code)", payee, "| note" and
// inline comment), then indented ';' header comments, then the postings.
func (p *Parser) parseTransaction() *ast.Transaction {
	s := p.cur.Span
	tx := &ast.Transaction{}

	tx.Date = p.parseDate()

	// optional secondary date
	if p.got(token.EQ) {
		p.advance()
		second := p.parseDate()
		tx.SecondDate = &second
	}

	p.skipWhitespace()

	// optional status
	tx.Status = p.parseStatus()

	// optional code
	if p.got(token.LPAREN) {
		p.advance()
		var code strings.Builder
		// Also stop at the end of the line/input so an unterminated code
		// cannot swallow the rest of the journal.
		for !p.got(token.RPAREN) && !p.got(token.NEWLINE) && !p.got(token.EOF) {
			code.WriteString(p.cur.Literal)
			p.advance()
		}
		// Consume the closing ')' (or record that it is missing); without
		// this the payee and the end of the header line are never reached.
		p.expect(token.RPAREN)
		tx.Code = new(code.String())
		p.skipWhitespace()
	}

	// optional payee | note
	if p.got(token.TEXT) || p.got(token.STRING) {
		tx.Payee = p.parsePayee()
		p.skipWhitespace()

		if p.got(token.PIPE) {
			p.advance()
			p.skipWhitespace()

			var note strings.Builder
			for p.got(token.TEXT) || p.got(token.WHITESPACE) {
				note.WriteString(p.cur.Literal)
				p.advance()
			}
			tx.Note = new(note.String())
		}
	}

	tx.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// header comments: indented ';' lines before the first posting
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // consume indent
		c := p.parseComment()
		tx.HeaderComments = append(tx.HeaderComments, *c)
	}

	// postings; parsePosting returns nil for lines it rejected
	for p.got(token.INDENT) {
		if posting := p.parsePosting(); posting != nil {
			tx.Postings = append(tx.Postings, posting)
		}
	}

	tx.Span = p.span(s)
	return tx
}
181
182
// unquote strips one pair of matching surrounding quotes (either "..." or
// '...') from s. Any other input is returned unchanged.
func unquote(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first != last {
		return s
	}
	switch first {
	case '"', '\'':
		return s[1 : len(s)-1]
	}
	return s
}
188
189
// parsePayee reads a payee name. A STRING token is taken whole and
// unquoted; otherwise TEXT/INT/DECIMAL tokens are concatenated, keeping a
// whitespace token only when another such token follows it.
func (p *Parser) parsePayee() *ast.Payee {
	start := p.cur.Span

	if p.got(token.STRING) {
		quoted := p.cur.Literal
		p.advance()
		return &ast.Payee{Name: unquote(quoted), Span: p.span(start)}
	}

	isWord := func(t token.Type) bool {
		return t == token.TEXT || t == token.INT || t == token.DECIMAL
	}

	var sb strings.Builder
	for {
		innerSpace := p.got(token.WHITESPACE) && isWord(p.peek.Type)
		if !isWord(p.cur.Type) && !innerSpace {
			break // trailing whitespace or anything else ends the name
		}
		sb.WriteString(p.cur.Literal)
		p.advance()
	}
	return &ast.Payee{Name: unquote(sb.String()), Span: p.span(start)}
}
206
207
// parsePeriodicTransaction parses a '~' periodic transaction: the period
// expression, an optional description, an optional inline comment, and the
// comment lines and postings that follow the header line.
//
// NOTE(review): Span is taken right after the '~' prefix, so it does not
// cover the whole node; confirm whether that is intended.
func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {
	s := p.cur.Span
	p.expect(token.TILDE)
	p.skipWhitespace()

	pt := &ast.PeriodicTransaction{}

	pt.Span = p.span(s)
	pt.Period = p.parsePeriod()

	if desc := p.parseOptPeriodicDescription(); desc != "" {
		pt.Description = &desc
	}

	comment := p.parseOptInlineComment()
	p.expectNewline()

	var headerComments []*ast.Comment
	var postings []*ast.Posting
	for p.got(token.INDENT) || p.got(token.SEMICOLON) {
		switch {
		case p.got(token.SEMICOLON):
			headerComments = append(headerComments, p.parseComment())
		case p.willGet(token.SEMICOLON):
			// An indented ';' line is a comment, not a posting. Handing it
			// to parsePosting would only yield "expected account name".
			p.advance() // consume indent
			headerComments = append(headerComments, p.parseComment())
		default:
			if posting := p.parsePosting(); posting != nil {
				postings = append(postings, posting)
			}
		}
	}

	pt.HeaderComments = headerComments
	pt.Postings = postings
	pt.Comment = comment
	return pt
}
243
244
// parseAutomatedTransaction parses an '=' automated transaction: the rest of
// the header line as a raw expression, an optional inline comment, indented
// ';' header comments and finally the postings. Span is taken right after
// the '=' prefix.
func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {
	start := p.cur.Span
	p.expect(token.EQ)
	p.skipWhitespace()

	auto := &ast.AutomatedTransaction{Span: p.span(start)}
	auto.Expr = p.parseDirectiveExpr()
	auto.Comment = p.parseOptInlineComment()
	p.expectNewline()

	// Indented comment lines that precede the first posting.
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
		p.advance() // drop the indent
		auto.HeaderComments = append(auto.HeaderComments, p.parseComment())
	}

	// Every remaining indented line is handed to parsePosting.
	for p.got(token.INDENT) {
		posting := p.parsePosting()
		if posting == nil {
			continue
		}
		auto.Postings = append(auto.Postings, posting)
	}

	return auto
}
271
272
func (p *Parser) parsePeriod() ast.Period {
273
	s := p.cur.Span
274
275
	var periodBuf strings.Builder
276
277
	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&
278
		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {
279
280
		if p.got(token.WHITESPACE) {
281
			if len(p.cur.Literal) >= 2 {
282
				break
283
			}
284
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||
285
				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||
286
				p.willGet(token.PERCENT) || p.willGet(token.STAR) {
287
				p.advance()
288
				continue
289
			}
290
		}
291
292
		periodBuf.WriteString(p.cur.Literal)
293
		p.advance()
294
	}
295
296
	str := periodBuf.String()
297
	period := ast.Period{Raw: str, Span: p.span(s)}
298
299
	if _, after, ok := strings.Cut(str, " from "); ok {
300
		end := strings.Index(after, " ")
301
		dateStr := after
302
		if end >= 0 {
303
			dateStr = after[:end]
304
		}
305
		if d := parseSimpleDate(dateStr); d.Year > 0 {
306
			period.From = &d
307
			rest := after
308
			if end >= 0 {
309
				rest = after[end:]
310
			}
311
			if _, toAfter, ok := strings.Cut(rest, " to "); ok {
312
				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {
313
					toAfter = toAfter[:toEnd]
314
				}
315
				if d := parseSimpleDate(toAfter); d.Year > 0 {
316
					period.To = &d
317
				}
318
			}
319
		}
320
	}
321
	return period
322
}
323
324
func (p *Parser) parseComment() *ast.Comment {
325
	s := p.cur.Span
326
	marker := p.cur.Literal[0]
327
	p.advance()
328
	p.skipWhitespace()
329
330
	var text string
331
	if p.got(token.TEXT) {
332
		text = p.cur.Literal
333
		p.advance()
334
	}
335
336
	p.expectNewline()
337
338
	return &ast.Comment{
339
		Marker: marker,
340
		Text:   text,
341
		Span:   p.span(s),
342
	}
343
}
344
345
func (p *Parser) parseAccountDirective() *ast.AccountDirective {
346
	s := p.cur.Span
347
	p.expect(token.ACCOUNT)
348
	p.skipWhitespace()
349
350
	account := p.parseAccount()
351
	comment := p.parseOptInlineComment()
352
	p.expectNewline()
353
354
	for p.got(token.INDENT) {
355
		p.advance()
356
		for !p.got(token.NEWLINE) && !p.got(token.EOF) {
357
			p.advance()
358
		}
359
		p.expectNewline()
360
	}
361
362
	return &ast.AccountDirective{
363
		Account: account,
364
		Comment: comment,
365
		Span:    p.span(s),
366
	}
367
}
368
369
func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {
370
	s := p.cur.Span
371
	p.expect(token.COMMODITY)
372
	p.skipWhitespace()
373
374
	var commodity string
375
	var format *ast.Amount
376
377
	switch p.cur.Type {
378
	case token.TEXT, token.INT, token.DECIMAL:
379
		format = p.parseAmount()
380
		commodity = format.Commodity
381
	case token.COMMODITYMARK:
382
		commodity = p.cur.Literal
383
		p.advance()
384
		hadSpace := p.got(token.WHITESPACE)
385
		p.skipWhitespace()
386
		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {
387
			format = p.parseAmount()
388
			format.Commodity = commodity
389
			format.CommodityPos = ast.CommodityBefore
390
			format.HasSpace = hadSpace
391
		}
392
	default:
393
		p.errorf("expected commodity name or amount, got %s", p.cur.Type)
394
	}
395
396
	if commodity == "" {
397
		p.errorf("expected commodity name, got %s", p.cur.Type)
398
	}
399
400
	comment := p.parseOptInlineComment()
401
	p.expectNewline()
402
403
	for p.got(token.INDENT) {
404
		p.advance()
405
		p.skipWhitespace()
406
		if p.got(token.COMMODITYMARK) && p.cur.Literal == "format" {
407
			p.advance()
408
			p.skipWhitespace()
409
			format = p.parseAmount()
410
		} else {
411
			for !p.got(token.NEWLINE) && !p.got(token.EOF) {
412
				p.advance()
413
			}
414
		}
415
		p.expectNewline()
416
	}
417
418
	cd := &ast.CommodityDirective{
419
		Commodity: commodity,
420
		Comment:   comment,
421
		Span:      p.span(s),
422
	}
423
	if format != nil {
424
		cd.Format = *format
425
	}
426
	return cd
427
}
428
429
func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {
430
	s := p.cur.Span
431
	p.expect(token.INCLUDE)
432
	p.skipWhitespace()
433
434
	path := ""
435
	if p.got(token.TEXT) {
436
		path = p.cur.Literal
437
		p.advance()
438
	} else {
439
		p.errorf("expected file path, got %s", p.cur.Type)
440
	}
441
442
	comment := p.parseOptInlineComment()
443
	p.expectNewline()
444
445
	return &ast.IncludeDirective{
446
		Path:    path,
447
		Comment: comment,
448
		Span:    p.span(s),
449
	}
450
}
451
452
// parseAliasDirective parses "alias FROM = TO [; comment]", reading both
// sides with parseAccount.
func (p *Parser) parseAliasDirective() *ast.AliasDirective {
	start := p.cur.Span
	p.expect(token.ALIAS)
	p.skipWhitespace()

	from := p.parseAccount().Name
	p.skipWhitespace()
	p.expect(token.EQ)
	p.skipWhitespace()
	to := p.parseAccount().Name
	p.skipWhitespace()

	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.AliasDirective{
		From:    from,
		To:      to,
		Comment: comment,
		Span:    p.span(start),
	}
}
468
469
func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {
470
	s := p.cur.Span
471
	p.expect(token.PAYEE)
472
	p.skipWhitespace()
473
474
	name := ""
475
	if p.got(token.TEXT) || p.got(token.STRING) {
476
		name = p.parsePayee().Name
477
	}
478
479
	comment := p.parseOptInlineComment()
480
	p.expectNewline()
481
482
	return &ast.PayeeDirective{
483
		Name:    name,
484
		Comment: comment,
485
		Span:    p.span(s),
486
	}
487
}
488
489
func (p *Parser) parseTagDirective() *ast.TagDirective {
490
	s := p.cur.Span
491
	p.expect(token.TAG)
492
	p.skipWhitespace()
493
494
	name := ""
495
	if p.got(token.TEXT) {
496
		name = p.cur.Literal
497
		p.advance()
498
	}
499
500
	comment := p.parseOptInlineComment()
501
	p.expectNewline()
502
503
	return &ast.TagDirective{
504
		Name:    name,
505
		Comment: comment,
506
		Span:    p.span(s),
507
	}
508
}
509
510
// parseYearDirective parses the year directive followed by an INT year and
// an optional inline comment. A missing year, or one that does not fit in
// an int, is reported and leaves Year at zero.
func (p *Parser) parseYearDirective() *ast.YearDirective {
	s := p.cur.Span
	year := &ast.YearDirective{}
	p.expect(token.YEAR)
	p.skipWhitespace()

	if p.got(token.INT) {
		// Report conversion failures instead of silently storing whatever
		// Atoi returned alongside its error.
		if y, err := strconv.Atoi(p.cur.Literal); err != nil {
			p.errorf("invalid year %q: %v", p.cur.Literal, err)
		} else {
			year.Year = y
		}
		p.advance()
	} else {
		p.errorf("expected year, got %s", p.cur.Type)
	}

	p.skipWhitespace()
	year.Comment = p.parseOptInlineComment()
	p.expectNewline()
	year.Span = p.span(s)
	return year
}
529
530
// parseDecimalMarkDirective parses the decimal-mark directive. The mark is
// the first byte of the following TEXT token and defaults to '.' when no
// usable token is present.
func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {
	start := p.cur.Span
	p.expect(token.DECIMALMARK)
	p.skipWhitespace()

	directive := &ast.DecimalMarkDirective{Mark: byte('.')}
	if p.got(token.TEXT) {
		if lit := p.cur.Literal; len(lit) > 0 {
			directive.Mark = lit[0]
		}
		p.advance()
	}

	p.skipWhitespace()
	directive.Comment = p.parseOptInlineComment()
	p.expectNewline()
	directive.Span = p.span(start)
	return directive
}
550
551
// parseDefaultCommodityDirective parses "D AMOUNT [; comment]".
func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {
	start := p.cur.Span
	p.expect(token.D)
	p.skipWhitespace()

	amount := p.parseAmount()
	p.skipWhitespace()
	comment := p.parseOptInlineComment()
	p.expectNewline()

	return &ast.DefaultCommodityDirective{
		Amount:  *amount,
		Comment: comment,
		Span:    p.span(start),
	}
}
563
564
func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {
565
	s := p.cur.Span
566
	p.expect(token.N)
567
	p.skipWhitespace()
568
	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {
569
		p.advance()
570
	}
571
	p.skipWhitespace()
572
	comment := p.parseOptInlineComment()
573
	p.expectNewline()
574
	return &ast.IgnoredDirective{
575
		Comment: comment,
576
		Span:    p.span(s),
577
	}
578
}
579
580
// parseMarketPriceDirective parses "P DATE [TIME] COMMODITY AMOUNT
// [; comment]".
func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {
	s := p.cur.Span
	p.expect(token.P)
	p.skipWhitespace()

	mp := &ast.MarketPriceDirective{}
	mp.DateTime.Date = p.parseDate()
	p.skipWhitespace()

	if p.got(token.TIME) {
		mp.DateTime.Time = new(p.parseTime())
		p.skipWhitespace()
	}

	// expect already consumes the commodity on success, so no further
	// advance is needed. An unconditional extra advance would drop the
	// token after the commodity, and at end of line that token is the
	// NEWLINE, leaving the parser in the middle of the next entry.
	tok, ok := p.expect(token.COMMODITYMARK)
	switch {
	case ok:
		mp.Commodity = tok.Literal
	case !p.got(token.NEWLINE) && !p.got(token.EOF):
		// Best-effort recovery: treat the unexpected token as the symbol
		// and step over it, but never over the line terminator.
		mp.Commodity = tok.Literal
		p.advance()
	}
	p.skipWhitespace()

	mp.Amount = *p.parseAmount()

	p.skipWhitespace()
	mp.Comment = p.parseOptInlineComment()

	p.expectNewline()
	mp.Span = p.span(s)
	return mp
}
608
609
func (p *Parser) parseTime() ast.Time {
610
	s := p.cur.Span
611
	tok, _ := p.expect(token.TIME)
612
	lit := tok.Literal
613
614
	parts := strings.Split(lit, ":")
615
	if len(parts) < 2 {
616
		p.errorf("invalid time format: %q", lit)
617
		return ast.Time{Span: p.span(s)}
618
	}
619
620
	hour, _ := strconv.Atoi(parts[0])
621
	minute, _ := strconv.Atoi(parts[1])
622
	second := 0
623
	if len(parts) > 2 {
624
		second, _ = strconv.Atoi(parts[2])
625
	}
626
627
	if hour < 0 || hour > 23 {
628
		p.errorf("invalid hour %d in time %q", hour, lit)
629
	}
630
	if minute < 0 || minute > 59 {
631
		p.errorf("invalid minute %d in time %q", minute, lit)
632
	}
633
	if second < 0 || second > 59 {
634
		p.errorf("invalid second %d in time %q", second, lit)
635
	}
636
637
	return ast.Time{
638
		Hour:   hour,
639
		Minute: minute,
640
		Second: second,
641
		Span:   p.span(s),
642
	}
643
}
644
645
func (p *Parser) parseApplyDirective() *ast.ApplyDirective {
646
	s := p.cur.Span
647
	p.expect(token.APPLY)
648
	p.skipWhitespace()
649
650
	expr := p.parseDirectiveExpr()
651
	comment := p.parseOptInlineComment()
652
	p.expectNewline()
653
654
	return &ast.ApplyDirective{
655
		Expr:    expr,
656
		Comment: comment,
657
		Span:    p.span(s),
658
	}
659
}
660
661
func (p *Parser) parseEndDirective() *ast.EndDirective {
662
	s := p.cur.Span
663
	p.expect(token.END)
664
	p.skipWhitespace()
665
666
	expr := p.parseDirectiveExpr()
667
	comment := p.parseOptInlineComment()
668
	p.expectNewline()
669
670
	return &ast.EndDirective{
671
		Expr:    expr,
672
		Comment: comment,
673
		Span:    p.span(s),
674
	}
675
}
676
677
func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {
678
	start := p.cur.Span
679
	p.expect(token.COMMENTKW)
680
	p.skipWhitespace()
681
682
	header := p.parseDirectiveExpr()
683
	comment := p.parseOptInlineComment()
684
	p.expectNewline()
685
686
	var content strings.Builder
687
	for p.cur.Type != token.EOF {
688
		if p.got(token.END) {
689
			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {
690
				p.advance()
691
				p.expectNewline()
692
				break
693
			}
694
			if p.willGet(token.WHITESPACE) {
695
				endTok := p.cur
696
				p.advance()
697
				wsTok := p.cur
698
				p.advance()
699
				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token
700
					p.advance()
701
					p.parseDirectiveExpr()
702
					p.parseOptInlineComment()
703
					p.expectNewline()
704
					break
705
				}
706
				content.WriteString(endTok.Literal)
707
				content.WriteString(wsTok.Literal)
708
				continue
709
			}
710
		}
711
		content.WriteString(p.cur.Literal)
712
		p.advance()
713
	}
714
715
	return &ast.CommentBlockDirective{
716
		Header:  header,
717
		Content: content.String(),
718
		Comment: comment,
719
		Span:    p.span(start),
720
	}
721
}
722
723
// parseStatus parses an optional status marker and the whitespace after it.
// A literal starting with '*' means cleared; anything else means pending.
// It returns nil, consuming nothing, when the current token is neither STAR
// nor BANG.
func (p *Parser) parseStatus() *ast.Status {
	if !p.got(token.STAR) && !p.got(token.BANG) {
		return nil
	}

	value := ast.StatusPending
	if p.cur.Literal[0] == '*' {
		value = ast.StatusCleared
	}
	status := &ast.Status{Value: value, Span: p.cur.Span}

	p.advance()
	p.skipWhitespace()
	return status
}
736
737
func (p *Parser) isAmountStart() bool {
738
	switch p.cur.Type {
739
	default:
740
		return false
741
	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:
742
		return true
743
	case token.TEXT:
744
		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'
745
	}
746
}
747
748
func (p *Parser) parseAmount() *ast.Amount {
749
	s := p.cur.Span
750
	amt := &ast.Amount{
751
		QuantityFmt: ast.QuantityFormat{Decimal: '.'},
752
		Span:        p.span(s),
753
	}
754
755
	// commodity before quantity: $10.00
756
	if p.got(token.COMMODITYMARK) {
757
		amt.Commodity = p.cur.Literal
758
		amt.CommodityPos = ast.CommodityBefore
759
		p.advance()
760
		if p.got(token.WHITESPACE) {
761
			amt.HasSpace = true
762
			p.skipWhitespace()
763
		}
764
		switch p.cur.Type {
765
		case token.MINUS:
766
			amt.IsNegative = true
767
			p.advance()
768
		case token.PLUS:
769
			p.advance()
770
		}
771
		p.parseQuantityInto(amt)
772
	} else {
773
		// optional sign
774
		switch p.cur.Type {
775
		case token.MINUS:
776
			amt.IsNegative = true
777
			p.advance()
778
		case token.PLUS:
779
			p.advance()
780
		}
781
782
		// commodity before quantity: -$120:
783
		if p.got(token.COMMODITYMARK) {
784
			amt.Commodity = p.cur.Literal
785
			amt.CommodityPos = ast.CommodityBefore
786
			p.advance()
787
			if p.got(token.WHITESPACE) {
788
				amt.HasSpace = true
789
				p.skipWhitespace()
790
			}
791
		}
792
793
		p.parseQuantityInto(amt)
794
795
		// commodity after quantity: 10.00 UAH (only if not set)
796
		if amt.Commodity == "" {
797
			switch p.cur.Type {
798
			case token.WHITESPACE:
799
				p.skipWhitespace()
800
				if p.got(token.COMMODITYMARK) {
801
					amt.HasSpace = true
802
					amt.Commodity = p.cur.Literal
803
					amt.CommodityPos = ast.CommodityAfter
804
					p.advance()
805
				}
806
			case token.COMMODITYMARK:
807
				amt.Commodity = p.cur.Literal
808
				amt.CommodityPos = ast.CommodityAfter
809
				p.advance()
810
			}
811
		}
812
	}
813
814
	return amt
815
}
816
817
func (p *Parser) parseAmountWithOptExpr() *ast.Amount {
818
	if p.got(token.STAR) {
819
		p.advance()
820
		p.skipWhitespace()
821
		amt := p.parseAmount()
822
		if amt != nil {
823
			amt.IsExpr = true
824
		}
825
		return amt
826
	}
827
	if p.got(token.PARENEXPR) {
828
		lit := p.cur.Literal
829
		amt := &ast.Amount{
830
			IsExpr:      true,
831
			QuantityFmt: ast.QuantityFormat{Decimal: '.'},
832
		}
833
		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {
834
			inner := lit[1 : len(lit)-1]
835
			i := 0
836
			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {
837
				i++
838
			}
839
			j := len(inner)
840
			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {
841
				j--
842
			}
843
			amt.Expr = inner[i:j]
844
		}
845
		amt.Span = p.cur.Span
846
		p.advance()
847
		return amt
848
	}
849
	return p.parseAmount()
850
}
851
852
func (p *Parser) parsePosting() *ast.Posting {
853
	s := p.cur.Span
854
	posting := &ast.Posting{}
855
	p.expect(token.INDENT)
856
857
	// exit if it's empty line
858
	if p.got(token.NEWLINE) || p.got(token.EOF) {
859
		p.syncToNextline()
860
		return nil
861
	}
862
863
	// optional status, outside of brackets, '! (account)'
864
	posting.Status = p.parseStatus()
865
866
	// detect virtual posting brackets
867
	switch p.cur.Type {
868
	case token.LPAREN:
869
		posting.Type = ast.PostingVirtualUnbalanced
870
		p.advance()
871
	case token.LBRACKET:
872
		posting.Type = ast.PostingVirtualBalanced
873
		p.advance()
874
	}
875
876
	// optional status, inside of brackets, '(* account)'
877
	if p.got(token.STAR) || p.got(token.BANG) {
878
		posting.Status = p.parseStatus()
879
	}
880
881
	// validate, must be account text
882
	if p.cur.Type != token.TEXT {
883
		p.errorf("expected account name, got %s", p.cur.Type)
884
		p.syncToNextline()
885
		return nil
886
	}
887
888
	posting.Account = p.parseAccount()
889
890
	// consume closing bracket
891
	switch p.cur.Type {
892
	case token.RPAREN:
893
		p.advance()
894
	case token.RBRACKET:
895
		p.advance()
896
	}
897
898
	// optional amount - after two spaces
899
	if p.got(token.WHITESPACE) {
900
		p.skipWhitespace()
901
		if p.isAmountStart() || p.got(token.STAR) {
902
			posting.Amount = p.parseAmountWithOptExpr()
903
		}
904
	}
905
906
	// optional cost '@' or '@@'
907
	if p.got(token.WHITESPACE) {
908
		p.skipWhitespace()
909
	}
910
	if p.got(token.AT) || p.got(token.ATAT) {
911
		posting.Cost = p.parseCost()
912
	}
913
914
	// optional balance assertion
915
	if p.got(token.WHITESPACE) {
916
		p.skipWhitespace()
917
	}
918
	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {
919
		posting.Balance = p.parseBalanceAssertion()
920
		p.skipWhitespace()
921
		if p.got(token.AT) || p.got(token.ATAT) {
922
			p.parseCost()
923
		}
924
	}
925
926
	posting.Comment = p.parseOptInlineComment()
927
	p.expectNewline()
928
929
	// continuation comments
930
	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {
931
		p.advance()
932
		c := p.parseComment()
933
		posting.Comments = append(posting.Comments, *c)
934
	}
935
936
	posting.Span = p.span(s)
937
	return posting
938
}
939
940
func (p *Parser) parseCost() *ast.Cost {
941
	s := p.cur.Span
942
	isTotal := p.got(token.ATAT)
943
	p.advance() // consume '@' '@@'
944
	p.skipWhitespace()
945
	return &ast.Cost{
946
		IsTotal: isTotal,
947
		Amount:  *p.parseAmount(),
948
		Span:    p.span(s),
949
	}
950
}
951
952
func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {
953
	s := p.cur.Span
954
955
	ba := &ast.BalanceAssertion{}
956
	switch p.cur.Type {
957
	case token.EQ: // basic assertion
958
	case token.EQEQ:
959
		ba.IsStrict = true
960
	case token.EQEQEQ:
961
		ba.IsStrict = true
962
		ba.IsInclusive = true
963
	}
964
	p.advance()
965
	p.skipWhitespace()
966
967
	ba.Amount = *p.parseAmount()
968
	ba.Span = p.span(s)
969
	return ba
970
}
971
972
func (p *Parser) parseAccount() ast.Account {
973
	s := p.cur.Span
974
	var name strings.Builder
975
976
	switch p.cur.Type {
977
	case token.TEXT:
978
		_, _ = name.WriteString(p.cur.Literal)
979
		p.advance()
980
		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {
981
			_, _ = name.WriteString(" ")
982
			p.advance()
983
			_, _ = name.WriteString(p.cur.Literal)
984
			p.advance()
985
		}
986
	case token.COMMODITYMARK:
987
		_, _ = name.WriteString(p.cur.Literal)
988
		p.advance()
989
		for p.got(token.TEXT) {
990
			_, _ = name.WriteString(p.cur.Literal)
991
			p.advance()
992
		}
993
	}
994
	return ast.Account{Name: name.String(), Span: p.span(s)}
995
}
996
997
func (p *Parser) parseDate() ast.Date {
998
	s := p.cur.Span
999
	tok, ok := p.expect(token.DATE)
1000
	if !ok {
1001
		return ast.Date{Span: p.span(s)}
1002
	}
1003
1004
	sep := byte(0)
1005
	lit := tok.Literal
1006
	for i := 0; i < len(lit); i++ {
1007
		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {
1008
			sep = lit[i]
1009
			break
1010
		}
1011
	}
1012
	if sep == 0 {
1013
		p.errorf("invalid date format: %q", lit)
1014
		return ast.Date{Span: p.span(s)}
1015
	}
1016
1017
	parts := strings.Split(lit, string(sep))
1018
1019
	// M/D or MM/DD (year inferred)
1020
	if len(parts) == 2 {
1021
		month, err := strconv.Atoi(parts[0])
1022
		day, err2 := strconv.Atoi(parts[1])
1023
		if err != nil || err2 != nil {
1024
			p.errorf("invalid date literal: %q", lit)
1025
			return ast.Date{Span: p.span(s)}
1026
		}
1027
		if month < 1 || month > 12 {
1028
			p.errorf("invalid month %d in %q", month, lit)
1029
			return ast.Date{Span: p.span(s)}
1030
		}
1031
		if day < 1 || day > 31 {
1032
			p.errorf("invalid day %d in %q", day, lit)
1033
			return ast.Date{Span: p.span(s)}
1034
		}
1035
		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}
1036
	}
1037
1038
	if len(parts) != 3 {
1039
		p.errorf("invalid date format: %q", lit)
1040
		return ast.Date{Span: p.span(s)}
1041
	}
1042
1043
	year, err := strconv.Atoi(parts[0])
1044
	month, err2 := strconv.Atoi(parts[1])
1045
	day, err3 := strconv.Atoi(parts[2])
1046
	if err != nil || err2 != nil || err3 != nil {
1047
		p.errorf("invalid date literal: %q", lit)
1048
		return ast.Date{Span: p.span(s)}
1049
	}
1050
	if month < 1 || month > 12 {
1051
		p.errorf("invalid month %d in %q", month, lit)
1052
		return ast.Date{Span: p.span(s)}
1053
	}
1054
	if day < 1 || day > 31 {
1055
		p.errorf("invalid day %d in %q", day, lit)
1056
		return ast.Date{Span: p.span(s)}
1057
	}
1058
1059
	return ast.Date{
1060
		Year:  year,
1061
		Month: month,
1062
		Day:   day,
1063
		Sep:   sep,
1064
		Span:  p.span(s),
1065
	}
1066
}
1067
1068
func (p *Parser) parseOptInlineComment() *ast.Comment {
1069
	p.skipWhitespace() // todo:
1070
	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {
1071
		return nil
1072
	}
1073
1074
	s := p.cur.Span
1075
	marker := p.cur.Literal[0]
1076
	p.advance() // consume marker
1077
	p.skipWhitespace()
1078
1079
	text := ""
1080
	if p.got(token.TEXT) {
1081
		text = p.cur.Literal
1082
		p.advance()
1083
	}
1084
1085
	return &ast.Comment{
1086
		Marker: marker,
1087
		Text:   text,
1088
		Span:   p.span(s),
1089
	}
1090
}
1091
1092
// parseOptPeriodicDescription reads the description of a periodic
// transaction. One is only looked for after a whitespace token of at least
// two characters followed by TEXT; otherwise "" is returned.
func (p *Parser) parseOptPeriodicDescription() string {
	wideGap := p.got(token.WHITESPACE) && len(p.cur.Literal) >= 2
	if !wideGap {
		return ""
	}

	p.skipWhitespace()
	if !p.got(token.TEXT) {
		return ""
	}
	return p.parseDescription()
}
1105
1106
// parseDescription concatenates consecutive TEXT tokens, keeping a
// whitespace token only when a TEXT token follows it.
func (p *Parser) parseDescription() string {
	var sb strings.Builder
	for {
		innerSpace := p.got(token.WHITESPACE) && p.willGet(token.TEXT)
		if !p.got(token.TEXT) && !innerSpace {
			return sb.String()
		}
		sb.WriteString(p.cur.Literal)
		p.advance()
	}
}
1114
1115
// parseDirectiveExpr concatenates the literals of all tokens up to, but not
// including, the next NEWLINE, EOF or SEMICOLON.
func (p *Parser) parseDirectiveExpr() string {
	var sb strings.Builder
	for !p.got(token.NEWLINE) && !p.got(token.EOF) && !p.got(token.SEMICOLON) {
		sb.WriteString(p.cur.Literal)
		p.advance()
	}
	return sb.String()
}
1123
1124
// parseQuantityInto parses the numeric part of an amount into amt. It
// records the detected number format, converts the literal to a decimal
// and negates it when amt.IsNegative is already set. On a wrong token type
// nothing is consumed; on a conversion failure the token is consumed but
// Quantity is left untouched.
func (p *Parser) parseQuantityInto(amt *ast.Amount) {
	switch p.cur.Type {
	case token.INT, token.DECIMAL, token.TEXT:
		// acceptable quantity token
	default:
		p.errorf("expected quantity, got %s", p.cur.Type)
		return
	}

	raw := p.advance().Literal // advance hands back the consumed token

	// Capture formatting metadata before the literal is normalised.
	format := detectFormat(raw)
	amt.QuantityFmt = format

	// Drop thousands marks and turn the decimal mark into '.' for parsing.
	quantity, err := decimal.FromString(normalizeLiteral(raw, format.Thousands, format.Decimal))
	if err != nil {
		p.errorf("invalid quantity %q: %v", raw, err)
		return
	}

	if amt.IsNegative {
		quantity = quantity.Neg()
	}
	amt.Quantity = quantity
}
1151
1152
// parseBlankLine consumes the line terminator of an empty line. The node's
// span is that of the terminator token itself.
func (p *Parser) parseBlankLine() *ast.BlankLine {
	blank := &ast.BlankLine{Span: p.cur.Span}
	p.expectNewline()
	return blank
}
1157
1158
// expectNewline requires the current token to end the line. A NEWLINE is
// consumed, EOF is accepted and left in place, and anything else is
// reported without being consumed.
func (p *Parser) expectNewline() {
	switch p.cur.Type {
	case token.NEWLINE:
		p.advance()
	case token.EOF:
		// end of input also ends the line
	default:
		p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)
	}
}
1167
1168
// advance shifts the token window forward by one: peek becomes cur and a
// fresh token is pulled from the lexer. It returns the token that was
// current before the shift.
func (p *Parser) advance() token.Token {
	consumed := p.cur
	p.cur, p.peek = p.peek, p.lexer.Next()
	return consumed
}
1174
1175
func (p *Parser) got(kind token.Type) bool     { return p.cur.Type == kind }
1176
func (p *Parser) willGet(kind token.Type) bool { return p.peek.Type == kind }
1177
1178
// expect consumes the current token if it has type kind and returns it with
// true. Otherwise it records an error and returns the unconsumed current
// token with false.
func (p *Parser) expect(kind token.Type) (token.Token, bool) {
	if !p.got(kind) {
		p.errorf("expected %s, got %s", kind, p.cur.Type)
		return p.cur, false
	}
	return p.advance(), true
}
1185
1186
func (p *Parser) errorf(format string, args ...any) {
1187
	p.errors = append(p.errors, &ast.ParseError{
1188
		Span:    p.cur.Span,
1189
		Message: fmt.Sprintf(format, args...),
1190
	})
1191
}
1192
1193
func (p *Parser) sync() {
1194
	for {
1195
		switch p.cur.Type {
1196
		case token.EOF:
1197
			return
1198
		case token.NEWLINE:
1199
			p.advance()
1200
			switch p.cur.Type {
1201
			case token.DATE, token.ACCOUNT, token.COMMODITY,
1202
				token.INCLUDE, token.ALIAS, token.PAYEE,
1203
				token.TAG, token.YEAR, token.D, token.P,
1204
				token.APPLY, token.END, token.COMMENTKW,
1205
				token.DECIMALMARK, token.TILDE, token.N, token.EQ:
1206
				return
1207
			}
1208
		default:
1209
			p.advance()
1210
		}
1211
	}
1212
}
1213
1214
// syncToNextline discards the remainder of the current line, including its
// NEWLINE. It stops at EOF without consuming it.
func (p *Parser) syncToNextline() {
	for {
		switch p.cur.Type {
		case token.EOF:
			return
		case token.NEWLINE:
			p.advance()
			return
		}
		p.advance()
	}
}
1222
1223
// skipWhitespace consumes consecutive WHITESPACE tokens, if any.
func (p *Parser) skipWhitespace() {
	for {
		if !p.got(token.WHITESPACE) {
			return
		}
		p.advance()
	}
}
1228
1229
// span returns a span that begins where s begins and ends where the current
// token begins.
func (p *Parser) span(s token.Span) token.Span {
	var covered token.Span
	covered.Start = s.Start
	covered.End = p.cur.Span.Start
	return covered
}
1232
1233
// normalizeLiteral rewrites a number literal for decimal parsing: every
// thousands byte is dropped (unless thousands is 0) and every decimal byte
// is replaced by '.'. The thousands check comes first, so a byte that is
// both the thousands and the decimal mark is dropped.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var b strings.Builder
	b.Grow(len(lit)) // the result is never longer than the input
	for i := 0; i < len(lit); i++ {
		ch := lit[i]
		switch {
		case thousands != 0 && ch == thousands:
			// thousands separator: skip
		case ch == decimal:
			b.WriteByte('.')
		default:
			b.WriteByte(ch)
		}
	}
	return b.String()
}
1247
1248
func detectFormat(lit string) ast.QuantityFormat {
1249
	var separators []int
1250
	for i, ch := range []byte(lit) {
1251
		if ch == '.' || ch == ',' || ch == ' ' || ch == '_' || ch == '\'' {
1252
			separators = append(separators, i)
1253
		}
1254
	}
1255
1256
	if len(separators) == 0 {
1257
		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}
1258
	}
1259
1260
	var decimal byte
1261
	thousands := byte(0)
1262
	precision := 0
1263
1264
	if len(separators) == 1 {
1265
		pos := separators[0]
1266
		sepChar := lit[pos]
1267
		if sepChar == ' ' || sepChar == '_' || sepChar == '\'' {
1268
			thousands = sepChar
1269
			decimal = '.' // default
1270
			precision = 0
1271
		} else {
1272
			decimal = sepChar
1273
			precision = len(lit) - pos - 1
1274
		}
1275
	} else {
1276
		last := separators[len(separators)-1]
1277
		decimal = lit[last]
1278
		thousands = lit[separators[0]]
1279
		precision = len(lit) - last - 1
1280
	}
1281
1282
	return ast.QuantityFormat{
1283
		Decimal:   decimal,
1284
		Thousands: thousands,
1285
		Precision: precision,
1286
	}
1287
}
1288
1289
// parseSimpleDate parses a Y-M-D, Y/M/D or Y.M.D date out of plain text.
// It returns the zero Date when the text is shorter than 8 bytes, does not
// split into exactly three parts, has a non-numeric part, or has a month
// outside 1..12 or a day outside 1..31. The returned Date has no Span.
func parseSimpleDate(s string) ast.Date {
	if len(s) < 8 {
		return ast.Date{}
	}

	sep := byte('-')
	switch {
	case strings.Contains(s, "/"):
		sep = '/'
	case strings.Contains(s, "."):
		sep = '.'
	}

	parts := strings.Split(s, string(sep))
	if len(parts) != 3 {
		return ast.Date{}
	}

	year, yearErr := strconv.Atoi(parts[0])
	month, monthErr := strconv.Atoi(parts[1])
	day, dayErr := strconv.Atoi(parts[2])
	// Reject garbage instead of letting ignored conversion errors turn it
	// into zero-valued fields of an otherwise accepted date.
	if yearErr != nil || monthErr != nil || dayErr != nil {
		return ast.Date{}
	}
	// Same bounds as parseDate applies to DATE tokens.
	if month < 1 || month > 12 || day < 1 || day > 31 {
		return ast.Date{}
	}

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}
}