clerk: journal/parser/parser.go (cb14fd1)

1

package parser

2

3

import (

4

	"fmt"

5

	"strconv"

6

	"strings"

7

8

	"olexsmir.xyz/clerk/internal/decimal"

9

	"olexsmir.xyz/clerk/journal/ast"

10

	"olexsmir.xyz/clerk/journal/lexer"

11

	"olexsmir.xyz/clerk/journal/token"

12

13

14

type Parser struct {

15

	lexer  *lexer.Lexer

16

	errors []*ast.ParseError

17

	cur    token.Token

18

	peek   token.Token

19

20

21

func New(lex *lexer.Lexer) *Parser {

22

	p := &Parser{lexer: lex}

23

	p.advance() // populate .peek

24

	p.advance() // populate .cur

25

	return p

26

27

28

func (p *Parser) ParseJournal() *ast.Journal {

29

	f := &ast.Journal{}

30

	for p.cur.Type != token.EOF {

31

		if e := p.parseEntry(); e != nil {

32

			f.Entries = append(f.Entries, e)

33

34

35

	f.Errors = p.errors

36

	return f

37

38

39

func isDirectiveKeyword(t token.Type) bool {

40

	switch t {

41

	case token.COMMENTKW, token.ACCOUNT, token.COMMODITY, token.INCLUDE,

42

		token.ALIAS, token.PAYEE, token.TAG, token.APPLY, token.END,

43

		token.YEAR, token.DECIMALMARK, token.D, token.P, token.N:

44

		return true

45

46

	return false

47

48

49

func (p *Parser) parseEntry() ast.Entry {

50

	if p.got(token.BANG) || p.got(token.AT) {

51

		if isDirectiveKeyword(p.peek.Type) {

52

			p.advance() // consume prefix

53

54

55

	switch p.cur.Type {

56

	case token.ILLEGAL:

57

		p.errorf("illegal character %q", p.cur.Literal)

58

		p.advance()

59

		return nil

60

	case token.INDENT:

61

		p.errorf("unexpected indent")

62

		p.syncToNextline()

63

		return nil

64

	case token.DATE:

65

		return p.parseTransaction()

66

	case token.TILDE:

67

		return p.parsePeriodicTransaction()

68

	case token.EQ:

69

		return p.parseAutomatedTransaction()

70

	case token.NEWLINE:

71

		return p.parseBlankLine()

72

	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:

73

		return p.parseComment()

74

	case token.ACCOUNT:

75

		return p.parseAccountDirective()

76

	case token.COMMODITY:

77

		return p.parseCommodityDirective()

78

	case token.INCLUDE:

79

		return p.parseIncludeDirective()

80

	case token.ALIAS:

81

		return p.parseAliasDirective()

82

	case token.PAYEE:

83

		return p.parsePayeeDirective()

84

	case token.TAG:

85

		return p.parseTagDirective()

86

	case token.YEAR:

87

		return p.parseYearDirective()

88

	case token.DECIMALMARK:

89

		return p.parseDecimalMarkDirective()

90

	case token.D:

91

		return p.parseDefaultCommodityDirective()

92

	case token.P:

93

		return p.parseMarketPriceDirective()

94

	case token.N:

95

		return p.parseIgnoredDirective()

96

	case token.APPLY:

97

		return p.parseApplyDirective()

98

	case token.END:

99

		return p.parseEndDirective()

100

	case token.COMMENTKW:

101

		return p.parseCommentBlockDirective()

102

	default:

103

		p.errorf("unexpected token %s", p.cur.Type)

104

		p.sync()

105

		return nil

106

107

108

109

func (p *Parser) parseTransaction() *ast.Transaction {

110

	s := p.cur.Span

111

	tx := &ast.Transaction{}

112

113

	tx.Date = p.parseDate()

114

115

	// optional secondary date

116

	if p.got(token.EQ) {

117

		p.advance()

118

		d := p.parseDate()

119

		tx.SecondDate = &d

120

121

122

	p.skipWhitespace()

123

124

	// optional status

125

	tx.Status = p.parseStatus()

126

127

	// optional code

128

	if p.got(token.LPAREN) {

129

		p.advance()

130

		var code strings.Builder

131

		for p.cur.Type != token.RPAREN {

132

			_, _ = code.WriteString(p.cur.Literal)

133

			p.advance()

134

135

		tx.Code = new(code.String())

136

		p.skipWhitespace()

137

138

139

	// optional payee | note

140

	if p.got(token.TEXT) || p.got(token.STRING) {

141

		tx.Payee = p.parsePayee()

142

143

		// check for | separator

144

		if p.got(token.WHITESPACE) {

145

			p.skipWhitespace()

146

147

148

		if p.got(token.PIPE) {

149

			p.advance()

150

			p.skipWhitespace()

151

152

			var note strings.Builder

153

			for p.got(token.TEXT) || p.got(token.WHITESPACE) {

154

				_, _ = note.WriteString(p.cur.Literal)

155

				p.advance()

156

157

			tx.Note = new(note.String())

158

159

160

161

	tx.Comment = p.parseOptInlineComment()

162

	p.expectNewline()

163

164

	// header comments — indented ; lines before first posting

165

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

166

		p.advance() // consume indent

167

		c := p.parseComment()

168

		tx.HeaderComments = append(tx.HeaderComments, *c)

169

170

171

	// postings

172

	for p.got(token.INDENT) {

173

		if p := p.parsePosting(); p != nil {

174

			tx.Postings = append(tx.Postings, p)

175

176

177

178

	tx.Span = p.span(s)

179

	return tx

180

181

182

// unquote strips one pair of matching surrounding quotes (both '"' or both
// '\'') from s. Strings shorter than two bytes, or without a matching pair,
// are returned unchanged.
func unquote(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first != last {
		return s
	}
	if first == '"' || first == '\'' {
		return s[1 : len(s)-1]
	}
	return s
}

188

189

func (p *Parser) parsePayee() *ast.Payee {

190

	s := p.cur.Span

191

192

	if p.got(token.STRING) {

193

		name := unquote(p.cur.Literal)

194

		p.advance()

195

		return &ast.Payee{Name: name, Span: p.span(s)}

196

197

198

	// keep spaces/tags between text tokens; stop before trailing whitespace

199

	var name strings.Builder

200

	for p.got(token.TEXT) || p.got(token.INT) || p.got(token.DECIMAL) || (p.got(token.WHITESPACE) && (p.willGet(token.TEXT) || p.willGet(token.INT) || p.willGet(token.DECIMAL))) {

201

		_, _ = name.WriteString(p.cur.Literal)

202

		p.advance()

203

204

	return &ast.Payee{Name: unquote(name.String()), Span: p.span(s)}

205

206

207

func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {

208

	s := p.cur.Span

209

	p.expect(token.TILDE)

210

	p.skipWhitespace()

211

212

	pt := &ast.PeriodicTransaction{}

213

214

	pt.Span = p.span(s)

215

	pt.Period = p.parsePeriod()

216

217

	if desc := p.parseOptPeriodicDescription(); desc != "" {

218

		pt.Description = &desc

219

220

221

	comment := p.parseOptInlineComment()

222

	p.expectNewline()

223

224

	var headerComments []*ast.Comment

225

	var postings []*ast.Posting

226

	for p.got(token.INDENT) || p.got(token.SEMICOLON) {

227

		if p.got(token.SEMICOLON) {

228

			c := p.parseComment()

229

			headerComments = append(headerComments, c)

230

			continue

231

232

		posting := p.parsePosting()

233

		if posting != nil {

234

			postings = append(postings, posting)

235

236

237

238

	pt.HeaderComments = headerComments

239

	pt.Postings = postings

240

	pt.Comment = comment

241

	return pt

242

243

244

func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {

245

	s := p.cur.Span

246

	p.expect(token.EQ)

247

	p.skipWhitespace()

248

249

	at := &ast.AutomatedTransaction{}

250

	at.Span = p.span(s)

251

252

	at.Expr = p.parseDirectiveExpr()

253

	at.Comment = p.parseOptInlineComment()

254

	p.expectNewline()

255

256

	// header comments

257

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

258

		p.advance()

259

		at.HeaderComments = append(at.HeaderComments, p.parseComment())

260

261

262

	// postings

263

	for p.got(token.INDENT) {

264

		if p := p.parsePosting(); p != nil {

265

			at.Postings = append(at.Postings, p)

266

267

268

269

	return at

270

271

272

func (p *Parser) parsePeriod() ast.Period {

273

	s := p.cur.Span

274

275

	var periodBuf strings.Builder

276

277

	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&

278

		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {

279

280

		if p.got(token.WHITESPACE) {

281

			if len(p.cur.Literal) >= 2 {

282

				break

283

284

			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||

285

				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||

286

				p.willGet(token.PERCENT) || p.willGet(token.STAR) {

287

				p.advance()

288

				continue

289

290

291

292

		periodBuf.WriteString(p.cur.Literal)

293

		p.advance()

294

295

296

	str := periodBuf.String()

297

	period := ast.Period{Raw: str, Span: p.span(s)}

298

299

	if _, after, ok := strings.Cut(str, " from "); ok {

300

		end := strings.Index(after, " ")

301

		dateStr := after

302

		if end >= 0 {

303

			dateStr = after[:end]

304

305

		if d := parseSimpleDate(dateStr); d.Year > 0 {

306

			period.From = &d

307

			rest := after

308

			if end >= 0 {

309

				rest = after[end:]

310

311

			if _, toAfter, ok := strings.Cut(rest, " to "); ok {

312

				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {

313

					toAfter = toAfter[:toEnd]

314

315

				if d := parseSimpleDate(toAfter); d.Year > 0 {

316

					period.To = &d

317

318

319

320

321

	return period

322

323

324

func (p *Parser) parseComment() *ast.Comment {

325

	s := p.cur.Span

326

	marker := p.cur.Literal[0]

327

	p.advance()

328

	p.skipWhitespace()

329

330

	var text string

331

	if p.got(token.TEXT) {

332

		text = p.cur.Literal

333

		p.advance()

334

335

336

	p.expectNewline()

337

338

	return &ast.Comment{

339

		Marker: marker,

340

		Text:   text,

341

		Span:   p.span(s),

342

343

344

345

func (p *Parser) parseAccountDirective() *ast.AccountDirective {

346

	s := p.cur.Span

347

	p.expect(token.ACCOUNT)

348

	p.skipWhitespace()

349

350

	account := p.parseAccount()

351

	comment := p.parseOptInlineComment()

352

	p.expectNewline()

353

354

	for p.got(token.INDENT) {

355

		p.advance()

356

		for !p.got(token.NEWLINE) && !p.got(token.EOF) {

357

			p.advance()

358

359

		p.expectNewline()

360

361

362

	return &ast.AccountDirective{

363

		Account: account,

364

		Comment: comment,

365

		Span:    p.span(s),

366

367

368

369

func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {

370

	s := p.cur.Span

371

	p.expect(token.COMMODITY)

372

	p.skipWhitespace()

373

374

	var commodity string

375

	var format *ast.Amount

376

377

	switch p.cur.Type {

378

	case token.TEXT, token.INT, token.DECIMAL:

379

		format = p.parseAmount()

380

		commodity = format.Commodity

381

	case token.COMMODITYMARK:

382

		commodity = p.cur.Literal

383

		p.advance()

384

		hadSpace := p.got(token.WHITESPACE)

385

		p.skipWhitespace()

386

		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {

387

			format = p.parseAmount()

388

			format.Commodity = commodity

389

			format.CommodityPos = ast.CommodityBefore

390

			format.HasSpace = hadSpace

391

392

	default:

393

		p.errorf("expected commodity name or amount, got %s", p.cur.Type)

394

395

396

	if commodity == "" {

397

		p.errorf("expected commodity name, got %s", p.cur.Type)

398

399

400

	comment := p.parseOptInlineComment()

401

	p.expectNewline()

402

403

	for p.got(token.INDENT) {

404

		p.advance()

405

		p.skipWhitespace()

406

		if p.got(token.COMMODITYMARK) && p.cur.Literal == "format" {

407

			p.advance()

408

			p.skipWhitespace()

409

			format = p.parseAmount()

410

		} else {

411

			for !p.got(token.NEWLINE) && !p.got(token.EOF) {

412

				p.advance()

413

414

415

		p.expectNewline()

416

417

418

	cd := &ast.CommodityDirective{

419

		Commodity: commodity,

420

		Comment:   comment,

421

		Span:      p.span(s),

422

423

	if format != nil {

424

		cd.Format = *format

425

426

	return cd

427

428

429

func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {

430

	s := p.cur.Span

431

	p.expect(token.INCLUDE)

432

	p.skipWhitespace()

433

434

	path := ""

435

	if p.got(token.TEXT) {

436

		path = p.cur.Literal

437

		p.advance()

438

	} else {

439

		p.errorf("expected file path, got %s", p.cur.Type)

440

441

442

	comment := p.parseOptInlineComment()

443

	p.expectNewline()

444

445

	return &ast.IncludeDirective{

446

		Path:    path,

447

		Comment: comment,

448

		Span:    p.span(s),

449

450

451

452

func (p *Parser) parseAliasDirective() *ast.AliasDirective {

453

	s := p.cur.Span

454

	alias := &ast.AliasDirective{}

455

	p.expect(token.ALIAS)

456

	p.skipWhitespace()

457

	alias.From = p.parseAccount().Name

458

	p.skipWhitespace()

459

	p.expect(token.EQ)

460

	p.skipWhitespace()

461

	alias.To = p.parseAccount().Name

462

	p.skipWhitespace()

463

	alias.Comment = p.parseOptInlineComment()

464

	p.expectNewline()

465

	alias.Span = p.span(s)

466

	return alias

467

468

469

func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {

470

	s := p.cur.Span

471

	p.expect(token.PAYEE)

472

	p.skipWhitespace()

473

474

	name := ""

475

	if p.got(token.TEXT) || p.got(token.STRING) {

476

		name = p.parsePayee().Name

477

478

479

	comment := p.parseOptInlineComment()

480

	p.expectNewline()

481

482

	return &ast.PayeeDirective{

483

		Name:    name,

484

		Comment: comment,

485

		Span:    p.span(s),

486

487

488

489

func (p *Parser) parseTagDirective() *ast.TagDirective {

490

	s := p.cur.Span

491

	p.expect(token.TAG)

492

	p.skipWhitespace()

493

494

	name := ""

495

	if p.got(token.TEXT) {

496

		name = p.cur.Literal

497

		p.advance()

498

499

500

	comment := p.parseOptInlineComment()

501

	p.expectNewline()

502

503

	return &ast.TagDirective{

504

		Name:    name,

505

		Comment: comment,

506

		Span:    p.span(s),

507

508

509

510

func (p *Parser) parseYearDirective() *ast.YearDirective {

511

	s := p.cur.Span

512

	year := &ast.YearDirective{}

513

	p.expect(token.YEAR)

514

	p.skipWhitespace()

515

516

	if p.got(token.INT) {

517

		year.Year, _ = strconv.Atoi(p.cur.Literal)

518

		p.advance()

519

	} else {

520

		p.errorf("expected year, got %s", p.cur.Type)

521

522

523

	p.skipWhitespace()

524

	year.Comment = p.parseOptInlineComment()

525

	p.expectNewline()

526

	year.Span = p.span(s)

527

	return year

528

529

530

func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {

531

	s := p.cur.Span

532

	mark := &ast.DecimalMarkDirective{}

533

	p.expect(token.DECIMALMARK)

534

	p.skipWhitespace()

535

536

	mark.Mark = byte('.')

537

	if p.got(token.TEXT) {

538

		if len(p.cur.Literal) > 0 {

539

			mark.Mark = p.cur.Literal[0]

540

541

		p.advance()

542

543

544

	p.skipWhitespace()

545

	mark.Comment = p.parseOptInlineComment()

546

	p.expectNewline()

547

	mark.Span = p.span(s)

548

	return mark

549

550

551

func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {

552

	s := p.cur.Span

553

	com := &ast.DefaultCommodityDirective{}

554

	p.expect(token.D)

555

	p.skipWhitespace()

556

	com.Amount = *p.parseAmount()

557

	p.skipWhitespace()

558

	com.Comment = p.parseOptInlineComment()

559

	p.expectNewline()

560

	com.Span = p.span(s)

561

	return com

562

563

564

func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {

565

	s := p.cur.Span

566

	p.expect(token.N)

567

	p.skipWhitespace()

568

	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {

569

		p.advance()

570

571

	p.skipWhitespace()

572

	comment := p.parseOptInlineComment()

573

	p.expectNewline()

574

	return &ast.IgnoredDirective{

575

		Comment: comment,

576

		Span:    p.span(s),

577

578

579

580

func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {

581

	s := p.cur.Span

582

	p.expect(token.P)

583

	p.skipWhitespace()

584

585

	mp := &ast.MarketPriceDirective{}

586

	mp.DateTime.Date = p.parseDate()

587

	p.skipWhitespace()

588

589

	if p.got(token.TIME) {

590

		mp.DateTime.Time = new(p.parseTime())

591

		p.skipWhitespace()

592

593

594

	tok, _ := p.expect(token.COMMODITYMARK)

595

	mp.Commodity = tok.Literal

596

	p.advance()

597

	p.skipWhitespace()

598

599

	mp.Amount = *p.parseAmount()

600

601

	p.skipWhitespace()

602

	mp.Comment = p.parseOptInlineComment()

603

604

	p.expectNewline()

605

	mp.Span = p.span(s)

606

	return mp

607

608

609

func (p *Parser) parseTime() ast.Time {

610

	s := p.cur.Span

611

	tok, _ := p.expect(token.TIME)

612

	lit := tok.Literal

613

614

	parts := strings.Split(lit, ":")

615

	if len(parts) < 2 {

616

		p.errorf("invalid time format: %q", lit)

617

		return ast.Time{Span: p.span(s)}

618

619

620

	hour, _ := strconv.Atoi(parts[0])

621

	minute, _ := strconv.Atoi(parts[1])

622

	second := 0

623

	if len(parts) > 2 {

624

		second, _ = strconv.Atoi(parts[2])

625

626

627

	if hour < 0 || hour > 23 {

628

		p.errorf("invalid hour %d in time %q", hour, lit)

629

630

	if minute < 0 || minute > 59 {

631

		p.errorf("invalid minute %d in time %q", minute, lit)

632

633

	if second < 0 || second > 59 {

634

		p.errorf("invalid second %d in time %q", second, lit)

635

636

637

	return ast.Time{

638

		Hour:   hour,

639

		Minute: minute,

640

		Second: second,

641

		Span:   p.span(s),

642

643

644

645

func (p *Parser) parseApplyDirective() *ast.ApplyDirective {

646

	s := p.cur.Span

647

	p.expect(token.APPLY)

648

	p.skipWhitespace()

649

650

	expr := p.parseDirectiveExpr()

651

	comment := p.parseOptInlineComment()

652

	p.expectNewline()

653

654

	return &ast.ApplyDirective{

655

		Expr:    expr,

656

		Comment: comment,

657

		Span:    p.span(s),

658

659

660

661

func (p *Parser) parseEndDirective() *ast.EndDirective {

662

	s := p.cur.Span

663

	p.expect(token.END)

664

	p.skipWhitespace()

665

666

	expr := p.parseDirectiveExpr()

667

	comment := p.parseOptInlineComment()

668

	p.expectNewline()

669

670

	return &ast.EndDirective{

671

		Expr:    expr,

672

		Comment: comment,

673

		Span:    p.span(s),

674

675

676

677

func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {

678

	start := p.cur.Span

679

	p.expect(token.COMMENTKW)

680

	p.skipWhitespace()

681

682

	header := p.parseDirectiveExpr()

683

	comment := p.parseOptInlineComment()

684

	p.expectNewline()

685

686

	var content strings.Builder

687

	for p.cur.Type != token.EOF {

688

		if p.got(token.END) {

689

			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {

690

				p.advance()

691

				p.expectNewline()

692

				break

693

694

			if p.willGet(token.WHITESPACE) {

695

				endTok := p.cur

696

				p.advance()

697

				wsTok := p.cur

698

				p.advance()

699

				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token

700

					p.advance()

701

					p.parseDirectiveExpr()

702

					p.parseOptInlineComment()

703

					p.expectNewline()

704

					break

705

706

				content.WriteString(endTok.Literal)

707

				content.WriteString(wsTok.Literal)

708

				continue

709

710

711

		content.WriteString(p.cur.Literal)

712

		p.advance()

713

714

715

	return &ast.CommentBlockDirective{

716

		Header:  header,

717

		Content: content.String(),

718

		Comment: comment,

719

		Span:    p.span(start),

720

721

722

723

func (p *Parser) parseStatus() *ast.Status {

724

	if p.got(token.STAR) || p.got(token.BANG) {

725

		status := ast.StatusPending

726

		if p.cur.Literal[0] == '*' {

727

			status = ast.StatusCleared

728

729

		st := &ast.Status{Value: status, Span: p.cur.Span}

730

		p.advance()

731

		p.skipWhitespace()

732

		return st

733

734

	return nil

735

736

737

func (p *Parser) isAmountStart() bool {

738

	switch p.cur.Type {

739

	default:

740

		return false

741

	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:

742

		return true

743

	case token.TEXT:

744

		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'

745

746

747

748

func (p *Parser) parseAmount() *ast.Amount {

749

	s := p.cur.Span

750

	amt := &ast.Amount{

751

		QuantityFmt: ast.QuantityFormat{Decimal: '.'},

752

		Span:        p.span(s),

753

754

755

	// commodity before quantity: $10.00

756

	if p.got(token.COMMODITYMARK) {

757

		amt.Commodity = p.cur.Literal

758

		amt.CommodityPos = ast.CommodityBefore

759

		p.advance()

760

		if p.got(token.WHITESPACE) {

761

			amt.HasSpace = true

762

			p.skipWhitespace()

763

764

		switch p.cur.Type {

765

		case token.MINUS:

766

			amt.IsNegative = true

767

			p.advance()

768

		case token.PLUS:

769

			p.advance()

770

771

		p.parseQuantityInto(amt)

772

	} else {

773

		// optional sign

774

		switch p.cur.Type {

775

		case token.MINUS:

776

			amt.IsNegative = true

777

			p.advance()

778

		case token.PLUS:

779

			p.advance()

780

781

782

		// commodity before quantity: -$120:

783

		if p.got(token.COMMODITYMARK) {

784

			amt.Commodity = p.cur.Literal

785

			amt.CommodityPos = ast.CommodityBefore

786

			p.advance()

787

			if p.got(token.WHITESPACE) {

788

				amt.HasSpace = true

789

				p.skipWhitespace()

790

791

792

793

		p.parseQuantityInto(amt)

794

795

		// commodity after quantity: 10.00 UAH (only if not set)

796

		if amt.Commodity == "" {

797

			switch p.cur.Type {

798

			case token.WHITESPACE:

799

				p.skipWhitespace()

800

				if p.got(token.COMMODITYMARK) {

801

					amt.HasSpace = true

802

					amt.Commodity = p.cur.Literal

803

					amt.CommodityPos = ast.CommodityAfter

804

					p.advance()

805

806

			case token.COMMODITYMARK:

807

				amt.Commodity = p.cur.Literal

808

				amt.CommodityPos = ast.CommodityAfter

809

				p.advance()

810

811

812

813

814

	return amt

815

816

817

func (p *Parser) parseAmountWithOptExpr() *ast.Amount {

818

	if p.got(token.STAR) {

819

		p.advance()

820

		p.skipWhitespace()

821

		amt := p.parseAmount()

822

		if amt != nil {

823

			amt.IsExpr = true

824

825

		return amt

826

827

	if p.got(token.PARENEXPR) {

828

		lit := p.cur.Literal

829

		amt := &ast.Amount{

830

			IsExpr:      true,

831

			QuantityFmt: ast.QuantityFormat{Decimal: '.'},

832

833

		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {

834

			inner := lit[1 : len(lit)-1]

835

			i := 0

836

			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {

837

i++

838

839

			j := len(inner)

840

			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {

841

j--

842

843

			amt.Expr = inner[i:j]

844

845

		amt.Span = p.cur.Span

846

		p.advance()

847

		return amt

848

849

	return p.parseAmount()

850

851

852

func (p *Parser) parsePosting() *ast.Posting {

853

	s := p.cur.Span

854

	posting := &ast.Posting{}

855

	p.expect(token.INDENT)

856

857

	// exit if it's empty line

858

	if p.got(token.NEWLINE) || p.got(token.EOF) {

859

		p.syncToNextline()

860

		return nil

861

862

863

	// optional status, outside of brackets, '! (account)'

864

	posting.Status = p.parseStatus()

865

866

	// detect virtual posting brackets

867

	switch p.cur.Type {

868

	case token.LPAREN:

869

		posting.Type = ast.PostingVirtualUnbalanced

870

		p.advance()

871

	case token.LBRACKET:

872

		posting.Type = ast.PostingVirtualBalanced

873

		p.advance()

874

875

876

	// optional status, inside of brackets, '(* account)'

877

	if p.got(token.STAR) || p.got(token.BANG) {

878

		posting.Status = p.parseStatus()

879

880

881

	// validate, must be account text

882

	if p.cur.Type != token.TEXT {

883

		p.errorf("expected account name, got %s", p.cur.Type)

884

		p.syncToNextline()

885

		return nil

886

887

888

	posting.Account = p.parseAccount()

889

890

	// consume closing bracket

891

	switch p.cur.Type {

892

	case token.RPAREN:

893

		p.advance()

894

	case token.RBRACKET:

895

		p.advance()

896

897

898

	// optional amount - after two spaces

899

	if p.got(token.WHITESPACE) {

900

		p.skipWhitespace()

901

		if p.isAmountStart() || p.got(token.STAR) {

902

			posting.Amount = p.parseAmountWithOptExpr()

903

904

905

906

	// optional cost '@' or '@@'

907

	if p.got(token.WHITESPACE) {

908

		p.skipWhitespace()

909

910

	if p.got(token.AT) || p.got(token.ATAT) {

911

		posting.Cost = p.parseCost()

912

913

914

	// optional balance assertion

915

	if p.got(token.WHITESPACE) {

916

		p.skipWhitespace()

917

918

	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {

919

		posting.Balance = p.parseBalanceAssertion()

920

		p.skipWhitespace()

921

		if p.got(token.AT) || p.got(token.ATAT) {

922

			p.parseCost()

923

924

925

926

	posting.Comment = p.parseOptInlineComment()

927

	p.expectNewline()

928

929

	// continuation comments

930

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

931

		p.advance()

932

		c := p.parseComment()

933

		posting.Comments = append(posting.Comments, *c)

934

935

936

	posting.Span = p.span(s)

937

	return posting

938

939

940

func (p *Parser) parseCost() *ast.Cost {

941

	s := p.cur.Span

942

	isTotal := p.got(token.ATAT)

943

	p.advance() // consume '@' '@@'

944

	p.skipWhitespace()

945

	return &ast.Cost{

946

		IsTotal: isTotal,

947

		Amount:  *p.parseAmount(),

948

		Span:    p.span(s),

949

950

951

952

func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {

953

	s := p.cur.Span

954

955

	ba := &ast.BalanceAssertion{}

956

	switch p.cur.Type {

957

	case token.EQ: // basic assertion

958

	case token.EQEQ:

959

		ba.IsStrict = true

960

	case token.EQEQEQ:

961

		ba.IsStrict = true

962

		ba.IsInclusive = true

963

964

	p.advance()

965

	p.skipWhitespace()

966

967

	ba.Amount = *p.parseAmount()

968

	ba.Span = p.span(s)

969

	return ba

970

971

972

func (p *Parser) parseAccount() ast.Account {

973

	s := p.cur.Span

974

	var name strings.Builder

975

976

	switch p.cur.Type {

977

	case token.TEXT:

978

		_, _ = name.WriteString(p.cur.Literal)

979

		p.advance()

980

		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {

981

			_, _ = name.WriteString(" ")

982

			p.advance()

983

			_, _ = name.WriteString(p.cur.Literal)

984

			p.advance()

985

986

	case token.COMMODITYMARK:

987

		_, _ = name.WriteString(p.cur.Literal)

988

		p.advance()

989

		for p.got(token.TEXT) {

990

			_, _ = name.WriteString(p.cur.Literal)

991

			p.advance()

992

993

994

	return ast.Account{Name: name.String(), Span: p.span(s)}

995

996

997

func (p *Parser) parseDate() ast.Date {

998

	s := p.cur.Span

999

	tok, ok := p.expect(token.DATE)

1000

	if !ok {

1001

		return ast.Date{Span: p.span(s)}

1002

1003

1004

	sep := byte(0)

1005

	lit := tok.Literal

1006

	for i := 0; i < len(lit); i++ {

1007

		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {

1008

			sep = lit[i]

1009

			break

1010

1011

1012

	if sep == 0 {

1013

		p.errorf("invalid date format: %q", lit)

1014

		return ast.Date{Span: p.span(s)}

1015

1016

1017

	parts := strings.Split(lit, string(sep))

1018

1019

	// M/D or MM/DD (year inferred)

1020

	if len(parts) == 2 {

1021

		month, err := strconv.Atoi(parts[0])

1022

		day, err2 := strconv.Atoi(parts[1])

1023

		if err != nil || err2 != nil {

1024

			p.errorf("invalid date literal: %q", lit)

1025

			return ast.Date{Span: p.span(s)}

1026

1027

		if month < 1 || month > 12 {

1028

			p.errorf("invalid month %d in %q", month, lit)

1029

			return ast.Date{Span: p.span(s)}

1030

1031

		if day < 1 || day > 31 {

1032

			p.errorf("invalid day %d in %q", day, lit)

1033

			return ast.Date{Span: p.span(s)}

1034

1035

		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}

1036

1037

1038

	if len(parts) != 3 {

1039

		p.errorf("invalid date format: %q", lit)

1040

		return ast.Date{Span: p.span(s)}

1041

1042

1043

	year, err := strconv.Atoi(parts[0])

1044

	month, err2 := strconv.Atoi(parts[1])

1045

	day, err3 := strconv.Atoi(parts[2])

1046

	if err != nil || err2 != nil || err3 != nil {

1047

		p.errorf("invalid date literal: %q", lit)

1048

		return ast.Date{Span: p.span(s)}

1049

1050

	if month < 1 || month > 12 {

1051

		p.errorf("invalid month %d in %q", month, lit)

1052

		return ast.Date{Span: p.span(s)}

1053

1054

	if day < 1 || day > 31 {

1055

		p.errorf("invalid day %d in %q", day, lit)

1056

		return ast.Date{Span: p.span(s)}

1057

1058

1059

	return ast.Date{

1060

		Year:  year,

1061

		Month: month,

1062

		Day:   day,

1063

		Sep:   sep,

1064

		Span:  p.span(s),

1065

1066

1067

1068

func (p *Parser) parseOptInlineComment() *ast.Comment {

1069

	p.skipWhitespace() // todo:

1070

	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {

1071

		return nil

1072

1073

1074

	s := p.cur.Span

1075

	marker := p.cur.Literal[0]

1076

	p.advance() // consume marker

1077

	p.skipWhitespace()

1078

1079

	text := ""

1080

	if p.got(token.TEXT) {

1081

		text = p.cur.Literal

1082

		p.advance()

1083

1084

1085

	return &ast.Comment{

1086

		Marker: marker,

1087

		Text:   text,

1088

		Span:   p.span(s),

1089

1090

1091

1092

func (p *Parser) parseOptPeriodicDescription() string {

1093

	if p.cur.Type != token.WHITESPACE || len(p.cur.Literal) < 2 {

1094

		return ""

1095

1096

1097

	p.skipWhitespace()

1098

1099

	if p.cur.Type != token.TEXT {

1100

		return ""

1101

1102

1103

	return p.parseDescription()

1104

1105

1106

func (p *Parser) parseDescription() string {

1107

	var desc strings.Builder

1108

	for p.got(token.TEXT) || (p.got(token.WHITESPACE) && p.willGet(token.TEXT)) {

1109

		_, _ = desc.WriteString(p.cur.Literal)

1110

		p.advance()

1111

1112

	return desc.String()

1113

1114

1115

func (p *Parser) parseDirectiveExpr() string {

1116

	var b strings.Builder

1117

	for p.cur.Type != token.NEWLINE && p.cur.Type != token.EOF && p.cur.Type != token.SEMICOLON {

1118

		_, _ = b.WriteString(p.cur.Literal)

1119

		p.advance()

1120

1121

	return b.String()

1122

1123

1124

func (p *Parser) parseQuantityInto(amt *ast.Amount) {

1125

	if p.cur.Type != token.INT && p.cur.Type != token.DECIMAL && p.cur.Type != token.TEXT {

1126

		p.errorf("expected quantity, got %s", p.cur.Type)

1127

		return

1128

1129

1130

	lit := p.cur.Literal

1131

	p.advance()

1132

1133

	// detect format metadata before normalizing

1134

	amt.QuantityFmt = detectFormat(lit)

1135

1136

	// normalize for decimal.NewFromString

1137

	// remove thousands separators, replace decimal mark with '.'

1138

	normalized := normalizeLiteral(lit, amt.QuantityFmt.Thousands, amt.QuantityFmt.Decimal)

1139

1140

	q, err := decimal.FromString(normalized)

1141

	if err != nil {

1142

		p.errorf("invalid quantity %q: %v", lit, err)

1143

		return

1144

1145

1146

	if amt.IsNegative {

1147

		q = q.Neg()

1148

1149

	amt.Quantity = q

1150

1151

1152

func (p *Parser) parseBlankLine() *ast.BlankLine {

1153

	s := p.cur.Span

1154

	p.expectNewline()

1155

	return &ast.BlankLine{Span: s}

1156

1157

1158

func (p *Parser) expectNewline() {

1159

	if p.got(token.NEWLINE) || p.got(token.EOF) {

1160

		if p.got(token.NEWLINE) {

1161

			p.advance()

1162

1163

		return

1164

1165

	p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)

1166

1167

1168

func (p *Parser) advance() token.Token {

1169

	prev := p.cur

1170

	p.cur = p.peek

1171

	p.peek = p.lexer.Next()

1172

	return prev

1173

1174

1175

// got reports whether the current token has type kind.
func (p *Parser) got(kind token.Type) bool {
	return kind == p.cur.Type
}

1176

// willGet reports whether the lookahead token has type kind.
func (p *Parser) willGet(kind token.Type) bool {
	return kind == p.peek.Type
}

1177

1178

func (p *Parser) expect(kind token.Type) (token.Token, bool) {

1179

	if p.got(kind) {

1180

		return p.advance(), true

1181

1182

	p.errorf("expected %s, got %s", kind, p.cur.Type)

1183

	return p.cur, false

1184

1185

1186

func (p *Parser) errorf(format string, args ...any) {

1187

	p.errors = append(p.errors, &ast.ParseError{

1188

		Span:    p.cur.Span,

1189

		Message: fmt.Sprintf(format, args...),

1190

})

1191

1192

1193

func (p *Parser) sync() {

1194

	for {

1195

		switch p.cur.Type {

1196

		case token.EOF:

1197

			return

1198

		case token.NEWLINE:

1199

			p.advance()

1200

			switch p.cur.Type {

1201

			case token.DATE, token.ACCOUNT, token.COMMODITY,

1202

				token.INCLUDE, token.ALIAS, token.PAYEE,

1203

				token.TAG, token.YEAR, token.D, token.P,

1204

				token.APPLY, token.END, token.COMMENTKW,

1205

				token.DECIMALMARK, token.TILDE, token.N, token.EQ:

1206

				return

1207

1208

		default:

1209

			p.advance()

1210

1211

1212

1213

1214

func (p *Parser) syncToNextline() {

1215

	for p.cur.Type != token.NEWLINE && p.cur.Type != token.EOF {

1216

		p.advance()

1217

1218

	if p.got(token.NEWLINE) {

1219

		p.advance()

1220

1221

1222

1223

func (p *Parser) skipWhitespace() {

1224

	for p.got(token.WHITESPACE) {

1225

		p.advance()

1226

1227

1228

1229

func (p *Parser) span(s token.Span) token.Span {

1230

	return token.Span{Start: s.Start, End: p.cur.Span.Start}

1231

1232

1233

// normalizeLiteral rewrites a number literal into plain "digits.digits"
// form. Every byte equal to thousands is dropped (unless thousands is 0),
// and every remaining byte equal to decimal becomes '.'. The thousands test
// runs first, so a byte matching both marks is dropped.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var out strings.Builder
	out.Grow(len(lit))
	for i := 0; i < len(lit); i++ {
		ch := lit[i]
		switch {
		case thousands != 0 && ch == thousands:
			// digit-group mark: dropped
		case ch == decimal:
			out.WriteByte('.')
		default:
			out.WriteByte(ch)
		}
	}
	return out.String()
}

1247

1248

func detectFormat(lit string) ast.QuantityFormat {

1249

	var separators []int

1250

	for i, ch := range []byte(lit) {

1251

		if ch == '.' || ch == ',' || ch == ' ' || ch == '_' || ch == '\'' {

1252

			separators = append(separators, i)

1253

1254

1255

1256

	if len(separators) == 0 {

1257

		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}

1258

1259

1260

	var decimal byte

1261

	thousands := byte(0)

1262

	precision := 0

1263

1264

	if len(separators) == 1 {

1265

		pos := separators[0]

1266

		sepChar := lit[pos]

1267

		if sepChar == ' ' || sepChar == '_' || sepChar == '\'' {

1268

			thousands = sepChar

1269

			decimal = '.' // default

1270

			precision = 0

1271

		} else {

1272

			decimal = sepChar

1273

			precision = len(lit) - pos - 1

1274

1275

	} else {

1276

		last := separators[len(separators)-1]

1277

		decimal = lit[last]

1278

		thousands = lit[separators[0]]

1279

		precision = len(lit) - last - 1

1280

1281

1282

	return ast.QuantityFormat{

1283

		Decimal:   decimal,

1284

		Thousands: thousands,

1285

		Precision: precision,

1286

1287

1288

1289

func parseSimpleDate(s string) ast.Date {

1290

	if len(s) < 8 {

1291

		return ast.Date{}

1292

1293

	sep := byte('-')

1294

	if strings.Contains(s, "/") {

1295

		sep = byte('/')

1296

	} else if strings.Contains(s, ".") {

1297

		sep = byte('.')

1298

1299

	parts := strings.Split(s, string(sep))

1300

	if len(parts) != 3 {

1301

		return ast.Date{}

1302

1303

	year, _ := strconv.Atoi(parts[0])

1304

	month, _ := strconv.Atoi(parts[1])

1305

	day, _ := strconv.Atoi(parts[2])

1306

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}

1307