clerk: journal/parser/parser.go (8e80bf9)

1

package parser

2

3

import (

4

	"fmt"

5

	"strconv"

6

	"strings"

7

8

	"olexsmir.xyz/clerk/internal/decimal"

9

	"olexsmir.xyz/clerk/journal/ast"

10

	"olexsmir.xyz/clerk/journal/lexer"

11

	"olexsmir.xyz/clerk/journal/token"

12

13

14

type Parser struct {

15

	lexer  *lexer.Lexer

16

	errors []*ast.ParseError

17

	cur    token.Token

18

	peek   token.Token

19

20

21

func New(lex *lexer.Lexer) *Parser {

22

	p := &Parser{lexer: lex}

23

	p.advance() // populate .peek

24

	p.advance() // populate .cur

25

	return p

26

27

28

func (p *Parser) ParseJournal() *ast.Journal {

29

	f := &ast.Journal{}

30

	for p.cur.Type != token.EOF {

31

		if e := p.parseEntry(); e != nil {

32

			f.Entries = append(f.Entries, e)

33

34

35

	f.Errors = p.errors

36

	return f

37

38

39

func isDirectiveKeyword(t token.Type) bool {

40

	switch t {

41

	case token.COMMENTKW, token.ACCOUNT, token.COMMODITY, token.INCLUDE,

42

		token.ALIAS, token.PAYEE, token.TAG, token.APPLY, token.END,

43

		token.YEAR, token.DECIMALMARK, token.D, token.P, token.N, token.C:

44

		return true

45

46

	return false

47

48

49

func (p *Parser) parseEntry() ast.Entry {

50

	if p.got(token.BANG) || p.got(token.AT) {

51

		if isDirectiveKeyword(p.peek.Type) {

52

			p.advance() // consume prefix

53

54

55

	switch p.cur.Type {

56

	case token.ILLEGAL:

57

		p.errorf("illegal character %q", p.cur.Literal)

58

		p.advance()

59

		return nil

60

	case token.INDENT:

61

		p.errorf("unexpected indent")

62

		p.syncToNextline()

63

		return nil

64

	case token.DATE:

65

		return p.parseTransaction()

66

	case token.TILDE:

67

		return p.parsePeriodicTransaction()

68

	case token.EQ:

69

		return p.parseAutomatedTransaction()

70

	case token.NEWLINE:

71

		return p.parseBlankLine()

72

	case token.SEMICOLON, token.HASH, token.PERCENT, token.STAR:

73

		return p.parseComment()

74

	case token.ACCOUNT:

75

		return p.parseAccountDirective()

76

	case token.COMMODITY:

77

		return p.parseCommodityDirective()

78

	case token.INCLUDE:

79

		return p.parseIncludeDirective()

80

	case token.ALIAS:

81

		return p.parseAliasDirective()

82

	case token.PAYEE:

83

		return p.parsePayeeDirective()

84

	case token.TAG:

85

		return p.parseTagDirective()

86

	case token.YEAR:

87

		return p.parseYearDirective()

88

	case token.DECIMALMARK:

89

		return p.parseDecimalMarkDirective()

90

	case token.D:

91

		return p.parseDefaultCommodityDirective()

92

	case token.P:

93

		return p.parseMarketPriceDirective()

94

	case token.N:

95

		return p.parseIgnoredDirective()

96

	case token.C:

97

		return p.parseConversionDirective()

98

	case token.APPLY:

99

		return p.parseApplyDirective()

100

	case token.END:

101

		return p.parseEndDirective()

102

	case token.COMMENTKW:

103

		return p.parseCommentBlockDirective()

104

	default:

105

		p.errorf("unexpected token %s", p.cur.Type)

106

		p.sync()

107

		return nil

108

109

110

111

func (p *Parser) parseTransaction() *ast.Transaction {

112

	s := p.cur.Span

113

	tx := &ast.Transaction{}

114

115

	tx.Date = p.parseDate()

116

117

	// optional secondary date

118

	if p.got(token.EQ) {

119

		p.advance()

120

		d := p.parseDate()

121

		tx.SecondDate = &d

122

123

124

	p.skipWhitespace()

125

126

	// optional status

127

	tx.Status = p.parseStatus()

128

129

	// optional code

130

	if p.got(token.LPAREN) {

131

		p.advance()

132

		var code strings.Builder

133

		for p.cur.Type != token.RPAREN {

134

			_, _ = code.WriteString(p.cur.Literal)

135

			p.advance()

136

137

		tx.Code = new(code.String())

138

		p.skipWhitespace()

139

140

141

	// optional payee | note

142

	if p.got(token.TEXT) || p.got(token.STRING) {

143

		tx.Payee = p.parsePayee()

144

145

		// check for | separator

146

		if p.got(token.WHITESPACE) {

147

			p.skipWhitespace()

148

149

150

		if p.got(token.PIPE) {

151

			p.advance()

152

			p.skipWhitespace()

153

154

			var note strings.Builder

155

			for p.got(token.TEXT) || p.got(token.WHITESPACE) {

156

				_, _ = note.WriteString(p.cur.Literal)

157

				p.advance()

158

159

			tx.Note = new(note.String())

160

161

162

163

	tx.Comment = p.parseOptInlineComment()

164

	p.expectNewline()

165

166

	// header comments — indented ; lines before first posting

167

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

168

		p.advance() // consume indent

169

		c := p.parseComment()

170

		tx.HeaderComments = append(tx.HeaderComments, *c)

171

172

173

	// postings

174

	for p.got(token.INDENT) {

175

		if p := p.parsePosting(); p != nil {

176

			tx.Postings = append(tx.Postings, p)

177

178

179

180

	tx.Span = p.span(s)

181

	return tx

182

183

184

// unquote strips one pair of matching surrounding quotes (both '"' or both
// '\'') from s. Any other input is returned unchanged.
func unquote(s string) string {
	if len(s) < 2 {
		return s
	}
	first, last := s[0], s[len(s)-1]
	if first != last {
		return s
	}
	switch first {
	case '"', '\'':
		return s[1 : len(s)-1]
	}
	return s
}

190

191

func (p *Parser) parsePayee() *ast.Payee {

192

	s := p.cur.Span

193

194

	if p.got(token.STRING) {

195

		name := unquote(p.cur.Literal)

196

		p.advance()

197

		return &ast.Payee{Name: name, Span: p.span(s)}

198

199

200

	// keep spaces/tags between text tokens; stop before trailing whitespace

201

	var name strings.Builder

202

	for p.got(token.TEXT) || p.got(token.INT) || p.got(token.DECIMAL) || (p.got(token.WHITESPACE) && (p.willGet(token.TEXT) || p.willGet(token.INT) || p.willGet(token.DECIMAL))) {

203

		_, _ = name.WriteString(p.cur.Literal)

204

		p.advance()

205

206

	return &ast.Payee{Name: unquote(name.String()), Span: p.span(s)}

207

208

209

func (p *Parser) parsePeriodicTransaction() *ast.PeriodicTransaction {

210

	s := p.cur.Span

211

	p.expect(token.TILDE)

212

	p.skipWhitespace()

213

214

	pt := &ast.PeriodicTransaction{}

215

216

	pt.Span = p.span(s)

217

	pt.Period = p.parsePeriod()

218

219

	if desc := p.parseOptPeriodicDescription(); desc != "" {

220

		pt.Description = &desc

221

222

223

	comment := p.parseOptInlineComment()

224

	p.expectNewline()

225

226

	var headerComments []*ast.Comment

227

	var postings []*ast.Posting

228

	for p.got(token.INDENT) || p.got(token.SEMICOLON) {

229

		if p.got(token.SEMICOLON) {

230

			c := p.parseComment()

231

			headerComments = append(headerComments, c)

232

			continue

233

234

		posting := p.parsePosting()

235

		if posting != nil {

236

			postings = append(postings, posting)

237

238

239

240

	pt.HeaderComments = headerComments

241

	pt.Postings = postings

242

	pt.Comment = comment

243

	return pt

244

245

246

func (p *Parser) parseAutomatedTransaction() *ast.AutomatedTransaction {

247

	s := p.cur.Span

248

	p.expect(token.EQ)

249

	p.skipWhitespace()

250

251

	at := &ast.AutomatedTransaction{}

252

	at.Span = p.span(s)

253

254

	at.Expr = p.parseDirectiveExpr()

255

	at.Comment = p.parseOptInlineComment()

256

	p.expectNewline()

257

258

	// header comments

259

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

260

		p.advance()

261

		at.HeaderComments = append(at.HeaderComments, p.parseComment())

262

263

264

	// postings

265

	for p.got(token.INDENT) {

266

		if p := p.parsePosting(); p != nil {

267

			at.Postings = append(at.Postings, p)

268

269

270

271

	return at

272

273

274

func (p *Parser) parsePeriod() ast.Period {

275

	s := p.cur.Span

276

277

	var periodBuf strings.Builder

278

279

	for !p.got(token.NEWLINE) && !p.got(token.EOF) &&

280

		!p.got(token.SEMICOLON) && !p.got(token.HASH) && !p.got(token.PERCENT) && !p.got(token.STAR) {

281

282

		if p.got(token.WHITESPACE) {

283

			if len(p.cur.Literal) >= 2 {

284

				break

285

286

			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) ||

287

				p.willGet(token.SEMICOLON) || p.willGet(token.HASH) ||

288

				p.willGet(token.PERCENT) || p.willGet(token.STAR) {

289

				p.advance()

290

				continue

291

292

293

294

		periodBuf.WriteString(p.cur.Literal)

295

		p.advance()

296

297

298

	str := periodBuf.String()

299

	period := ast.Period{Raw: str, Span: p.span(s)}

300

301

	if _, after, ok := strings.Cut(str, " from "); ok {

302

		end := strings.Index(after, " ")

303

		dateStr := after

304

		if end >= 0 {

305

			dateStr = after[:end]

306

307

		if d := parseSimpleDate(dateStr); d.Year > 0 {

308

			period.From = &d

309

			rest := after

310

			if end >= 0 {

311

				rest = after[end:]

312

313

			if _, toAfter, ok := strings.Cut(rest, " to "); ok {

314

				if toEnd := strings.Index(toAfter, " "); toEnd >= 0 {

315

					toAfter = toAfter[:toEnd]

316

317

				if d := parseSimpleDate(toAfter); d.Year > 0 {

318

					period.To = &d

319

320

321

322

323

	return period

324

325

326

func (p *Parser) parseComment() *ast.Comment {

327

	s := p.cur.Span

328

	marker := p.cur.Literal[0]

329

	p.advance()

330

	p.skipWhitespace()

331

332

	var text string

333

	if p.got(token.TEXT) {

334

		text = p.cur.Literal

335

		p.advance()

336

337

338

	p.expectNewline()

339

340

	return &ast.Comment{

341

		Marker: marker,

342

		Text:   text,

343

		Span:   p.span(s),

344

345

346

347

func (p *Parser) parseAccountDirective() *ast.AccountDirective {

348

	s := p.cur.Span

349

	p.expect(token.ACCOUNT)

350

	p.skipWhitespace()

351

352

	account := p.parseAccount()

353

	comment := p.parseOptInlineComment()

354

	p.expectNewline()

355

356

	for p.got(token.INDENT) {

357

		p.advance()

358

		for !p.got(token.NEWLINE) && !p.got(token.EOF) {

359

			p.advance()

360

361

		p.expectNewline()

362

363

364

	return &ast.AccountDirective{

365

		Account: account,

366

		Comment: comment,

367

		Span:    p.span(s),

368

369

370

371

func (p *Parser) parseCommodityDirective() *ast.CommodityDirective {

372

	s := p.cur.Span

373

	p.expect(token.COMMODITY)

374

	p.skipWhitespace()

375

376

	var commodity string

377

	var format *ast.Amount

378

379

	switch p.cur.Type {

380

	case token.TEXT, token.INT, token.DECIMAL:

381

		format = p.parseAmount()

382

		commodity = format.Commodity

383

	case token.COMMODITYMARK:

384

		commodity = p.cur.Literal

385

		p.advance()

386

		hadSpace := p.got(token.WHITESPACE)

387

		p.skipWhitespace()

388

		if p.got(token.INT) || p.got(token.DECIMAL) || p.got(token.TEXT) {

389

			format = p.parseAmount()

390

			format.Commodity = commodity

391

			format.CommodityPos = ast.CommodityBefore

392

			format.HasSpace = hadSpace

393

394

	default:

395

		p.errorf("expected commodity name or amount, got %s", p.cur.Type)

396

397

398

	if commodity == "" {

399

		p.errorf("expected commodity name, got %s", p.cur.Type)

400

401

402

	comment := p.parseOptInlineComment()

403

	p.expectNewline()

404

405

	for p.got(token.INDENT) {

406

		p.advance()

407

		p.skipWhitespace()

408

		if p.got(token.COMMODITYMARK) && p.cur.Literal == "format" {

409

			p.advance()

410

			p.skipWhitespace()

411

			format = p.parseAmount()

412

		} else {

413

			for !p.got(token.NEWLINE) && !p.got(token.EOF) {

414

				p.advance()

415

416

417

		p.expectNewline()

418

419

420

	cd := &ast.CommodityDirective{

421

		Commodity: commodity,

422

		Comment:   comment,

423

		Span:      p.span(s),

424

425

	if format != nil {

426

		cd.Format = *format

427

428

	return cd

429

430

431

func (p *Parser) parseIncludeDirective() *ast.IncludeDirective {

432

	s := p.cur.Span

433

	p.expect(token.INCLUDE)

434

	p.skipWhitespace()

435

436

	path := ""

437

	if p.got(token.TEXT) {

438

		path = p.cur.Literal

439

		p.advance()

440

	} else {

441

		p.errorf("expected file path, got %s", p.cur.Type)

442

443

444

	comment := p.parseOptInlineComment()

445

	p.expectNewline()

446

447

	return &ast.IncludeDirective{

448

		Path:    path,

449

		Comment: comment,

450

		Span:    p.span(s),

451

452

453

454

func (p *Parser) parseAliasDirective() *ast.AliasDirective {

455

	s := p.cur.Span

456

	alias := &ast.AliasDirective{}

457

	p.expect(token.ALIAS)

458

	p.skipWhitespace()

459

	alias.From = p.parseAccount().Name

460

	p.skipWhitespace()

461

	p.expect(token.EQ)

462

	p.skipWhitespace()

463

	alias.To = p.parseAccount().Name

464

	p.skipWhitespace()

465

	alias.Comment = p.parseOptInlineComment()

466

	p.expectNewline()

467

	alias.Span = p.span(s)

468

	return alias

469

470

471

func (p *Parser) parsePayeeDirective() *ast.PayeeDirective {

472

	s := p.cur.Span

473

	p.expect(token.PAYEE)

474

	p.skipWhitespace()

475

476

	name := ""

477

	if p.got(token.TEXT) || p.got(token.STRING) {

478

		name = p.parsePayee().Name

479

480

481

	comment := p.parseOptInlineComment()

482

	p.expectNewline()

483

484

	return &ast.PayeeDirective{

485

		Name:    name,

486

		Comment: comment,

487

		Span:    p.span(s),

488

489

490

491

func (p *Parser) parseTagDirective() *ast.TagDirective {

492

	s := p.cur.Span

493

	p.expect(token.TAG)

494

	p.skipWhitespace()

495

496

	name := ""

497

	if p.got(token.TEXT) {

498

		name = p.cur.Literal

499

		p.advance()

500

501

502

	comment := p.parseOptInlineComment()

503

	p.expectNewline()

504

505

	return &ast.TagDirective{

506

		Name:    name,

507

		Comment: comment,

508

		Span:    p.span(s),

509

510

511

512

func (p *Parser) parseYearDirective() *ast.YearDirective {

513

	s := p.cur.Span

514

	year := &ast.YearDirective{}

515

	p.expect(token.YEAR)

516

	p.skipWhitespace()

517

518

	if p.got(token.INT) {

519

		year.Year, _ = strconv.Atoi(p.cur.Literal)

520

		p.advance()

521

	} else {

522

		p.errorf("expected year, got %s", p.cur.Type)

523

524

525

	p.skipWhitespace()

526

	year.Comment = p.parseOptInlineComment()

527

	p.expectNewline()

528

	year.Span = p.span(s)

529

	return year

530

531

532

func (p *Parser) parseDecimalMarkDirective() *ast.DecimalMarkDirective {

533

	s := p.cur.Span

534

	mark := &ast.DecimalMarkDirective{}

535

	p.expect(token.DECIMALMARK)

536

	p.skipWhitespace()

537

538

	mark.Mark = byte('.')

539

	if p.got(token.TEXT) {

540

		if len(p.cur.Literal) > 0 {

541

			mark.Mark = p.cur.Literal[0]

542

543

		p.advance()

544

545

546

	p.skipWhitespace()

547

	mark.Comment = p.parseOptInlineComment()

548

	p.expectNewline()

549

	mark.Span = p.span(s)

550

	return mark

551

552

553

func (p *Parser) parseDefaultCommodityDirective() *ast.DefaultCommodityDirective {

554

	s := p.cur.Span

555

	com := &ast.DefaultCommodityDirective{}

556

	p.expect(token.D)

557

	p.skipWhitespace()

558

	com.Amount = *p.parseAmount()

559

	p.skipWhitespace()

560

	com.Comment = p.parseOptInlineComment()

561

	p.expectNewline()

562

	com.Span = p.span(s)

563

	return com

564

565

566

func (p *Parser) parseConversionDirective() *ast.ConversionDirective {

567

	s := p.cur.Span

568

	cd := &ast.ConversionDirective{}

569

	p.expect(token.C)

570

	p.skipWhitespace()

571

572

	if p.isAmountStart() {

573

		cd.From = *p.parseAmount()

574

	} else {

575

		p.errorf("expected amount, got %s", p.cur.Type)

576

577

578

	p.skipWhitespace()

579

	if p.got(token.EQ) {

580

		p.advance()

581

		p.skipWhitespace()

582

		if p.isAmountStart() {

583

			cd.To = *p.parseAmount()

584

		} else {

585

			p.errorf("expected amount, got %s", p.cur.Type)

586

587

588

589

	p.skipWhitespace()

590

	cd.Comment = p.parseOptInlineComment()

591

	p.expectNewline()

592

	cd.Span = p.span(s)

593

	return cd

594

595

596

func (p *Parser) parseIgnoredDirective() *ast.IgnoredDirective {

597

	s := p.cur.Span

598

	p.expect(token.N)

599

	p.skipWhitespace()

600

	if p.got(token.TEXT) || p.got(token.COMMODITYMARK) {

601

		p.advance()

602

603

	p.skipWhitespace()

604

	comment := p.parseOptInlineComment()

605

	p.expectNewline()

606

	return &ast.IgnoredDirective{

607

		Comment: comment,

608

		Span:    p.span(s),

609

610

611

612

func (p *Parser) parseMarketPriceDirective() *ast.MarketPriceDirective {

613

	s := p.cur.Span

614

	p.expect(token.P)

615

	p.skipWhitespace()

616

617

	mp := &ast.MarketPriceDirective{}

618

	mp.DateTime.Date = p.parseDate()

619

	p.skipWhitespace()

620

621

	if p.got(token.TIME) {

622

		mp.DateTime.Time = new(p.parseTime())

623

		p.skipWhitespace()

624

625

626

	tok, _ := p.expect(token.COMMODITYMARK)

627

	mp.Commodity = tok.Literal

628

	p.advance()

629

	p.skipWhitespace()

630

631

	mp.Amount = *p.parseAmount()

632

633

	p.skipWhitespace()

634

	mp.Comment = p.parseOptInlineComment()

635

636

	p.expectNewline()

637

	mp.Span = p.span(s)

638

	return mp

639

640

641

func (p *Parser) parseTime() ast.Time {

642

	s := p.cur.Span

643

	tok, _ := p.expect(token.TIME)

644

	lit := tok.Literal

645

646

	parts := strings.Split(lit, ":")

647

	if len(parts) < 2 {

648

		p.errorf("invalid time format: %q", lit)

649

		return ast.Time{Span: p.span(s)}

650

651

652

	hour, _ := strconv.Atoi(parts[0])

653

	minute, _ := strconv.Atoi(parts[1])

654

	second := 0

655

	if len(parts) > 2 {

656

		second, _ = strconv.Atoi(parts[2])

657

658

659

	if hour < 0 || hour > 23 {

660

		p.errorf("invalid hour %d in time %q", hour, lit)

661

662

	if minute < 0 || minute > 59 {

663

		p.errorf("invalid minute %d in time %q", minute, lit)

664

665

	if second < 0 || second > 59 {

666

		p.errorf("invalid second %d in time %q", second, lit)

667

668

669

	return ast.Time{

670

		Hour:   hour,

671

		Minute: minute,

672

		Second: second,

673

		Span:   p.span(s),

674

675

676

677

func (p *Parser) parseApplyDirective() *ast.ApplyDirective {

678

	s := p.cur.Span

679

	p.expect(token.APPLY)

680

	p.skipWhitespace()

681

682

	expr := p.parseDirectiveExpr()

683

	comment := p.parseOptInlineComment()

684

	p.expectNewline()

685

686

	return &ast.ApplyDirective{

687

		Expr:    expr,

688

		Comment: comment,

689

		Span:    p.span(s),

690

691

692

693

func (p *Parser) parseEndDirective() *ast.EndDirective {

694

	s := p.cur.Span

695

	p.expect(token.END)

696

	p.skipWhitespace()

697

698

	expr := p.parseDirectiveExpr()

699

	comment := p.parseOptInlineComment()

700

	p.expectNewline()

701

702

	return &ast.EndDirective{

703

		Expr:    expr,

704

		Comment: comment,

705

		Span:    p.span(s),

706

707

708

709

func (p *Parser) parseCommentBlockDirective() *ast.CommentBlockDirective {

710

	start := p.cur.Span

711

	p.expect(token.COMMENTKW)

712

	p.skipWhitespace()

713

714

	header := p.parseDirectiveExpr()

715

	comment := p.parseOptInlineComment()

716

	p.expectNewline()

717

718

	var content strings.Builder

719

	for p.cur.Type != token.EOF {

720

		if p.got(token.END) {

721

			if p.willGet(token.NEWLINE) || p.willGet(token.EOF) {

722

				p.advance()

723

				p.expectNewline()

724

				break

725

726

			if p.willGet(token.WHITESPACE) {

727

				endTok := p.cur

728

				p.advance()

729

				wsTok := p.cur

730

				p.advance()

731

				if p.got(token.TEXT) && p.cur.Literal == "comment" { // todo: this should check if it's an actual COMMENTKW token

732

					p.advance()

733

					p.parseDirectiveExpr()

734

					p.parseOptInlineComment()

735

					p.expectNewline()

736

					break

737

738

				content.WriteString(endTok.Literal)

739

				content.WriteString(wsTok.Literal)

740

				continue

741

742

743

		content.WriteString(p.cur.Literal)

744

		p.advance()

745

746

747

	return &ast.CommentBlockDirective{

748

		Header:  header,

749

		Content: content.String(),

750

		Comment: comment,

751

		Span:    p.span(start),

752

753

754

755

func (p *Parser) parseStatus() *ast.Status {

756

	if p.got(token.STAR) || p.got(token.BANG) {

757

		status := ast.StatusPending

758

		if p.cur.Literal[0] == '*' {

759

			status = ast.StatusCleared

760

761

		st := &ast.Status{Value: status, Span: p.cur.Span}

762

		p.advance()

763

		p.skipWhitespace()

764

		return st

765

766

	return nil

767

768

769

func (p *Parser) isAmountStart() bool {

770

	switch p.cur.Type {

771

	default:

772

		return false

773

	case token.COMMODITYMARK, token.INT, token.DECIMAL, token.MINUS, token.PLUS, token.PARENEXPR:

774

		return true

775

	case token.TEXT:

776

		return len(p.cur.Literal) > 0 && p.cur.Literal[0] >= '0' && p.cur.Literal[0] <= '9'

777

778

779

780

func (p *Parser) parseAmount() *ast.Amount {

781

	s := p.cur.Span

782

	amt := &ast.Amount{

783

		QuantityFmt: ast.QuantityFormat{Decimal: '.'},

784

		Span:        p.span(s),

785

786

787

	// commodity before quantity: $10.00

788

	if p.got(token.COMMODITYMARK) {

789

		amt.Commodity = p.cur.Literal

790

		amt.CommodityPos = ast.CommodityBefore

791

		p.advance()

792

		if p.got(token.WHITESPACE) {

793

			amt.HasSpace = true

794

			p.skipWhitespace()

795

796

		switch p.cur.Type {

797

		case token.MINUS:

798

			amt.IsNegative = true

799

			p.advance()

800

		case token.PLUS:

801

			p.advance()

802

803

		p.parseQuantityInto(amt)

804

	} else {

805

		// optional sign

806

		switch p.cur.Type {

807

		case token.MINUS:

808

			amt.IsNegative = true

809

			p.advance()

810

		case token.PLUS:

811

			p.advance()

812

813

814

		// commodity before quantity: -$120:

815

		if p.got(token.COMMODITYMARK) {

816

			amt.Commodity = p.cur.Literal

817

			amt.CommodityPos = ast.CommodityBefore

818

			p.advance()

819

			if p.got(token.WHITESPACE) {

820

				amt.HasSpace = true

821

				p.skipWhitespace()

822

823

824

825

		p.parseQuantityInto(amt)

826

827

		// commodity after quantity: 10.00 UAH (only if not set)

828

		if amt.Commodity == "" {

829

			switch p.cur.Type {

830

			case token.WHITESPACE:

831

				p.skipWhitespace()

832

				if p.got(token.COMMODITYMARK) || p.got(token.TEXT) {

833

					amt.HasSpace = true

834

					amt.Commodity = p.cur.Literal

835

					amt.CommodityPos = ast.CommodityAfter

836

					p.advance()

837

838

			case token.COMMODITYMARK, token.TEXT:

839

				amt.Commodity = p.cur.Literal

840

				amt.CommodityPos = ast.CommodityAfter

841

				p.advance()

842

843

844

845

846

	return amt

847

848

849

func (p *Parser) parseAmountWithOptExpr() *ast.Amount {

850

	if p.got(token.STAR) {

851

		p.advance()

852

		p.skipWhitespace()

853

		amt := p.parseAmount()

854

		if amt != nil {

855

			amt.IsExpr = true

856

857

		return amt

858

859

	if p.got(token.PARENEXPR) {

860

		lit := p.cur.Literal

861

		amt := &ast.Amount{

862

			IsExpr:      true,

863

			QuantityFmt: ast.QuantityFormat{Decimal: '.'},

864

865

		if len(lit) >= 2 && lit[0] == '(' && lit[len(lit)-1] == ')' {

866

			inner := lit[1 : len(lit)-1]

867

			i := 0

868

			for i < len(inner) && (inner[i] == ' ' || inner[i] == '\t') {

869

i++

870

871

			j := len(inner)

872

			for j > i && (inner[j-1] == ' ' || inner[j-1] == '\t') {

873

j--

874

875

			amt.Expr = inner[i:j]

876

877

		amt.Span = p.cur.Span

878

		p.advance()

879

		return amt

880

881

	return p.parseAmount()

882

883

884

func (p *Parser) parsePosting() *ast.Posting {

885

	s := p.cur.Span

886

	posting := &ast.Posting{}

887

	p.expect(token.INDENT)

888

889

	// exit if it's empty line

890

	if p.got(token.NEWLINE) || p.got(token.EOF) {

891

		p.syncToNextline()

892

		return nil

893

894

895

	// optional status, outside of brackets, '! (account)'

896

	posting.Status = p.parseStatus()

897

898

	// detect virtual posting brackets

899

	switch p.cur.Type {

900

	case token.LPAREN:

901

		posting.Type = ast.PostingVirtualUnbalanced

902

		p.advance()

903

	case token.LBRACKET:

904

		posting.Type = ast.PostingVirtualBalanced

905

		p.advance()

906

907

908

	// optional status, inside of brackets, '(* account)'

909

	if p.got(token.STAR) || p.got(token.BANG) {

910

		posting.Status = p.parseStatus()

911

912

913

	// validate, must be account text

914

	if p.cur.Type != token.TEXT {

915

		p.errorf("expected account name, got %s", p.cur.Type)

916

		p.syncToNextline()

917

		return nil

918

919

920

	posting.Account = p.parseAccount()

921

922

	// consume closing bracket

923

	switch p.cur.Type {

924

	case token.RPAREN:

925

		p.advance()

926

	case token.RBRACKET:

927

		p.advance()

928

929

930

	// optional amount - after two spaces

931

	if p.got(token.WHITESPACE) {

932

		p.skipWhitespace()

933

		if p.isAmountStart() || p.got(token.STAR) {

934

			posting.Amount = p.parseAmountWithOptExpr()

935

936

937

938

	// optional cost '@' or '@@'

939

	if p.got(token.WHITESPACE) {

940

		p.skipWhitespace()

941

942

	if p.got(token.AT) || p.got(token.ATAT) {

943

		posting.Cost = p.parseCost()

944

945

946

	// optional balance assertion

947

	if p.got(token.WHITESPACE) {

948

		p.skipWhitespace()

949

950

	if p.got(token.EQ) || p.got(token.EQEQ) || p.got(token.EQEQEQ) {

951

		posting.Balance = p.parseBalanceAssertion()

952

		p.skipWhitespace()

953

		if p.got(token.AT) || p.got(token.ATAT) {

954

			p.parseCost()

955

956

957

958

	posting.Comment = p.parseOptInlineComment()

959

	p.expectNewline()

960

961

	// continuation comments

962

	for p.got(token.INDENT) && p.willGet(token.SEMICOLON) {

963

		p.advance()

964

		c := p.parseComment()

965

		posting.Comments = append(posting.Comments, *c)

966

967

968

	posting.Span = p.span(s)

969

	return posting

970

971

972

func (p *Parser) parseCost() *ast.Cost {

973

	s := p.cur.Span

974

	isTotal := p.got(token.ATAT)

975

	p.advance() // consume '@' '@@'

976

	p.skipWhitespace()

977

	return &ast.Cost{

978

		IsTotal: isTotal,

979

		Amount:  *p.parseAmount(),

980

		Span:    p.span(s),

981

982

983

984

func (p *Parser) parseBalanceAssertion() *ast.BalanceAssertion {

985

	s := p.cur.Span

986

987

	ba := &ast.BalanceAssertion{}

988

	switch p.cur.Type {

989

	case token.EQ: // basic assertion

990

	case token.EQEQ:

991

		ba.IsStrict = true

992

	case token.EQEQEQ:

993

		ba.IsStrict = true

994

		ba.IsInclusive = true

995

996

	p.advance()

997

	p.skipWhitespace()

998

999

	ba.Amount = *p.parseAmount()

1000

	ba.Span = p.span(s)

1001

	return ba

1002

1003

1004

func (p *Parser) parseAccount() ast.Account {

1005

	s := p.cur.Span

1006

	var name strings.Builder

1007

1008

	switch p.cur.Type {

1009

	case token.TEXT:

1010

		_, _ = name.WriteString(p.cur.Literal)

1011

		p.advance()

1012

		if p.got(token.WHITESPACE) && p.willGet(token.TEXT) && p.peek.Literal[0] != '(' {

1013

			_, _ = name.WriteString(" ")

1014

			p.advance()

1015

			_, _ = name.WriteString(p.cur.Literal)

1016

			p.advance()

1017

1018

	case token.COMMODITYMARK:

1019

		_, _ = name.WriteString(p.cur.Literal)

1020

		p.advance()

1021

		for p.got(token.TEXT) {

1022

			_, _ = name.WriteString(p.cur.Literal)

1023

			p.advance()

1024

1025

1026

	return ast.Account{Name: name.String(), Span: p.span(s)}

1027

1028

1029

func (p *Parser) parseDate() ast.Date {

1030

	s := p.cur.Span

1031

	tok, ok := p.expect(token.DATE)

1032

	if !ok {

1033

		return ast.Date{Span: p.span(s)}

1034

1035

1036

	sep := byte(0)

1037

	lit := tok.Literal

1038

	for i := 0; i < len(lit); i++ {

1039

		if lit[i] == '/' || lit[i] == '-' || lit[i] == '.' {

1040

			sep = lit[i]

1041

			break

1042

1043

1044

	if sep == 0 {

1045

		p.errorf("invalid date format: %q", lit)

1046

		return ast.Date{Span: p.span(s)}

1047

1048

1049

	parts := strings.Split(lit, string(sep))

1050

1051

	// M/D or MM/DD (year inferred)

1052

	if len(parts) == 2 {

1053

		month, err := strconv.Atoi(parts[0])

1054

		day, err2 := strconv.Atoi(parts[1])

1055

		if err != nil || err2 != nil {

1056

			p.errorf("invalid date literal: %q", lit)

1057

			return ast.Date{Span: p.span(s)}

1058

1059

		if month < 1 || month > 12 {

1060

			p.errorf("invalid month %d in %q", month, lit)

1061

			return ast.Date{Span: p.span(s)}

1062

1063

		if day < 1 || day > 31 {

1064

			p.errorf("invalid day %d in %q", day, lit)

1065

			return ast.Date{Span: p.span(s)}

1066

1067

		return ast.Date{Month: month, Day: day, Sep: sep, Span: p.span(s)}

1068

1069

1070

	if len(parts) != 3 {

1071

		p.errorf("invalid date format: %q", lit)

1072

		return ast.Date{Span: p.span(s)}

1073

1074

1075

	year, err := strconv.Atoi(parts[0])

1076

	month, err2 := strconv.Atoi(parts[1])

1077

	day, err3 := strconv.Atoi(parts[2])

1078

	if err != nil || err2 != nil || err3 != nil {

1079

		p.errorf("invalid date literal: %q", lit)

1080

		return ast.Date{Span: p.span(s)}

1081

1082

	if month < 1 || month > 12 {

1083

		p.errorf("invalid month %d in %q", month, lit)

1084

		return ast.Date{Span: p.span(s)}

1085

1086

	if day < 1 || day > 31 {

1087

		p.errorf("invalid day %d in %q", day, lit)

1088

		return ast.Date{Span: p.span(s)}

1089

1090

1091

	return ast.Date{

1092

		Year:  year,

1093

		Month: month,

1094

		Day:   day,

1095

		Sep:   sep,

1096

		Span:  p.span(s),

1097

1098

1099

1100

func (p *Parser) parseOptInlineComment() *ast.Comment {

1101

	p.skipWhitespace() // todo:

1102

	if p.cur.Type != token.SEMICOLON && p.cur.Type != token.HASH {

1103

		return nil

1104

1105

1106

	s := p.cur.Span

1107

	marker := p.cur.Literal[0]

1108

	p.advance() // consume marker

1109

	p.skipWhitespace()

1110

1111

	text := ""

1112

	if p.got(token.TEXT) {

1113

		text = p.cur.Literal

1114

		p.advance()

1115

1116

1117

	return &ast.Comment{

1118

		Marker: marker,

1119

		Text:   text,

1120

		Span:   p.span(s),

1121

1122

1123

1124

func (p *Parser) parseOptPeriodicDescription() string {

1125

	if p.cur.Type != token.WHITESPACE || len(p.cur.Literal) < 2 {

1126

		return ""

1127

1128

1129

	p.skipWhitespace()

1130

1131

	if p.cur.Type != token.TEXT {

1132

		return ""

1133

1134

1135

	return p.parseDescription()

1136

1137

1138

func (p *Parser) parseDescription() string {

1139

	var desc strings.Builder

1140

	for p.got(token.TEXT) || (p.got(token.WHITESPACE) && p.willGet(token.TEXT)) {

1141

		_, _ = desc.WriteString(p.cur.Literal)

1142

		p.advance()

1143

1144

	return desc.String()

1145

1146

1147

func (p *Parser) parseDirectiveExpr() string {

1148

	var b strings.Builder

1149

	for p.cur.Type != token.NEWLINE && p.cur.Type != token.EOF && p.cur.Type != token.SEMICOLON {

1150

		_, _ = b.WriteString(p.cur.Literal)

1151

		p.advance()

1152

1153

	return b.String()

1154

1155

1156

func (p *Parser) parseQuantityInto(amt *ast.Amount) {

1157

	if p.cur.Type != token.INT && p.cur.Type != token.DECIMAL && p.cur.Type != token.TEXT {

1158

		p.errorf("expected quantity, got %s", p.cur.Type)

1159

		return

1160

1161

1162

	lit := p.cur.Literal

1163

	p.advance()

1164

1165

	// detect format metadata before normalizing

1166

	amt.QuantityFmt = detectFormat(lit)

1167

1168

	// normalize for decimal.NewFromString

1169

	// remove thousands separators, replace decimal mark with '.'

1170

	normalized := normalizeLiteral(lit, amt.QuantityFmt.Thousands, amt.QuantityFmt.Decimal)

1171

1172

	q, err := decimal.FromString(normalized)

1173

	if err != nil {

1174

		p.errorf("invalid quantity %q: %v", lit, err)

1175

		return

1176

1177

1178

	if amt.IsNegative {

1179

		q = q.Neg()

1180

1181

	amt.Quantity = q

1182

1183

1184

func (p *Parser) parseBlankLine() *ast.BlankLine {

1185

	s := p.cur.Span

1186

	p.expectNewline()

1187

	return &ast.BlankLine{Span: s}

1188

1189

1190

func (p *Parser) expectNewline() {

1191

	if p.got(token.NEWLINE) || p.got(token.EOF) {

1192

		if p.got(token.NEWLINE) {

1193

			p.advance()

1194

1195

		return

1196

1197

	p.errorf("expected %s, got %s", token.NEWLINE, p.cur.Type)

1198

1199

1200

func (p *Parser) advance() token.Token {

1201

	prev := p.cur

1202

	p.cur = p.peek

1203

	p.peek = p.lexer.Next()

1204

	return prev

1205

1206

1207

// got reports whether the current token has type kind.
func (p *Parser) got(kind token.Type) bool {
	return kind == p.cur.Type
}

1208

// willGet reports whether the lookahead token has type kind.
func (p *Parser) willGet(kind token.Type) bool {
	return kind == p.peek.Type
}

1209

1210

func (p *Parser) expect(kind token.Type) (token.Token, bool) {

1211

	if p.got(kind) {

1212

		return p.advance(), true

1213

1214

	p.errorf("expected %s, got %s", kind, p.cur.Type)

1215

	return p.cur, false

1216

1217

1218

func (p *Parser) errorf(format string, args ...any) {

1219

	p.errors = append(p.errors, &ast.ParseError{

1220

		Span:    p.cur.Span,

1221

		Message: fmt.Sprintf(format, args...),

1222

})

1223

1224

1225

func (p *Parser) sync() {

1226

	for {

1227

		switch p.cur.Type {

1228

		case token.EOF:

1229

			return

1230

		case token.NEWLINE:

1231

			p.advance()

1232

			switch p.cur.Type {

1233

			case token.DATE, token.ACCOUNT, token.COMMODITY,

1234

				token.INCLUDE, token.ALIAS, token.PAYEE,

1235

				token.TAG, token.YEAR, token.D, token.P,

1236

				token.APPLY, token.END, token.COMMENTKW,

1237

				token.DECIMALMARK, token.TILDE, token.N, token.EQ:

1238

				return

1239

1240

		default:

1241

			p.advance()

1242

1243

1244

1245

1246

func (p *Parser) syncToNextline() {

1247

	for p.cur.Type != token.NEWLINE && p.cur.Type != token.EOF {

1248

		p.advance()

1249

1250

	if p.got(token.NEWLINE) {

1251

		p.advance()

1252

1253

1254

1255

func (p *Parser) skipWhitespace() {

1256

	for p.got(token.WHITESPACE) {

1257

		p.advance()

1258

1259

1260

1261

func (p *Parser) span(s token.Span) token.Span {

1262

	return token.Span{Start: s.Start, End: p.cur.Span.Start}

1263

1264

1265

// normalizeLiteral rewrites a number literal into plain form: every byte
// equal to thousands is dropped (unless thousands is 0) and every byte
// equal to decimal becomes '.'. The thousands check runs first, so a byte
// matching both is dropped.
func normalizeLiteral(lit string, thousands, decimal byte) string {
	var out strings.Builder
	out.Grow(len(lit))
	for i := 0; i < len(lit); i++ {
		switch c := lit[i]; {
		case thousands != 0 && c == thousands:
			// grouping mark: skip
		case c == decimal:
			out.WriteByte('.')
		default:
			out.WriteByte(c)
		}
	}
	return out.String()
}

1279

1280

func detectFormat(lit string) ast.QuantityFormat {

1281

	var separators []int

1282

	for i, ch := range []byte(lit) {

1283

		if ch == '.' || ch == ',' || ch == ' ' || ch == '_' || ch == '\'' {

1284

			separators = append(separators, i)

1285

1286

1287

1288

	if len(separators) == 0 {

1289

		return ast.QuantityFormat{Decimal: '.', Thousands: 0, Precision: 0}

1290

1291

1292

	var decimal byte

1293

	thousands := byte(0)

1294

	precision := 0

1295

1296

	if len(separators) == 1 {

1297

		pos := separators[0]

1298

		sepChar := lit[pos]

1299

		if sepChar == ' ' || sepChar == '_' || sepChar == '\'' {

1300

			thousands = sepChar

1301

			decimal = '.' // default

1302

			precision = 0

1303

		} else {

1304

			decimal = sepChar

1305

			precision = len(lit) - pos - 1

1306

1307

	} else {

1308

		last := separators[len(separators)-1]

1309

		decimal = lit[last]

1310

		thousands = lit[separators[0]]

1311

		precision = len(lit) - last - 1

1312

1313

1314

	return ast.QuantityFormat{

1315

		Decimal:   decimal,

1316

		Thousands: thousands,

1317

		Precision: precision,

1318

1319

1320

1321

func parseSimpleDate(s string) ast.Date {

1322

	if len(s) < 8 {

1323

		return ast.Date{}

1324

1325

	sep := byte('-')

1326

	if strings.Contains(s, "/") {

1327

		sep = byte('/')

1328

	} else if strings.Contains(s, ".") {

1329

		sep = byte('.')

1330

1331

	parts := strings.Split(s, string(sep))

1332

	if len(parts) != 3 {

1333

		return ast.Date{}

1334

1335

	year, _ := strconv.Atoi(parts[0])

1336

	month, _ := strconv.Atoi(parts[1])

1337

	day, _ := strconv.Atoi(parts[2])

1338

	return ast.Date{Year: year, Month: month, Day: day, Sep: sep}

1339