all repos

gbf @ 374a8c7

⭐ gleaming brainfuck

gbf/src/gbf/lexer.gleam(view raw)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import gbf/token.{type Token}
import gleam/list
import gleam/string
import splitter

/// Lexer state: `source` is the not-yet-consumed remainder of the input,
/// `offset` is the number of bytes consumed so far (used for token
/// positions), and `newlines` is a pre-built splitter matching "\r\n" or
/// "\n", reused when scanning comments to end-of-line.
pub opaque type Lexer {
  Lexer(source: String, offset: Int, newlines: splitter.Splitter)
}

/// Byte offset of a token within the original source string.
pub type Position {
  Position(offset: Int)
}

/// Create a lexer positioned at the start of `source`.
///
/// The newline splitter is constructed once here and reused for every
/// comment scan, so `lex` does not rebuild it per token.
pub fn new(source: String) -> Lexer {
  Lexer(source:, offset: 0, newlines: splitter.new(["\r\n", "\n"]))
}

/// Tokenise the entire remaining source, returning tokens paired with
/// their byte positions, in source order.
pub fn lex(lexer: Lexer) -> List(#(Token, Position)) {
  // do_lex accumulates tokens in reverse, so flip the list at the end.
  let reversed_tokens = do_lex(lexer, [])
  list.reverse(reversed_tokens)
}

/// Tail-recursively accumulate tokens (in reverse order, newest first)
/// until `next` produces the end-of-file marker.
fn do_lex(
  lexer: Lexer,
  tokens: List(#(Token, Position)),
) -> List(#(Token, Position)) {
  case next(lexer) {
    // EOF is a sentinel, not a real token: stop without recording it.
    #(_, #(token.EOF, _)) -> tokens
    #(lexer, token) -> do_lex(lexer, [token, ..tokens])
  }
}

/// Produce the next token (paired with its byte position) together with
/// the advanced lexer. Yields `token.EOF` once the source is exhausted.
fn next(lexer: Lexer) {
  case lexer.source {
    // Skip insignificant whitespace. Each of these prefixes is a single
    // byte, so advancing the offset by 1 keeps it byte-accurate; "\r\n"
    // is consumed as two separate one-byte steps.
    " " <> source | "\n" <> source | "\r" <> source | "\t" <> source ->
      advance(lexer, source, 1) |> next

    // The eight Brainfuck commands, each a single one-byte character.
    ">" <> source -> token(lexer, token.IncrementPointer, source, 1)
    "<" <> source -> token(lexer, token.DecrementPointer, source, 1)
    "+" <> source -> token(lexer, token.IncrementByte, source, 1)
    "-" <> source -> token(lexer, token.DecrementByte, source, 1)
    "." <> source -> token(lexer, token.OutputByte, source, 1)
    "," <> source -> token(lexer, token.InputByte, source, 1)
    "[" <> source -> token(lexer, token.StartBlock, source, 1)
    "]" <> source -> token(lexer, token.EndBlock, source, 1)

    _ ->
      case string.pop_grapheme(lexer.source) {
        // No grapheme left: the source is empty, signal end of input.
        Error(_) -> #(lexer, #(token.EOF, Position(lexer.offset)))
        // Any other character begins a comment. NOTE(review): `comment`
        // consumes up to the next newline, so a command character that
        // follows a non-command character on the same line (e.g. the "+"
        // in "a+b") is swallowed into the comment text rather than being
        // emitted as a command token — confirm this is intended.
        Ok(_) -> comment(lexer, lexer.offset)
      }
  }
}

/// Move the lexer forward: replace the remaining source with `source`
/// (the text after the consumed prefix) and bump the byte offset by
/// `offset` (the consumed prefix's size in bytes).
fn advance(lexer: Lexer, source: String, offset: Int) -> Lexer {
  Lexer(..lexer, source:, offset: lexer.offset + offset)
}

/// Pair an already-built token with the lexer advanced past it. Shaped
/// for pipeline use: the token arrives as the first (piped) argument.
fn advanced(
  token: #(Token, Position),
  lexer: Lexer,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  let stepped = advance(lexer, source, offset)
  #(stepped, token)
}

/// Emit a single-command token positioned at the lexer's current offset,
/// then advance past it.
fn token(
  lexer: Lexer,
  token: Token,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  // The position is recorded before advancing, so it points at the
  // command character itself.
  let position = Position(offset: lexer.offset)
  advanced(#(token, position), lexer, source, offset)
}

/// Consume the rest of the current line as one comment token. `start` is
/// the byte offset where the comment began.
fn comment(lexer: Lexer, start: Int) -> #(Lexer, #(Token, Position)) {
  // Everything before the next newline (or the whole remainder when no
  // newline follows) is the comment text; the newline itself stays in
  // the source for `next` to skip as whitespace.
  let #(text, rest) = splitter.split_before(lexer.newlines, lexer.source)
  let lexer = advance(lexer, rest, string.byte_size(text))
  #(lexer, #(token.Comment(text), Position(start)))
}