scratch/brainfuck/src/gbf/internal/lexer.gleam

import gbf/internal/token.{type Token}
import gleam/list
import gleam/string
import splitter

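/// Lexer state: the source text still to be lexed, the byte offset of the
/// current position, and a splitter used to find the end of comment lines.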
pub opaque type Lexer {
  Lexer(source: String, offset: Int, newlines: splitter.Splitter)
}

pub type Position {
  /// A token's position in a file, represented as a byte offset
  Position(offset: Int)
}

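/// Create a lexer positioned at the start of the given source string.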
pub fn new(source: String) -> Lexer {
  Lexer(source:, offset: 0, newlines: splitter.new(["\r\n", "\n"]))
}

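/// Lex the whole source, returning each token paired with the byte offset
/// at which it starts, in source order.
///
/// A sketch of expected usage (assuming this module is imported as `lexer`):
///
/// ```gleam
/// lexer.lex(lexer.new("+-"))
/// // -> [#(token.IncrementByte, Position(0)), #(token.DecrementByte, Position(1))]
/// ```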
pub fn lex(lexer: Lexer) -> List(#(Token, Position)) {
  do_lex(lexer, [])
  |> list.reverse
}

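// Accumulate tokens in reverse until EndOfFile, which is not included in
// the output.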
fn do_lex(
  lexer: Lexer,
  tokens: List(#(Token, Position)),
) -> List(#(Token, Position)) {
  case next(lexer) {
    #(_, #(token.EndOfFile, _)) -> tokens
    #(lexer, token) -> do_lex(lexer, [token, ..tokens])
  }
}

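// Produce the next token. Whitespace is skipped, the eight Brainfuck
// commands map directly to tokens, and anything else starts a comment.
// An exhausted source yields EndOfFile.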
fn next(lexer: Lexer) -> #(Lexer, #(Token, Position)) {
  case lexer.source {
    " " <> source | "\n" <> source | "\r" <> source | "\t" <> source ->
      advance(lexer, source, 1) |> next

    ">" <> source -> token(lexer, token.IncrementPointer, source, 1)
    "<" <> source -> token(lexer, token.DecrementPointer, source, 1)
    "+" <> source -> token(lexer, token.IncrementByte, source, 1)
    "-" <> source -> token(lexer, token.DecrementByte, source, 1)
    "." <> source -> token(lexer, token.OutputByte, source, 1)
    "," <> source -> token(lexer, token.InputByte, source, 1)
    "[" <> source -> token(lexer, token.StartBlock, source, 1)
    "]" <> source -> token(lexer, token.EndBlock, source, 1)

    _ ->
      case string.pop_grapheme(lexer.source) {
        Error(_) -> #(lexer, #(token.EndOfFile, Position(lexer.offset)))
        Ok(_) -> comment(lexer, lexer.offset)
      }
  }
}

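// Move the lexer forward to the given remaining source, bumping the byte
// offset by the number of bytes consumed.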
fn advance(lexer: Lexer, source: String, offset: Int) -> Lexer {
  Lexer(..lexer, source:, offset: lexer.offset + offset)
}

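// Pair an already-built token with the lexer advanced past its text.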
fn advanced(
  token: #(Token, Position),
  lexer: Lexer,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  #(advance(lexer, source, offset), token)
}

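// Build a token at the current offset, then advance the lexer past it.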
fn token(
  lexer: Lexer,
  token: Token,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  #(token, Position(offset: lexer.offset))
  |> advanced(lexer, source, offset)
}

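// Consume a comment: everything up to (but not including) the next newline
// becomes a single Comment token, positioned where the comment began.
// A sketch of the intended result (byte offsets):
//
//   lex(new("hi\n+"))
//   // -> [#(token.Comment("hi"), Position(0)), #(token.IncrementByte, Position(3))]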
fn comment(lexer: Lexer, start: Int) -> #(Lexer, #(Token, Position)) {
  let #(prefix, suffix) = splitter.split_before(lexer.newlines, lexer.source)
  let eaten = string.byte_size(prefix)
  let lexer = advance(lexer, suffix, eaten)

  #(lexer, #(token.Comment(prefix), Position(start)))
}