scratch/brainfuck/src/gbf/internal/lexer.gleam
import gbf/internal/token.{type Token}
import gleam/list
import gleam/string
import splitter

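/// A lexer over Brainfuck source code. It holds the source text still to be
/// lexed, the byte offset reached so far, and a reusable `splitter` for
/// finding where a run of comment text ends.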
pub opaque type Lexer {
  Lexer(source: String, offset: Int, comment_ends: splitter.Splitter)
}

pub type Position {
  /// A token's position in a file, represented as a byte offset.
  Position(offset: Int)
}

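/// Create a lexer positioned at the start of the given source code.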
pub fn new(source: String) -> Lexer {
  // A comment ends at a newline or at the next command character: commands
  // must still be lexed even when they share a line with comment text.
  Lexer(
    source:,
    offset: 0,
    comment_ends: splitter.new([
      "\r\n", "\n", ">", "<", "+", "-", ".", ",", "[", "]",
    ]),
  )
}

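/// Lex the whole source, returning each token paired with the byte offset
/// where it starts, in source order.
///
/// ```gleam
/// lex(new("+>"))
/// // -> [
/// //   #(token.IncrementByte, Position(0)),
/// //   #(token.IncrementPointer, Position(1)),
/// // ]
/// ```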
pub fn lex(lexer: Lexer) -> List(#(Token, Position)) {
  do_lex(lexer, [])
  |> list.reverse
}

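// Tail recursive loop that accumulates tokens in reverse order until the end
// of the input is reached.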
fn do_lex(lexer: Lexer, tokens: List(#(Token, Position))) {
  case next(lexer) {
    #(_, #(token.EndOfFile, _)) -> tokens
    #(lexer, token) -> do_lex(lexer, [token, ..tokens])
  }
}

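// Lex a single token, returning the advanced lexer together with the token
// and the position of its first byte.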
fn next(lexer: Lexer) -> #(Lexer, #(Token, Position)) {
  case lexer.source {
    // Whitespace is insignificant: skip it and lex whatever follows.
    " " <> source | "\n" <> source | "\r" <> source | "\t" <> source ->
      advance(lexer, source, 1) |> next

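    // Every command is a single one-byte character, so each of these arms
    // advances the offset by one byte.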
| 36 | ">" <> source -> token(lexer, token.IncrementPointer, source, 1) |
| 37 | "<" <> source -> token(lexer, token.DecrementPointer, source, 1) |
| 38 | "+" <> source -> token(lexer, token.IncrementByte, source, 1) |
| 39 | "-" <> source -> token(lexer, token.DecrementByte, source, 1) |
| 40 | "." <> source -> token(lexer, token.OutputByte, source, 1) |
| 41 | "," <> source -> token(lexer, token.InputByte, source, 1) |
| 42 | "[" <> source -> token(lexer, token.StartBlock, source, 1) |
| 43 | "]" <> source -> token(lexer, token.EndBlock, source, 1) |
| 44 | |
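    // Anything else is either the end of the input or the first character of
    // a run of comment text.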
    _ ->
      case string.pop_grapheme(lexer.source) {
        Error(_) -> #(lexer, #(token.EndOfFile, Position(lexer.offset)))
        Ok(_) -> comment(lexer, lexer.offset)
      }
  }
}

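// Replace the remaining source with `source` and move the byte offset
// forwards by `offset` bytes.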
fn advance(lexer: Lexer, source: String, offset: Int) -> Lexer {
  Lexer(..lexer, source:, offset: lexer.offset + offset)
}

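// Pair a token with the lexer advanced past it. The token comes first so
// that this function can be used at the end of a pipeline.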
fn advanced(
  token: #(Token, Position),
  lexer: Lexer,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  #(advance(lexer, source, offset), token)
}

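// Produce a token for a single-character command starting at the lexer's
// current offset, advancing the lexer past it.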
fn token(
  lexer: Lexer,
  token: Token,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  #(token, Position(offset: lexer.offset))
  |> advanced(lexer, source, offset)
}

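// Lex a run of comment text starting at byte offset `start`. The comment
// runs until the next newline or command character; the terminator is left
// in the source to be lexed by the next call to `next`.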
fn comment(lexer: Lexer, start: Int) -> #(Lexer, #(Token, Position)) {
  let #(prefix, suffix) =
    splitter.split_before(lexer.comment_ends, lexer.source)
  let eaten = string.byte_size(prefix)
  let lexer = advance(lexer, suffix, eaten)

  #(lexer, #(token.Comment(prefix), Position(start)))
}