gbf/src/gbf/lexer.gleam(view raw)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import gbf/token.{type Token}
import gleam/list
import gleam/string
import splitter
/// Lexer state threaded through tokenisation.
///
/// - `source`: the input that has not been consumed yet; each step replaces it
///   with the remainder after the characters just read.
/// - `offset`: how far into the original input the lexer has advanced. It grows
///   by 1 per whitespace/command character and by the byte size of each
///   comment, so it is a byte offset.
/// - `newlines`: splitter used to find where a comment ends.
pub opaque type Lexer {
  Lexer(
    source: String,
    offset: Int,
    newlines: splitter.Splitter,
  )
}
/// Location of a token: the lexer offset at which the token started.
pub type Position {
  Position(
    offset: Int,
  )
}
/// Creates a lexer positioned at the start of `source`.
///
/// The offset starts at 0 and the newline splitter recognises both `"\r\n"`
/// and `"\n"`.
pub fn new(source: String) -> Lexer {
  let line_endings = splitter.new(["\r\n", "\n"])
  Lexer(source: source, offset: 0, newlines: line_endings)
}
/// Lexes everything remaining in `lexer` and returns the tokens, each paired
/// with its starting position, in source order.
///
/// The end-of-input marker is not included in the result.
pub fn lex(lexer: Lexer) -> List(#(Token, Position)) {
  // `do_lex` prepends as it goes, so its result is newest-first.
  let newest_first = do_lex(lexer, [])
  list.reverse(newest_first)
}
/// Tail-recursive worker for `lex`.
///
/// Reads one token at a time and prepends it to `tokens`, so the accumulated
/// list is in reverse source order. Stops when `next` reports `token.EOF`; the
/// EOF token itself is not added.
fn do_lex(
  lexer: Lexer,
  tokens: List(#(Token, Position)),
) -> List(#(Token, Position)) {
  let #(rest, lexed) = next(lexer)
  case lexed {
    #(token.EOF, _) -> tokens
    _ -> do_lex(rest, [lexed, ..tokens])
  }
}
/// Reads a single token from the front of the lexer's remaining source.
///
/// Returns the advanced lexer together with the token and the position at
/// which it started. Whitespace (space, `\n`, `\r`, tab) is skipped without
/// producing a token. Any other non-command text becomes a comment token.
/// On empty input the lexer is returned unchanged with `token.EOF`.
fn next(lexer: Lexer) -> #(Lexer, #(Token, Position)) {
  case lexer.source {
    // Nothing left to read: report EOF at the current offset.
    "" -> #(lexer, #(token.EOF, Position(lexer.offset)))

    // Whitespace: step over one character and try again.
    " " <> rest | "\n" <> rest | "\r" <> rest | "\t" <> rest ->
      next(advance(lexer, rest, 1))

    // The eight single-character commands.
    ">" <> rest -> token(lexer, token.IncrementPointer, rest, 1)
    "<" <> rest -> token(lexer, token.DecrementPointer, rest, 1)
    "+" <> rest -> token(lexer, token.IncrementByte, rest, 1)
    "-" <> rest -> token(lexer, token.DecrementByte, rest, 1)
    "." <> rest -> token(lexer, token.OutputByte, rest, 1)
    "," <> rest -> token(lexer, token.InputByte, rest, 1)
    "[" <> rest -> token(lexer, token.StartBlock, rest, 1)
    "]" <> rest -> token(lexer, token.EndBlock, rest, 1)

    // Anything else starts a comment.
    _ -> comment(lexer, lexer.offset)
  }
}
/// Returns a copy of `lexer` whose remaining input is `source` and whose
/// offset has been moved forward by `offset`. Other fields are kept as-is.
fn advance(lexer: Lexer, source: String, offset: Int) -> Lexer {
  let moved_to = lexer.offset + offset
  Lexer(..lexer, source: source, offset: moved_to)
}
/// Pairs an already-built `token` with the lexer advanced past it.
///
/// `source` is the input remaining after the token and `offset` is how far to
/// move the lexer's offset forward. The token comes first in the argument list
/// so the function can be used at the end of a pipeline.
fn advanced(
  token: #(Token, Position),
  lexer: Lexer,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  let moved = advance(lexer, source, offset)
  #(moved, token)
}
/// Emits `token` positioned at the lexer's current offset, and returns it
/// alongside the lexer advanced by `offset` with `source` as the remaining
/// input.
fn token(
  lexer: Lexer,
  token: Token,
  source: String,
  offset: Int,
) -> #(Lexer, #(Token, Position)) {
  // Capture the position before advancing so it marks the token's start.
  let started_at = Position(offset: lexer.offset)
  advanced(#(token, started_at), lexer, source, offset)
}
/// Lexes a comment starting at the front of the lexer's remaining source.
///
/// The remaining source is split before the first `"\r\n"` or `"\n"` known to
/// the lexer's newline splitter. The part before the split becomes the comment
/// text and the part from the line ending onwards stays as unread input. The
/// offset advances by the comment's size in bytes. The token is positioned at
/// `start`.
fn comment(lexer: Lexer, start: Int) -> #(Lexer, #(Token, Position)) {
  let #(text, rest) = splitter.split_before(lexer.newlines, lexer.source)
  let lexer = advance(lexer, rest, string.byte_size(text))
  #(lexer, #(token.Comment(text), Position(offset: start)))
}
|