1.2 scanning: single-character tokens
last updated: Oct 20, 2023
const std = @import("std");
const TokenType = enum {
    // single-character tokens
    left_paren,
    right_paren,
    left_brace,
    right_brace,
    comma,
    dot,
    minus,
    plus,
    semicolon,
    slash,
    star,
    // one- or two-character tokens
    bang,
    bang_equal,
    equal,
    equal_equal,
    greater,
    greater_equal,
    less,
    less_equal,
    // literals
    identifier,
    string,
    number,
    // keywords
    keyword_and,
    keyword_class,
    keyword_else,
    keyword_false,
    keyword_fun,
    keyword_for,
    keyword_if,
    keyword_nil,
    keyword_or,
    keyword_print,
    keyword_return,
    keyword_super,
    keyword_this,
    keyword_true,
    keyword_var,
    keyword_while,
    eof,
};
const Lexemes = enum {
    bytes,
    int,
    float,
    empty,
};
const Token = struct {
    typ: TokenType,
    // I think the natural way to do a Zig token would be to hand out a slice
    // into the source buffer, but I'm going to follow the book and represent
    // the lexeme as a union instead (a sketch of the slice version follows
    // this struct).
    lexeme: union(Lexemes) {
        bytes: []const u8,
        int: i64,
        float: f64,
        empty: void,
    },
    line: u64,
};
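// For comparison, a sketch of the slice-based alternative mentioned above.
// Hypothetical and unused here: `lexeme` would just view the source buffer,
// so it stays valid only as long as the buffer does, and number literals
// would be parsed out of it later instead of being stored as i64/f64.
const SliceToken = struct {
    typ: TokenType,
    lexeme: []const u8, // points into the scanner's source buffer
    line: u64,
};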
const TokenList = std.ArrayList(*Token);
const Scanner = struct {
    allocator: std.mem.Allocator,
    buf: []const u8,
    start: u64,
    current: u64,
    line: u64,
    tokens: TokenList,
    pub fn init(allocator: std.mem.Allocator, buf: []const u8) Scanner {
        return Scanner{
            .allocator = allocator,
            .buf = buf,
            .start = 0,
            .current = 0,
            .line = 1,
            .tokens = TokenList.init(allocator),
        };
    }
    pub fn scan(self: *Scanner) !TokenList {
        while (self.current < self.buf.len) {
            self.start = self.current;
            try self.next();
        }
        // allocate the eof token like the others; appending the address of a
        // temporary here would leave a dangling pointer in the list
        try self.addTok(.eof);
        return self.tokens;
    }
    pub fn next(self: *Scanner) !void {
        switch (self.buf[self.current]) {
            '(' => {
                try self.addTok(.left_paren);
            },
            ')' => {
                try self.addTok(.right_paren);
            },
            '{' => {
                try self.addTok(.left_brace);
            },
            '}' => {
                try self.addTok(.right_brace);
            },
            ',' => {
                try self.addTok(.comma);
            },
            '.' => {
                try self.addTok(.dot);
            },
            '-' => {
                try self.addTok(.minus);
            },
            '+' => {
                try self.addTok(.plus);
            },
            ';' => {
                try self.addTok(.semicolon);
            },
            '*' => {
                try self.addTok(.star);
            },
            '\n' => {
                // count newlines so tokens carry the right line number
                self.line += 1;
            },
            // TODO: two-character operators, slash/comments, literals, keywords
            else => {},
        }
        self.current += 1;
    }
    fn addTok(self: *Scanner, typ: TokenType) !void {
        // heap-allocate so the pointer stored in the list stays valid
        const tok = try self.allocator.create(Token);
        tok.* = Token{ .typ = typ, .lexeme = .empty, .line = self.line };
        try self.tokens.append(tok);
    }
};
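// A smoke-test sketch (not part of the original listing) exercising the
// scanner on the four bracket characters. It assumes the heap-allocated
// tokens above, so each token is destroyed before the list itself is freed.
test "scan single-character tokens" {
    var scanner = Scanner.init(std.testing.allocator, "(){}");
    var tokens = try scanner.scan();
    defer {
        for (tokens.items) |tok| std.testing.allocator.destroy(tok);
        tokens.deinit();
    }
    try std.testing.expectEqual(@as(usize, 5), tokens.items.len); // 4 tokens + eof
    try std.testing.expectEqual(TokenType.left_paren, tokens.items[0].typ);
    try std.testing.expectEqual(TokenType.eof, tokens.items[4].typ);
}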
fn run(allocator: std.mem.Allocator, buf: []const u8) !void {
    std.debug.print("file contents: {s}", .{buf});
    var scanner = Scanner.init(allocator, buf);
    const tokens = try scanner.scan();
    // TODO: better token printer -- a sketch of one follows this function
    std.debug.print("scanned {d} tokens\n", .{tokens.items.len});
}
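// One sketch for the token-printer TODO above (a hypothetical helper, not in
// the original listing): switch on the union lexeme and print each payload.
fn printToken(tok: *const Token) void {
    switch (tok.lexeme) {
        .bytes => |b| std.debug.print("{s} \"{s}\" (line {d})\n", .{ @tagName(tok.typ), b, tok.line }),
        .int => |i| std.debug.print("{s} {d} (line {d})\n", .{ @tagName(tok.typ), i, tok.line }),
        .float => |f| std.debug.print("{s} {d} (line {d})\n", .{ @tagName(tok.typ), f, tok.line }),
        .empty => std.debug.print("{s} (line {d})\n", .{ @tagName(tok.typ), tok.line }),
    }
}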
fn runFile(allocator: std.mem.Allocator, path: []const u8) !void {
    std.debug.print("reading file {s}\n", .{path});
    const input = try std.fs.cwd().readFileAlloc(allocator, path, std.math.maxInt(usize));
    defer allocator.free(input);
    try run(allocator, input);
}
fn runPrompt(allocator: std.mem.Allocator) !void {
    std.debug.print("run the interpreter\n", .{});
    const stdin = std.io.getStdIn().reader();
    const stderr = std.io.getStdErr().writer();
    var repl_buf: [1024]u8 = undefined;
    // keep reading lines until stdin hits end-of-file
    while (true) {
        if (stdin.readUntilDelimiterOrEof(&repl_buf, '\n') catch |err| {
            try stderr.print("\nUnable to parse command: {s}\n", .{@errorName(err)});
            return;
        }) |line| {
            try run(allocator, line);
        } else {
            break; // readUntilDelimiterOrEof returns null at end of input
        }
    }
}
pub fn main() anyerror!void {
    var general_purpose_allocator = std.heap.GeneralPurposeAllocator(.{}){};
    const gpa = general_purpose_allocator.allocator();
    const args = try std.process.argsAlloc(gpa);
    defer std.process.argsFree(gpa, args);
    // args[0] is our executable
    if (args.len > 2) {
        std.debug.print("Usage: lexzical [script]\n", .{});
    } else if (args.len == 2) {
        try runFile(gpa, args[1]);
    } else {
        try runPrompt(gpa);
    }
}
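// example invocations (the file name is illustrative):
//   zig run lexzical.zig -- script.lox   scan a source file
//   zig run lexzical.zig                 start the line-at-a-time prompt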