Merge pull request #3 from birth-software/progress

Introduce the general structure of the compiler
David 2023-07-29 11:05:34 -06:00 committed by GitHub
commit 30931dc6f1
8 changed files with 937 additions and 327 deletions
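
At a glance, the commit wires the new stages into a single pipeline: read the source file, lex it, parse it, lower it to IR, and emit machine code. A minimal sketch of that flow, based on behaviorTest in src/main.zig further down (assuming an Allocator named allocator is in scope):

const file = try fs.readFile(allocator, fs.first);
var lexer_result = try lexer.runTest(allocator, file);
defer lexer_result.free(allocator);
var parser_result = try parser.runTest(allocator, &lexer_result);
defer parser_result.free(allocator);
var ir_result = try ir.runTest(allocator, &parser_result);
defer ir_result.free(allocator);
var emit_result = try emit.runTest(allocator, &ir_result);
defer emit_result.free(allocator);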

src/data_structures.zig Normal file

@@ -0,0 +1,3 @@
const std = @import("std");
pub const ArrayList = std.ArrayListUnmanaged;

src/emit.zig Normal file

@@ -0,0 +1,398 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const log = std.log;
const page_size = std.mem.page_size;
const assert = std.debug.assert;
const expect = std.testing.expect;
const expectEqual = std.testing.expectEqual;
const ir = @import("ir.zig");
pub const Result = struct {
pub fn free(result: *Result, allocator: Allocator) void {
_ = allocator;
_ = result;
}
};
const Section = struct {
content: []align(page_size) u8,
index: usize = 0,
};
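// Executable image under construction: one mmapped page per section; only the text section is mapped executable.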
const Image = struct {
sections: struct {
text: Section,
rodata: Section,
data: Section,
},
entry_point: u32 = 0,
fn create() !Image {
return Image{
.sections = .{
.text = .{ .content = try mmap(page_size, .{ .executable = true }) },
.rodata = .{ .content = try mmap(page_size, .{ .executable = false }) },
.data = .{ .content = try mmap(page_size, .{ .executable = false }) },
},
};
}
fn destroy(image: *Image) void {
inline for (comptime std.meta.fieldNames(@TypeOf(image.sections))) |field_name| {
std.os.munmap(@field(image.sections, field_name).content);
}
}
inline fn mmap(size: usize, flags: packed struct {
executable: bool,
}) ![]align(page_size) u8 {
const protection_flags = std.os.PROT.READ | std.os.PROT.WRITE | if (flags.executable) std.os.PROT.EXEC else 0;
const mmap_flags = std.os.MAP.ANONYMOUS | std.os.MAP.PRIVATE;
return std.os.mmap(null, size, protection_flags, mmap_flags, -1, 0);
}
fn appendCode(image: *Image, code: []const u8) void {
const destination = image.sections.text.content[image.sections.text.index..][0..code.len];
@memcpy(destination, code);
image.sections.text.index += code.len;
}
fn appendCodeByte(image: *Image, code_byte: u8) void {
image.sections.text.content[image.sections.text.index] = code_byte;
image.sections.text.index += 1;
}
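// Reinterprets the emitted bytes at entry_point in the text section as a function pointer of the given type.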
fn getEntryPoint(image: *const Image, comptime Function: type) *const Function {
comptime {
assert(@typeInfo(Function) == .Fn);
}
assert(image.sections.text.content.len > 0);
return @as(*const Function, @ptrCast(&image.sections.text.content[image.entry_point]));
}
};
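// REX prefix: 0x40 plus the b/x/r/w bits (b/x/r extend the ModR/M and SIB register fields, w selects 64-bit operand size).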
const Rex = enum(u8) {
b = upper_4_bits | (1 << 0),
x = upper_4_bits | (1 << 1),
r = upper_4_bits | (1 << 2),
w = upper_4_bits | (1 << 3),
const upper_4_bits = 0b100_0000;
};
const GPRegister = enum(u4) {
a = 0,
c = 1,
d = 2,
b = 3,
sp = 4,
bp = 5,
si = 6,
di = 7,
r8 = 8,
r9 = 9,
r10 = 10,
r11 = 11,
r12 = 12,
r13 = 13,
r14 = 14,
r15 = 15,
};
pub const BasicGPRegister = enum(u3) {
a = 0,
c = 1,
d = 2,
b = 3,
sp = 4,
bp = 5,
si = 6,
di = 7,
};
const prefix_lock = 0xf0;
const prefix_repne_nz = 0xf2;
const prefix_rep = 0xf3;
const prefix_rex_w = [1]u8{@intFromEnum(Rex.w)};
const prefix_16_bit_operand = [1]u8{0x66};
const ret = 0xc3;
const mov_a_imm = [1]u8{0xb8};
const mov_reg_imm8: u8 = 0xb0;
inline fn intToArrayOfBytes(integer: anytype) [@sizeOf(@TypeOf(integer))]u8 {
comptime {
assert(@typeInfo(@TypeOf(integer)) == .Int);
}
return @as([@sizeOf(@TypeOf(integer))]u8, @bitCast(integer));
}
inline fn movAImm(image: *Image, integer: anytype) void {
const T = @TypeOf(integer);
image.appendCode(&(switch (T) {
u8, i8 => .{mov_reg_imm8 | @intFromEnum(GPRegister.a)},
u16, i16 => prefix_16_bit_operand ++ mov_a_imm,
u32, i32 => mov_a_imm,
u64, i64 => prefix_rex_w ++ mov_a_imm,
else => @compileError("Unsupported"),
} ++ intToArrayOfBytes(integer)));
}
test "ret void" {
var image = try Image.create();
defer image.destroy();
image.appendCodeByte(ret);
const function_pointer = image.getEntryPoint(fn () callconv(.C) void);
function_pointer();
}
const integer_types_to_test = [_]type{ u8, u16, u32, u64, i8, i16, i32, i64 };
fn getMaxInteger(comptime T: type) T {
comptime {
assert(@typeInfo(T) == .Int);
}
return switch (@typeInfo(T).Int.signedness) {
.unsigned => std.math.maxInt(T),
.signed => std.math.minInt(T),
};
}
test "ret integer" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const expected_number = getMaxInteger(Int);
movAImm(&image, expected_number);
image.appendCodeByte(ret);
const function_pointer = image.getEntryPoint(fn () callconv(.C) Int);
const result = function_pointer();
try expect(result == expected_number);
}
}
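// ModR/M byte with mod = 0b11 (register-direct): dst lands in the r/m field, src in the reg field.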
const LastByte = packed struct(u8) {
dst: BasicGPRegister,
src: BasicGPRegister,
always_on: u2 = 0b11,
};
fn movRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
dstRmSrcR(image, T, .mov, dst, src);
}
fn dstRmSrcR(image: *Image, comptime T: type, opcode: OpcodeRmR, dst: BasicGPRegister, src: BasicGPRegister) void {
const last_byte: u8 = @bitCast(LastByte{
.dst = dst,
.src = src,
});
const opcode_byte = @intFromEnum(opcode);
const bytes = switch (T) {
u8, i8 => blk: {
const base = [_]u8{ opcode_byte - 1, last_byte };
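// sp/bp/si/di (encodings 4..7) need a bare REX prefix (0x40) to be addressed as the byte registers spl/bpl/sil/dil.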
if (@intFromEnum(dst) >= @intFromEnum(BasicGPRegister.sp) or @intFromEnum(src) >= @intFromEnum(BasicGPRegister.sp)) {
image.appendCodeByte(0x40);
}
break :blk base;
},
u16, i16 => prefix_16_bit_operand ++ .{ opcode_byte, last_byte },
u32, i32 => .{ opcode_byte, last_byte },
u64, i64 => prefix_rex_w ++ .{ opcode_byte, last_byte },
else => @compileError("Not supported"),
};
image.appendCode(&bytes);
}
test "ret integer argument" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const number = getMaxInteger(Int);
movRmR(&image, Int, .a, .di);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (Int) callconv(.C) Int);
const result = functionPointer(number);
try expectEqual(number, result);
}
}
var r = std.rand.Pcg.init(0xffffffffffffffff);
fn getRandomNumberRange(comptime T: type, min: T, max: T) T {
const random = r.random();
return switch (@typeInfo(T).Int.signedness) {
.signed => random.intRangeAtMost(T, min, max),
.unsigned => random.uintAtMost(T, max),
};
}
fn subRmR(image: *Image, comptime T: type, dst: BasicGPRegister, src: BasicGPRegister) void {
dstRmSrcR(image, T, .sub, dst, src);
}
test "ret sub arguments" {
inline for (integer_types_to_test) |Int| {
var image = try Image.create();
defer image.destroy();
const a = getRandomNumberRange(Int, std.math.minInt(Int) / 2, std.math.maxInt(Int) / 2);
const b = getRandomNumberRange(Int, std.math.minInt(Int) / 2, a);
movRmR(&image, Int, .a, .di);
subRmR(&image, Int, .a, .si);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (Int, Int) callconv(.C) Int);
const result = functionPointer(a, b);
try expectEqual(a - b, result);
}
}
const OpcodeRmR = enum(u8) {
add = 0x01,
@"or" = 0x09,
@"and" = 0x21,
sub = 0x29,
xor = 0x31,
@"test" = 0x85,
mov = 0x89,
};
test "test binary operations" {
inline for (integer_types_to_test) |T| {
const test_cases = [_]TestIntegerBinaryOperation(T){
.{
.opcode = .add,
.callback = struct {
fn callback(a: T, b: T) T {
return @addWithOverflow(a, b)[0];
}
}.callback,
},
.{
.opcode = .sub,
.callback = struct {
fn callback(a: T, b: T) T {
return @subWithOverflow(a, b)[0];
}
}.callback,
},
.{
.opcode = .@"or",
.callback = struct {
fn callback(a: T, b: T) T {
return a | b;
}
}.callback,
},
.{
.opcode = .@"and",
.callback = struct {
fn callback(a: T, b: T) T {
return a & b;
}
}.callback,
},
.{
.opcode = .xor,
.callback = struct {
fn callback(a: T, b: T) T {
return a ^ b;
}
}.callback,
},
};
for (test_cases) |test_case| {
try test_case.runTest();
}
}
}
fn TestIntegerBinaryOperation(comptime T: type) type {
const should_log = false;
return struct {
callback: *const fn (a: T, b: T) T,
opcode: OpcodeRmR,
pub fn runTest(test_case: @This()) !void {
for (0..10) |_| {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const a = getRandomNumberRange(T, std.math.minInt(T) / 2, std.math.maxInt(T) / 2);
const b = getRandomNumberRange(T, std.math.minInt(T) / 2, a);
movRmR(&image, T, .a, .di);
dstRmSrcR(&image, T, test_case.opcode, .a, .si);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn (T, T) callconv(.C) T);
const expected = test_case.callback(a, b);
const result = functionPointer(a, b);
if (should_log) {
log.err("{s} {}, {} ({})", .{ @tagName(test_case.opcode), a, b, T });
}
try expectEqual(expected, result);
}
}
};
}
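// 0xe8 is call rel32; the 4-byte displacement that follows is relative to the end of the call instruction, which is what the patches below compute.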
test "call after" {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const jump_patch_offset = image.sections.text.index + 1;
image.appendCode(&.{ 0xe8, 0x00, 0x00, 0x00, 0x00 });
const jump_source = image.sections.text.index;
image.appendCodeByte(ret);
const jump_target = image.sections.text.index;
@as(*align(1) u32, @ptrCast(&image.sections.text.content[jump_patch_offset])).* = @intCast(jump_target - jump_source);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
functionPointer();
}
test "call before" {
var image = try Image.create();
defer image.destroy();
errdefer image.destroy();
const first_jump_patch_offset = image.sections.text.index + 1;
const first_call = .{0xe8} ++ .{ 0x00, 0x00, 0x00, 0x00 };
image.appendCode(&first_call);
const first_jump_source = image.sections.text.index;
image.appendCodeByte(ret);
const second_jump_target = image.sections.text.index;
image.appendCodeByte(ret);
const first_jump_target = image.sections.text.index;
@as(*align(1) i32, @ptrCast(&image.sections.text.content[first_jump_patch_offset])).* = @intCast(first_jump_target - first_jump_source);
const second_call = .{0xe8} ++ @as([4]u8, @bitCast(@as(i32, @intCast(@as(i64, @intCast(second_jump_target)) - @as(i64, @intCast(image.sections.text.index + 5))))));
image.appendCode(&second_call);
image.appendCodeByte(ret);
const functionPointer = image.getEntryPoint(fn () callconv(.C) void);
functionPointer();
}
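// Emission from IR is not implemented yet: runTest only walks the IR functions and returns an empty Result.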
pub fn runTest(allocator: Allocator, ir_result: *const ir.Result) !Result {
_ = allocator;
for (ir_result.functions.items) |*function| {
_ = function;
}
return Result{};
}

src/fs.zig Normal file

@@ -0,0 +1,9 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
pub const first = "src/test/main.b";
pub fn readFile(allocator: Allocator, file_relative_path: []const u8) ![]const u8 {
const file = try std.fs.cwd().readFileAlloc(allocator, file_relative_path, std.math.maxInt(usize));
return file;
}

src/ir.zig Normal file

@@ -0,0 +1,142 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const equal = std.mem.eql;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const parser = @import("parser.zig");
const void_type = Type{
.id = .void,
};
const Type = struct {
id: Id,
inline fn isPrimitive(T: Type) bool {
return switch (T.id) {
.void => true,
};
}
const Id = enum {
void,
};
};
const Error = error{
type_mismatch,
internal,
arguments_not_used,
};
const TopLevelDeclaration = struct {
type: Id,
index: u31,
const Id = enum {
function,
expression,
};
};
const Instruction = struct {
id: Id,
index: u16,
const Id = enum {
ret_void,
};
};
const ret_void = Instruction{
.id = .ret_void,
.index = 0,
};
const ret = struct {
is_type: bool,
};
const Function = struct {
instructions: ArrayList(Instruction),
return_type: Type,
};
pub const Result = struct {
top_level_declarations: ArrayList(TopLevelDeclaration),
functions: ArrayList(Function),
pub fn free(result: *Result, allocator: Allocator) void {
for (result.functions.items) |*function| {
function.instructions.clearAndFree(allocator);
}
result.functions.clearAndFree(allocator);
result.top_level_declarations.clearAndFree(allocator);
}
};
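// Lowers the parsed functions to IR. Only empty bodies are accepted for now: arguments must be unused and the return type must be the primitive void, producing a single ret_void instruction per function.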
const Analyzer = struct {
parser: *const parser.Result,
top_level_declarations: ArrayList(TopLevelDeclaration),
functions: ArrayList(Function),
allocator: Allocator,
fn analyze(allocator: Allocator, parser_result: *const parser.Result) Error!Result {
var analyzer = Analyzer{
.parser = parser_result,
.top_level_declarations = ArrayList(TopLevelDeclaration){},
.allocator = allocator,
.functions = ArrayList(Function){},
};
for (parser_result.functions.items) |ast_function| {
if (ast_function.statements.items.len != 0) {
for (ast_function.statements.items) |statement| {
_ = statement;
@panic("TODO: statement");
}
} else {
if (ast_function.arguments.items.len != 0) {
return Error.arguments_not_used;
}
try analyzer.expectPrimitiveType(void_type, ast_function.return_type);
const function_index = analyzer.functions.items.len;
var function = Function{
.instructions = ArrayList(Instruction){},
.return_type = void_type,
};
function.instructions.append(allocator, ret_void) catch return Error.internal;
analyzer.top_level_declarations.append(allocator, TopLevelDeclaration{
.type = .function,
.index = @intCast(function_index),
}) catch return Error.internal;
analyzer.functions.append(allocator, function) catch return Error.internal;
}
}
return .{
.top_level_declarations = analyzer.top_level_declarations,
.functions = analyzer.functions,
};
}
fn expectPrimitiveType(analyzer: *Analyzer, comptime type_value: Type, type_identifier_id: u32) Error!void {
assert(type_value.isPrimitive());
const type_identifier = analyzer.parser.strings.get(type_identifier_id) orelse return Error.internal;
if (!equal(u8, @tagName(type_value.id), type_identifier)) {
return Error.type_mismatch;
}
}
};
pub fn runTest(allocator: Allocator, parser_result: *const parser.Result) !Result {
return Analyzer.analyze(allocator, parser_result);
}

src/lexer.zig Normal file

@@ -0,0 +1,156 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const log = std.log;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const fs = @import("fs.zig");
pub inline fn rdtsc() u64 {
var edx: u32 = undefined;
var eax: u32 = undefined;
asm volatile (
\\rdtsc
: [eax] "={eax}" (eax),
[edx] "={edx}" (edx),
);
return @as(u64, edx) << 32 | eax;
}
inline fn rdtscFast() u32 {
return asm volatile (
\\rdtsc
: [eax] "={eax}" (-> u32),
:
: "edx"
);
}
const vector_byte_count = 16;
// These two actually take less space due to how Zig handles bool as u1
const VBool = @Vector(vector_byte_count, bool);
const VU1 = @Vector(vector_byte_count, u1);
const VU8 = @Vector(vector_byte_count, u8);
inline fn vand(v1: VBool, v2: VBool) VBool {
return @bitCast(@as(VU1, @bitCast(v1)) & @as(VU1, @bitCast(v2)));
}
inline fn byteMask(n: u8) VU8 {
return @splat(n);
}
inline fn endOfIdentifier(ch: u8) bool {
// TODO: complete
return ch == ' ' or ch == '(' or ch == ')';
}
const Identifier = struct {
start: u32,
end: u32,
};
pub const TokenId = enum {
identifier,
special_character,
};
pub const SpecialCharacter = enum(u8) {
arrow = 0,
left_parenthesis = '(',
right_parenthesis = ')',
left_brace = '{',
right_brace = '}',
};
pub const Result = struct {
identifiers: ArrayList(Identifier),
special_characters: ArrayList(SpecialCharacter),
ids: ArrayList(TokenId),
file: []const u8,
time: u64 = 0,
pub fn free(result: *Result, allocator: Allocator) void {
result.identifiers.clearAndFree(allocator);
result.special_characters.clearAndFree(allocator);
result.ids.clearAndFree(allocator);
allocator.free(result.file);
}
};
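// Tokens are stored as parallel lists: ids records each token's kind, while identifiers and special_characters hold the payloads for their kinds in order.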
fn lex(allocator: Allocator, text: []const u8) !Result {
const time_start = std.time.Instant.now() catch unreachable;
var index: usize = 0;
var result = Result{
.identifiers = try ArrayList(Identifier).initCapacity(allocator, text.len),
.special_characters = try ArrayList(SpecialCharacter).initCapacity(allocator, text.len),
.ids = try ArrayList(TokenId).initCapacity(allocator, text.len),
.file = text,
};
defer {
const time_end = std.time.Instant.now() catch unreachable;
result.time = time_end.since(time_start);
}
while (index < text.len) {
const first_char = text[index];
switch (first_char) {
'a'...'z', 'A'...'Z', '_' => {
const start = index;
// SIMD this
while (!endOfIdentifier(text[index])) {
index += 1;
}
result.identifiers.appendAssumeCapacity(.{
.start = @intCast(start),
.end = @intCast(index),
});
result.ids.appendAssumeCapacity(.identifier);
},
'(', ')', '{', '}' => |special_character| {
result.special_characters.appendAssumeCapacity(@enumFromInt(special_character));
result.ids.appendAssumeCapacity(.special_character);
index += 1;
},
' ', '\n' => index += 1,
'-' => {
if (text[index + 1] == '>') {
result.special_characters.appendAssumeCapacity(.arrow);
result.ids.appendAssumeCapacity(.special_character);
index += 2;
} else {
@panic("TODO");
}
},
else => {
index += 1;
},
}
}
return result;
}
pub fn runTest(allocator: Allocator, file: []const u8) !Result {
const result = try lex(allocator, file);
return result;
}
test "lexer" {
const allocator = std.testing.allocator;
const file_path = fs.first;
const file = try fs.readFile(allocator, file_path);
var result = try runTest(allocator, file);
defer result.free(allocator);
}

src/main.zig

@@ -1,341 +1,45 @@
The previous contents of main.zig (the machine-code emission helpers and their tests) move to src/emit.zig above; the file is reduced to the driver shown here.
const std = @import("std");
const Allocator = std.mem.Allocator;
const fs = @import("fs.zig");
const lexer = @import("lexer.zig");
const parser = @import("parser.zig");
const ir = @import("ir.zig");
const emit = @import("emit.zig");
pub const seed = std.math.maxInt(u64);
pub fn main() !void {
var gpa = std.heap.GeneralPurposeAllocator(.{}){};
const allocator = gpa.allocator();
try behaviorTest(allocator, fs.first);
}
fn behaviorTest(allocator: Allocator, file_relative_path: []const u8) !void {
const file = try fs.readFile(allocator, file_relative_path);
var lexer_result = try lexer.runTest(allocator, file);
defer lexer_result.free(allocator);
var parser_result = parser.runTest(allocator, &lexer_result) catch |err| {
std.log.err("Lexer took {} ns", .{lexer_result.time});
return err;
};
defer parser_result.free(allocator);
var ir_result = try ir.runTest(allocator, &parser_result);
defer ir_result.free(allocator);
var emit_result = try emit.runTest(allocator, &ir_result);
defer emit_result.free(allocator);
}
test {
_ = lexer;
_ = parser;
_ = ir;
_ = emit;
}
test "behavior test 1" {
const allocator = std.testing.allocator;
try behaviorTest(allocator, fs.first);
}

src/parser.zig Normal file

@@ -0,0 +1,195 @@
const std = @import("std");
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const data_structures = @import("data_structures.zig");
const ArrayList = data_structures.ArrayList;
const lexer = @import("lexer.zig");
pub const Result = struct {
functions: ArrayList(Function),
strings: StringMap,
pub fn free(result: *Result, allocator: Allocator) void {
result.functions.clearAndFree(allocator);
result.strings.clearAndFree(allocator);
}
};
const PeekResult = union(lexer.TokenId) {
special_character: lexer.SpecialCharacter,
identifier: []const u8,
};
const Function = struct {
name: u32,
return_type: u32,
arguments: ArrayList(Argument),
statements: ArrayList(Statement),
const Argument = struct {
foo: u32 = 0,
};
};
const Statement = struct {
foo: u32 = 0,
};
const StringMap = std.AutoHashMapUnmanaged(u32, []const u8);
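// Single-pass parser over the lexer's token stream: ids, identifiers and special_characters are walked in lockstep via the three index fields below.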
const Parser = struct {
id_index: u32 = 0,
identifier_index: u32 = 0,
special_character_index: u32 = 0,
strings: StringMap,
allocator: Allocator,
functions: ArrayList(Function),
fn parse(parser: *Parser, lexer_result: *const lexer.Result) !Result {
while (parser.id_index < lexer_result.ids.items.len) {
try parser.parseTopLevelDeclaration(lexer_result);
}
return Result{
.functions = parser.functions,
.strings = parser.strings,
};
}
fn parseFunction(parser: *Parser, lexer_result: *const lexer.Result, name: u32) !Function {
assert(lexer_result.special_characters.items[parser.special_character_index] == .left_parenthesis);
parser.consume(lexer_result, .special_character);
while (true) {
if (parser.expectSpecialCharacter(lexer_result, .right_parenthesis)) {
break;
} else |_| {}
return error.not_implemented;
}
try parser.expectSpecialCharacter(lexer_result, .arrow);
const return_type_identifier = try parser.expectIdentifier(lexer_result);
try parser.expectSpecialCharacter(lexer_result, .left_brace);
while (true) {
if (parser.expectSpecialCharacter(lexer_result, .right_brace)) {
break;
} else |_| {}
return error.not_implemented;
}
return Function{
.name = name,
.statements = ArrayList(Statement){},
.arguments = ArrayList(Function.Argument){},
.return_type = return_type_identifier,
};
}
inline fn consume(parser: *Parser, lexer_result: *const lexer.Result, comptime token_id: lexer.TokenId) void {
assert(lexer_result.ids.items[parser.id_index] == token_id);
parser.id_index += 1;
switch (token_id) {
.special_character => parser.special_character_index += 1,
.identifier => parser.identifier_index += 1,
}
}
fn parseTopLevelDeclaration(parser: *Parser, lexer_result: *const lexer.Result) !void {
const top_level_identifier = try parser.expectIdentifier(lexer_result);
const next_token = parser.peek(lexer_result);
switch (next_token) {
.special_character => |special_character| switch (special_character) {
.left_parenthesis => {
const function = try parser.parseFunction(lexer_result, top_level_identifier);
try parser.functions.append(parser.allocator, function);
},
else => return error.not_implemented,
},
.identifier => |identifier| {
_ = identifier;
return error.not_implemented;
},
}
}
inline fn peek(parser: *const Parser, lexer_result: *const lexer.Result) PeekResult {
return switch (lexer_result.ids.items[parser.id_index]) {
.special_character => .{
.special_character = lexer_result.special_characters.items[parser.special_character_index],
},
.identifier => .{
.identifier = blk: {
const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
break :blk lexer_result.file[identifier_range.start..identifier_range.end];
},
},
};
}
fn expectSpecialCharacter(parser: *Parser, lexer_result: *const lexer.Result, expected: lexer.SpecialCharacter) !void {
const token_id = lexer_result.ids.items[parser.id_index];
if (token_id != .special_character) {
return error.expected_special_character;
}
defer parser.id_index += 1;
const special_character = lexer_result.special_characters.items[parser.special_character_index];
if (special_character != expected) {
return error.expected_different_special_character;
}
parser.special_character_index += 1;
}
fn acceptSpecialCharacter() void {}
fn expectIdentifier(parser: *Parser, lexer_result: *const lexer.Result) !u32 {
const token_id = lexer_result.ids.items[parser.id_index];
if (token_id != .identifier) {
return Error.expected_identifier;
}
parser.id_index += 1;
const identifier_range = lexer_result.identifiers.items[parser.identifier_index];
parser.identifier_index += 1;
const identifier = lexer_result.file[identifier_range.start..identifier_range.end];
const Hash = std.hash.Wyhash;
const seed = @intFromPtr(identifier.ptr);
var hasher = Hash.init(seed);
std.hash.autoHash(&hasher, identifier.ptr);
const hash = hasher.final();
const truncated_hash: u32 = @truncate(hash);
try parser.strings.put(parser.allocator, truncated_hash, identifier);
return truncated_hash;
}
const Error = error{
expected_identifier,
expected_special_character,
expected_different_special_character,
not_implemented,
};
};
pub fn runTest(allocator: Allocator, lexer_result: *const lexer.Result) !Result {
var parser = Parser{
.allocator = allocator,
.strings = StringMap{},
.functions = ArrayList(Function){},
};
return parser.parse(lexer_result) catch |err| {
std.log.err("error: {}", .{err});
return err;
};
}

src/test/main.b Normal file

@@ -0,0 +1,3 @@
main() -> void {
}
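
This is the program the behavior test feeds through the pipeline: a single function with no arguments and a void return type, the only shape ir.zig currently lowers (a lone ret_void instruction).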