From eb681dc7a1e3e21527c9b699a73631902988c8cf Mon Sep 17 00:00:00 2001 From: David Gonzalez Martin Date: Sun, 12 Nov 2023 14:55:20 -0600 Subject: [PATCH] implement and, or, xor --- src/Compilation.zig | 3 + src/backend/intermediate_representation.zig | 10 +- src/backend/x86_64.zig | 186 +++++++++++++++++++- src/frontend/lexical_analyzer.zig | 39 +++- src/frontend/semantic_analyzer.zig | 18 +- src/frontend/syntactic_analyzer.zig | 92 +++++++++- test/and/main.nat | 6 + test/or/main.nat | 6 + test/xor/main.nat | 7 + 9 files changed, 339 insertions(+), 28 deletions(-) create mode 100644 test/and/main.nat create mode 100644 test/or/main.nat create mode 100644 test/xor/main.nat diff --git a/src/Compilation.zig b/src/Compilation.zig index 3a6d77e..66fd506 100644 --- a/src/Compilation.zig +++ b/src/Compilation.zig @@ -549,6 +549,9 @@ pub const BinaryOperation = struct { const Id = enum { add, sub, + logical_and, + logical_xor, + logical_or, }; }; diff --git a/src/backend/intermediate_representation.zig b/src/backend/intermediate_representation.zig index 73d877b..17ade03 100644 --- a/src/backend/intermediate_representation.zig +++ b/src/backend/intermediate_representation.zig @@ -235,6 +235,9 @@ pub const BinaryOperation = struct { const Id = enum { add, sub, + logical_and, + logical_xor, + logical_or, }; pub const List = BlockList(@This()); @@ -731,6 +734,9 @@ pub const Builder = struct { .id = switch (sema_binary_operation.id) { .add => .add, .sub => .sub, + .logical_and => .logical_and, + .logical_xor => .logical_xor, + .logical_or => .logical_or, }, .type = try builder.translateType(sema_binary_operation.type), }); @@ -912,16 +918,18 @@ pub const Builder = struct { .integer => |integer| try builder.processInteger(integer), .call => |call_index| try builder.processCall(call_index), .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + .binary_operation => |binary_operation_index| try 
builder.processBinaryOperation(binary_operation_index), else => |t| @panic(@tagName(t)), }; } - fn emitBinaryOperationOperand(builder: *Builder, binary_operation_index: Compilation.Value.Index) !Instruction.Index { + fn emitBinaryOperationOperand(builder: *Builder, binary_operation_index: Compilation.Value.Index) anyerror!Instruction.Index { const value = builder.ir.module.values.get(binary_operation_index); return switch (value.*) { .integer => |integer| try builder.processInteger(integer), .call => |call_index| try builder.processCall(call_index), .declaration_reference => |declaration_reference| try builder.loadDeclarationReference(declaration_reference.value), + .binary_operation => |boi| try builder.processBinaryOperation(boi), else => |t| @panic(@tagName(t)), }; } diff --git a/src/backend/x86_64.zig b/src/backend/x86_64.zig index a97c433..1ee621e 100644 --- a/src/backend/x86_64.zig +++ b/src/backend/x86_64.zig @@ -1487,6 +1487,8 @@ const Instruction = struct { add32rr, add32rm, add32mr, + and32rm, + and32rr, call64pcrel32, copy, lea64r, @@ -1501,12 +1503,15 @@ const Instruction = struct { mov32mi, movsx64rm32, movsx64rr32, + or32rm, + or32rr, ret, sub32rr, sub32rm, syscall, ud2, xor32rr, + xor32rm, }; pub const Descriptor = struct { @@ -1853,6 +1858,42 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri }, }, }, + .and32rr = .{ + // .format = .mrm_dest_reg, // right? + .opcode = 0x21, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, + .and32rm = .{ + // .format = .mrm_dest_reg, // right? 
+ .opcode = 0x23, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + }, .call64pcrel32 = .{ // .format = .no_operands, .opcode = 0xe8, @@ -2040,6 +2081,42 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri }, }, }, + .or32rm = .{ + // .format = .mrm_dest_reg, + .opcode = 0x0b, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + }, + .or32rr = .{ + // .format = .mrm_dest_reg, + .opcode = 0x09, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + .{ + .id = .gp32, + .kind = .src, + }, + }, + }, .ret = .{ // .format = .no_operands, .opcode = 0xc3, @@ -2102,6 +2179,24 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .two_byte_prefix = true, }, }, + .xor32rm = .{ + // .format = .mrm_dest_reg, + .opcode = 0x33, + .operands = &.{ + .{ + .id = .gp32, + .kind = .dst, + }, + .{ + .id = .gp32, + .kind = .src, + }, + .{ + .id = .i32mem, + .kind = .src, + }, + }, + }, .xor32rr = .{ // .format = .mrm_dest_reg, .opcode = 0x31, @@ -2114,6 +2209,10 @@ const instruction_descriptors = std.EnumArray(Instruction.Id, Instruction.Descri .id = .gp32, .kind = .src, }, + .{ + .id = .gp32, + .kind = .src, + }, }, }, }); @@ -2826,12 +2925,76 @@ pub const MIR = struct { const destination_register = try instruction_selection.getRegisterForValue(mir, ir_instruction_index); const value_type = resolveType(ir_binary_operation.type); - if (is_right_load and !is_left_load) { + if (!is_left_load and is_right_load) { unreachable; - } else if (!is_right_load and is_left_load) { - unreachable; - } else if (!is_right_load and !is_left_load) { + } else if (is_left_load and !is_right_load) { unreachable; + } else if (!is_left_load and !is_right_load) { + const instruction_id: Instruction.Id = switch 
(ir_binary_operation.id) { + .add => switch (value_type) { + .i32 => .add32rr, + else => unreachable, + }, + .sub => switch (value_type) { + .i32 => .sub32rr, + else => unreachable, + }, + .logical_and => switch (value_type) { + .i32 => .and32rr, + else => unreachable, + }, + .logical_xor => switch (value_type) { + .i32 => .xor32rr, + else => unreachable, + }, + .logical_or => switch (value_type) { + .i32 => .or32rr, + else => unreachable, + }, + }; + + const instruction_descriptor = instruction_descriptors.get(instruction_id); + const left_register = try instruction_selection.getRegisterForValue(mir, ir_binary_operation.left); + const right_register = try instruction_selection.getRegisterForValue(mir, ir_binary_operation.right); + const destination_operand_id = instruction_descriptor.operands[0].id; + const left_operand_id = instruction_descriptor.operands[1].id; + + const right_operand_id = instruction_descriptor.operands[2].id; + + const destination_operand = Operand{ + .id = destination_operand_id, + .u = .{ + .register = destination_register, + }, + .flags = .{ + .type = .def, + }, + }; + + const left_operand = Operand{ + .id = left_operand_id, + .u = .{ + .register = left_register, + }, + .flags = .{}, + }; + + const right_operand = Operand{ + .id = right_operand_id, + .u = .{ + .register = right_register, + }, + .flags = .{}, + }; + const binary_op_instruction = try mir.buildInstruction(instruction_selection, instruction_id, &.{ + destination_operand, + left_operand, + right_operand, + }); + + try instruction_selection.instruction_cache.append(mir.allocator, binary_op_instruction); + + try instruction_selection.updateValueMap(mir.allocator, ir_instruction_index, destination_register, false); } else { // If both operands come from memory (both operands are loads), load the left one into a register and operate from the stack with the right one, when possible const instruction_id: Instruction.Id = switch (ir_binary_operation.id) { @@ -2843,6 +3006,18 @@ pub 
const MIR = struct { .i32 => .sub32rm, else => unreachable, }, + .logical_and => switch (value_type) { + .i32 => .and32rm, + else => unreachable, + }, + .logical_xor => switch (value_type) { + .i32 => .xor32rm, + else => unreachable, + }, + .logical_or => switch (value_type) { + .i32 => .or32rm, + else => unreachable, + }, }; try instruction_selection.folded_loads.putNoClobber(mir.allocator, ir_binary_operation.right, {}); @@ -2852,7 +3027,6 @@ pub const MIR = struct { const destination_operand_id = instruction_descriptor.operands[0].id; const left_operand_id = instruction_descriptor.operands[1].id; const right_operand_id = instruction_descriptor.operands[2].id; - assert(right_operand_id == .i32mem); const ir_load = mir.ir.loads.get(mir.ir.instructions.get(ir_binary_operation.right).u.load); const right_operand_addressing_mode = instruction_selection.getAddressingModeFromIr(mir, ir_load.instruction); @@ -4457,7 +4631,7 @@ pub const MIR = struct { }; try image.section_manager.appendCodeByte(@bitCast(modrm)); }, - .add32rm, .sub32rm => { + .add32rm, .sub32rm, .and32rm, .xor32rm, .or32rm => { assert(instruction.operands.items.len == 3); const instruction_descriptor = instruction_descriptors.get(instruction.id); const opcode: u8 = @intCast(instruction_descriptor.opcode); diff --git a/src/frontend/lexical_analyzer.zig b/src/frontend/lexical_analyzer.zig index ca22468..b926b9e 100644 --- a/src/frontend/lexical_analyzer.zig +++ b/src/frontend/lexical_analyzer.zig @@ -115,8 +115,13 @@ pub const Result = struct { pub const Logger = enum { main, + new_token, + number_literals, - pub var bitset = std.EnumSet(Logger).initEmpty(); + pub var bitset = std.EnumSet(Logger).initMany(&.{ + // .new_token, + .number_literals, + }); }; pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) !Result { @@ -164,12 +169,27 @@ pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) ! 
inline else => |comptime_fixed_keyword| @field(Token.Id, "fixed_keyword_" ++ @tagName(comptime_fixed_keyword)), } else .identifier; }, - '(', ')', '{', '}', '[', ']', '=', ';', '#', '@', ',', '.', ':', '>', '<', '!', '+', '-', '*', '\\', '/' => |operator| blk: { + '(', ')', '{', '}', '[', ']', '=', ';', '#', '@', ',', '.', ':', '>', '<', '!', '+', '-', '*', '\\', '/', '&', '|', '^' => |operator| blk: { index += 1; break :blk @enumFromInt(operator); }, '0'...'9' => blk: { - while (text[index] >= '0' and text[index] <= '9') { + // Detect other non-decimal literals + if (text[index] == '0' and index + 1 < text.len) { + logln(.lexer, .number_literals, "Number starts with 0. Checking for non-decimal literals...", .{}); + if (text[index + 1] == 'x') { + logln(.lexer, .number_literals, "Hex", .{}); + index += 2; + } else if (text[index + 1] == 'b') { + logln(.lexer, .number_literals, "Bin", .{}); + index += 2; + } else if (text[index + 1] == 'o') { + logln(.lexer, .number_literals, "Octal", .{}); + index += 2; + } + } + + while (text[index] >= '0' and text[index] <= '9' or text[index] >= 'a' and text[index] <= 'f' or text[index] >= 'A' and text[index] <= 'F') { index += 1; } @@ -197,18 +217,21 @@ pub fn analyze(allocator: Allocator, text: []const u8, file_index: File.Index) ! index += 1; continue; }, - else => |foo| { - std.debug.panic("NI: '{c}'", .{foo}); + else => |ch| { + std.debug.panic("NI: '{c}'", .{ch}); }, }; const end_index = index; - - try tokens.append(allocator, .{ + const token = Token{ .start = @intCast(start_index), .len = @intCast(end_index - start_index), .id = token_id, - }); + }; + + logln(.lexer, .new_token, "New token {s} added: {s}", .{ @tagName(token.id), text[token.start..][0..token.len] }); + + try tokens.append(allocator, token); } for (tokens.items, 0..) 
|token, i| { diff --git a/src/frontend/semantic_analyzer.zig b/src/frontend/semantic_analyzer.zig index a9895fa..2c671bd 100644 --- a/src/frontend/semantic_analyzer.zig +++ b/src/frontend/semantic_analyzer.zig @@ -540,6 +540,9 @@ const Analyzer = struct { .id = switch (node.id) { .add => .add, .sub => .sub, + .logical_and => .logical_and, + .logical_xor => .logical_xor, + .logical_or => .logical_or, else => |t| @panic(@tagName(t)), }, }); @@ -1007,7 +1010,11 @@ const Analyzer = struct { .@"return" => try analyzer.processReturn(scope_index, expect_type, node_index), .add, .sub, + .logical_and, + .logical_xor, + .logical_or, => try analyzer.processBinaryOperation(scope_index, expect_type, node_index), + .expression_group => return try analyzer.resolveNode(value, scope_index, expect_type, node.left), //unreachable, else => |t| @panic(@tagName(t)), }; } @@ -1469,12 +1476,15 @@ const Analyzer = struct { }, .integer => |destination_int| switch (source_type.*) { .integer => |source_int| { - if (destination_int.getSize() < source_int.getSize()) { - @panic("Destination integer type is smaller than sourcE"); - } else if (destination_int.getSize() > source_int.getSize()) { + const dst_size = destination_int.getSize(); + const src_size = source_int.getSize(); + logln(.sema, .typecheck, "Dst size: {}. 
Src size: {}", .{ dst_size, src_size }); + if (dst_size < src_size) { + @panic("Destination integer type is smaller than source"); + } else if (dst_size > src_size) { unreachable; } else { - unreachable; + return TypeCheckResult.success; } }, .comptime_int => return TypeCheckResult.success, diff --git a/src/frontend/syntactic_analyzer.zig b/src/frontend/syntactic_analyzer.zig index 6e53136..f484991 100644 --- a/src/frontend/syntactic_analyzer.zig +++ b/src/frontend/syntactic_analyzer.zig @@ -38,7 +38,18 @@ pub const Logger = enum { precedence, @"switch", - pub var bitset = std.EnumSet(Logger).initEmpty(); + pub var bitset = std.EnumSet(Logger).initMany(&.{ + .token_errors, + .symbol_declaration, + .node_creation, + .main_node, + .container_members, + .block, + .assign, + .suffix, + .precedence, + .@"switch", + }); }; // TODO: pack it to be more efficient @@ -143,6 +154,10 @@ pub const Node = packed struct(u128) { function_prototype = 60, add = 61, sub = 62, + logical_and = 63, + logical_xor = 64, + expression_group = 65, + logical_or = 66, }; }; @@ -171,7 +186,7 @@ const Analyzer = struct { const result = token_i; return result; } else { - logln(.parser, .token_errors, "Unexpected token {s} when expected {s}\n", .{ @tagName(token.id), @tagName(token_id) }); + logln(.parser, .token_errors, "Unexpected token {s} when expected {s}\n| |\n v \n```\n{s}\n```", .{ @tagName(token.id), @tagName(token_id), analyzer.source_file[token.start..] 
}); return error.unexpected_token; } } @@ -699,6 +714,9 @@ const Analyzer = struct { compare_not_equal, add, sub, + logical_and, + logical_xor, + logical_or, }; const operator_precedence = std.EnumArray(PrecedenceOperator, i32).init(.{ @@ -706,6 +724,9 @@ const Analyzer = struct { .compare_not_equal = 30, .add = 60, .sub = 60, + .logical_and = 40, + .logical_xor = 40, + .logical_or = 40, }); const operator_associativity = std.EnumArray(PrecedenceOperator, Associativity).init(.{ @@ -713,12 +734,19 @@ const Analyzer = struct { .compare_not_equal = .none, .add = .left, .sub = .left, + .logical_and = .left, + .logical_xor = .left, + .logical_or = .left, }); + const operator_node_id = std.EnumArray(PrecedenceOperator, Node.Id).init(.{ .compare_equal = .compare_equal, .compare_not_equal = .compare_not_equal, .add = .add, .sub = .sub, + .logical_and = .logical_and, + .logical_xor = .logical_xor, + .logical_or = .logical_or, }); fn expressionPrecedence(analyzer: *Analyzer, minimum_precedence: i32) !Node.Index { @@ -734,7 +762,16 @@ const Analyzer = struct { const token = analyzer.tokens[analyzer.token_i]; // logln("Looping in expression precedence with token {}\n", .{token}); const operator: PrecedenceOperator = switch (token.id) { - .equal, .semicolon, .right_parenthesis, .right_brace, .comma, .period, .fixed_keyword_const, .fixed_keyword_var => break, + .equal, + .semicolon, + .right_parenthesis, + .right_brace, + .comma, + .period, + .fixed_keyword_const, + .fixed_keyword_var, + .identifier, + => break, else => blk: { const next_token_index = analyzer.token_i + 1; if (next_token_index < analyzer.tokens.len) { @@ -758,6 +795,18 @@ const Analyzer = struct { .equal => unreachable, else => .sub, }, + .ampersand => switch (next_token_id) { + .equal => unreachable, + else => .logical_and, + }, + .caret => switch (next_token_id) { + .equal => unreachable, + else => .logical_xor, + }, + .vertical_bar => switch (next_token_id) { + .equal => unreachable, + else => .logical_or, + 
}, else => |t| @panic(@tagName(t)), }; } else { @@ -781,6 +830,9 @@ const Analyzer = struct { const extra_token = switch (operator) { .add, .sub, + .logical_and, + .logical_xor, + .logical_or, => false, .compare_equal, .compare_not_equal, @@ -844,7 +896,19 @@ const Analyzer = struct { .colon => unreachable, else => try analyzer.curlySuffixExpression(), }, - .string_literal, .number_literal, .fixed_keyword_true, .fixed_keyword_false, .hash, .fixed_keyword_unreachable, .fixed_keyword_switch, .period, .fixed_keyword_enum, .keyword_signed_integer, .keyword_unsigned_integer => try analyzer.curlySuffixExpression(), + .string_literal, + .number_literal, + .fixed_keyword_true, + .fixed_keyword_false, + .hash, + .fixed_keyword_unreachable, + .fixed_keyword_switch, + .period, + .fixed_keyword_enum, + .keyword_signed_integer, + .keyword_unsigned_integer, + .left_parenthesis, + => try analyzer.curlySuffixExpression(), .fixed_keyword_fn => try analyzer.function(), .fixed_keyword_return => try analyzer.addNode(.{ .id = .@"return", @@ -1028,6 +1092,7 @@ const Analyzer = struct { fn primaryTypeExpression(analyzer: *Analyzer) !Node.Index { const token_i = analyzer.token_i; const token = analyzer.tokens[token_i]; + return try switch (token.id) { .string_literal => blk: { analyzer.token_i += 1; @@ -1157,11 +1222,20 @@ const Analyzer = struct { .right = Node.Index.invalid, }); }, - else => |foo| { - switch (foo) { - .identifier => std.debug.panic("{s}: {s}", .{ @tagName(foo), analyzer.bytes(token_i) }), - else => @panic(@tagName(foo)), - } + .left_parenthesis => blk: { + analyzer.token_i += 1; + const expr = try analyzer.expression(); + _ = try analyzer.expectToken(.right_parenthesis); + break :blk try analyzer.addNode(.{ + .id = .expression_group, + .token = token_i, + .left = expr, + .right = Node.Index.invalid, + }); + }, + else => |t| switch (t) { + .identifier => std.debug.panic("{s}: {s}", .{ @tagName(t), analyzer.bytes(token_i) }), + else => @panic(@tagName(t)), }, }; } diff 
--git a/test/and/main.nat b/test/and/main.nat new file mode 100644 index 0000000..264cab0 --- /dev/null +++ b/test/and/main.nat @@ -0,0 +1,6 @@ +const main = fn() s32 { + var a: s32 = 5; + var b: s32 = 4; + var result = a & b; + return result - b; +} diff --git a/test/or/main.nat b/test/or/main.nat new file mode 100644 index 0000000..ea5f2e6 --- /dev/null +++ b/test/or/main.nat @@ -0,0 +1,6 @@ +const main = fn() s32 { + const a: u32 = 0xffff; + const b: u32 = 0xffff0000; + const c: u32 = 0xffffffff; + return c - (a | b); +} diff --git a/test/xor/main.nat b/test/xor/main.nat new file mode 100644 index 0000000..dbf8f52 --- /dev/null +++ b/test/xor/main.nat @@ -0,0 +1,7 @@ +const main = fn() s32 { + var a: s32 = 561; + var b: s32 = 124; + var c: s32 = a ^ b; + var d: s32 = a ^ b; + return c ^ d; +}