Merge pull request #19 from birth-software/schedule-early
GCM schedule early
This commit is contained in:
commit
26459d631d
18
.github/workflows/ci.yml
vendored
18
.github/workflows/ci.yml
vendored
@ -18,12 +18,12 @@ jobs:
|
||||
- name: Build and test
|
||||
run: |
|
||||
./run_tests.sh
|
||||
macos_build_and_test:
|
||||
runs-on: macos-latest
|
||||
timeout-minutes: 15
|
||||
steps:
|
||||
- name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
- name: Build and test
|
||||
run: |
|
||||
./run_tests.sh
|
||||
# macos_build_and_test:
|
||||
# runs-on: macos-latest
|
||||
# timeout-minutes: 15
|
||||
# steps:
|
||||
# - name: Checkout
|
||||
# uses: actions/checkout@v4
|
||||
# - name: Build and test
|
||||
# run: |
|
||||
# ./run_tests.sh
|
||||
|
311
bootstrap/main.c
311
bootstrap/main.c
@ -1308,6 +1308,9 @@ fn String file_read(Arena* arena, String path)
|
||||
assert(read_result >= 0);
|
||||
assert((u64)read_result == file_size);
|
||||
|
||||
auto close_result = syscall_close(file_descriptor);
|
||||
assert(close_result == 0);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -1655,6 +1658,14 @@ typedef struct NodeIndex NodeIndex;
|
||||
declare_slice(NodeIndex);
|
||||
decl_vb(NodeIndex);
|
||||
|
||||
struct NodeCFG
|
||||
{
|
||||
s32 immediate_dominator_tree_depth;
|
||||
s32 loop_depth;
|
||||
s32 anti_dependency;
|
||||
};
|
||||
typedef struct NodeCFG NodeCFG;
|
||||
|
||||
struct NodeConstant
|
||||
{
|
||||
TypeIndex type;
|
||||
@ -1663,10 +1674,17 @@ typedef struct NodeConstant NodeConstant;
|
||||
|
||||
struct NodeStart
|
||||
{
|
||||
NodeCFG cfg;
|
||||
TypeIndex arguments;
|
||||
};
|
||||
typedef struct NodeStart NodeStart;
|
||||
|
||||
struct NodeStop
|
||||
{
|
||||
NodeCFG cfg;
|
||||
};
|
||||
typedef struct NodeStop NodeStop;
|
||||
|
||||
struct ScopePair
|
||||
{
|
||||
StringMap values;
|
||||
@ -1688,14 +1706,6 @@ struct NodeScope
|
||||
};
|
||||
typedef struct NodeScope NodeScope;
|
||||
|
||||
struct NodeCFG
|
||||
{
|
||||
s32 immediate_dominator_tree_depth;
|
||||
s32 loop_depth;
|
||||
s32 anti_dependency;
|
||||
};
|
||||
typedef struct NodeCFG NodeCFG;
|
||||
|
||||
struct NodeProjection
|
||||
{
|
||||
String label;
|
||||
@ -1710,6 +1720,18 @@ struct NodeControlProjection
|
||||
};
|
||||
typedef struct NodeControlProjection NodeControlProjection;
|
||||
|
||||
struct NodeReturn
|
||||
{
|
||||
NodeCFG cfg;
|
||||
};
|
||||
typedef struct NodeReturn NodeReturn;
|
||||
|
||||
struct NodeDeadControl
|
||||
{
|
||||
NodeCFG cfg;
|
||||
};
|
||||
typedef struct NodeDeadControl NodeDeadControl;
|
||||
|
||||
struct Node
|
||||
{
|
||||
Hash hash;
|
||||
@ -1729,8 +1751,11 @@ struct Node
|
||||
{
|
||||
NodeConstant constant;
|
||||
NodeStart start;
|
||||
NodeStop stop;
|
||||
NodeScope scope;
|
||||
NodeControlProjection control_projection;
|
||||
NodeReturn return_node;
|
||||
NodeDeadControl dead_control;
|
||||
};
|
||||
};
|
||||
typedef struct Node Node;
|
||||
@ -1824,6 +1849,13 @@ fn void bitset_set_assert_unset(Bitset* bitset, u64 index)
|
||||
bitset->arr.pointer[element_index] |= 1 << bit_index;
|
||||
}
|
||||
|
||||
// Resets `bitset` to the empty state: zeroes the whole backing storage, then
// sets both the storage length and the bit length back to 0.
fn void bitset_clear(Bitset* bitset)
{
    // memset takes a size in BYTES, so the capacity has to be scaled by the
    // element size; passing the bare capacity leaves the tail of the storage
    // (and any bits set there) untouched when elements are wider than a byte.
    // NOTE(review): assumes `arr.capacity` counts elements, as the other
    // virtual buffers in this file do (they index `pointer[...]` with values
    // masked by `capacity - 1`) — confirm against the Bitset declaration.
    memset(bitset->arr.pointer, 0, bitset->arr.capacity * sizeof(bitset->arr.pointer[0]));
    bitset->arr.length = 0;
    bitset->length = 0;
}
|
||||
|
||||
struct WorkList
|
||||
{
|
||||
VirtualBuffer(NodeIndex) nodes;
|
||||
@ -1872,6 +1904,13 @@ struct Thread
|
||||
};
|
||||
typedef struct Thread Thread;
|
||||
|
||||
// Empties the thread's worklist: the pending node list is truncated to length
// 0 (its storage is not touched) and both bitsets, `bitset` and `visited`,
// are cleared.
fn void thread_worklist_clear(Thread* thread)
{
    auto* worklist = &thread->worklist;

    worklist->nodes.length = 0;
    bitset_clear(&worklist->bitset);
    bitset_clear(&worklist->visited);
}
|
||||
|
||||
fn Type* thread_type_get(Thread* thread, TypeIndex type_index)
|
||||
{
|
||||
assert(validi(type_index));
|
||||
@ -1993,11 +2032,15 @@ fn NodeIndex node_add_output(Thread* thread, NodeIndex node_index, NodeIndex out
|
||||
return node_index;
|
||||
}
|
||||
|
||||
fn void node_unlock(Thread* thread, Node* node)
|
||||
fn NodeIndex intern_pool_remove_node(Thread* thread, NodeIndex node_index);
|
||||
fn void node_unlock(Thread* thread, NodeIndex node_index)
|
||||
{
|
||||
auto* node = thread_node_get(thread, node_index);
|
||||
if (node->hash)
|
||||
{
|
||||
trap();
|
||||
auto old_node_index = intern_pool_remove_node(thread, node_index);
|
||||
assert(index_equal(old_node_index, node_index));
|
||||
node->hash = 0;
|
||||
}
|
||||
}
|
||||
|
||||
@ -2066,7 +2109,7 @@ fn NodeIndex node_set_input(Thread* thread, NodeIndex node_index, u16 index, Nod
|
||||
{
|
||||
auto* node = thread_node_get(thread, node_index);
|
||||
assert(index < node->input_count);
|
||||
node_unlock(thread, node);
|
||||
node_unlock(thread, node_index);
|
||||
auto old_input = node_input_get(thread, node, index);
|
||||
|
||||
if (!index_equal(old_input, new_input))
|
||||
@ -2099,8 +2142,8 @@ fn NodeIndex builder_set_control(Thread* thread, FunctionBuilder* builder, NodeI
|
||||
|
||||
fn NodeIndex node_add_input(Thread* thread, NodeIndex node_index, NodeIndex input_index)
|
||||
{
|
||||
node_unlock(thread, node_index);
|
||||
Node* this_node = thread_node_get(thread, node_index);
|
||||
node_unlock(thread, this_node);
|
||||
node_add_one(thread, &this_node->input_offset, &this_node->input_capacity, &this_node->input_count, input_index);
|
||||
if (validi(input_index))
|
||||
{
|
||||
@ -2169,8 +2212,8 @@ fn void scope_push(Thread* thread, FunctionBuilder* builder)
|
||||
|
||||
fn void node_pop_inputs(Thread* thread, NodeIndex node_index, u16 input_count)
|
||||
{
|
||||
node_unlock(thread, node_index);
|
||||
auto* node = thread_node_get(thread, node_index);
|
||||
node_unlock(thread, node);
|
||||
auto inputs = node_get_inputs(thread, node);
|
||||
for (u16 i = 0; i < input_count; i += 1)
|
||||
{
|
||||
@ -2725,7 +2768,7 @@ fn Hash hash_type(Thread* thread, Type* type)
|
||||
|
||||
fn NodeIndex intern_pool_put_node_at_assume_not_existent_assume_capacity(Thread* thread, NodeIndex node, u32 index)
|
||||
{
|
||||
thread->interned.nodes.pointer[index] = geti(node);
|
||||
thread->interned.nodes.pointer[index] = *(u32*)&node;
|
||||
thread->interned.nodes.length += 1;
|
||||
|
||||
return node;
|
||||
@ -2766,7 +2809,7 @@ fn s32 intern_pool_find_node_slot(Thread* thread, u32 original_index, NodeIndex
|
||||
auto index = it_index & (existing_capacity - 1);
|
||||
u32 key = thread->interned.nodes.pointer[index];
|
||||
|
||||
if ((key == 0) | (key == geti(node_index)))
|
||||
if (key == 0)
|
||||
{
|
||||
result = index;
|
||||
break;
|
||||
@ -2774,10 +2817,18 @@ fn s32 intern_pool_find_node_slot(Thread* thread, u32 original_index, NodeIndex
|
||||
else
|
||||
{
|
||||
NodeIndex existing_node_index = *(NodeIndex*)&key;
|
||||
auto* existing_node = &thread->buffer.nodes.pointer[geti(existing_node_index)];
|
||||
if (existing_node->id == node->id)
|
||||
if (index_equal(existing_node_index, node_index))
|
||||
{
|
||||
trap();
|
||||
result = index;
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
auto* existing_node = &thread->buffer.nodes.pointer[geti(existing_node_index)];
|
||||
if (existing_node->id == node->id)
|
||||
{
|
||||
trap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -2815,7 +2866,7 @@ fn NodeGetOrPut intern_pool_get_or_put_node(Thread* thread, NodeIndex node_index
|
||||
if (slot != -1)
|
||||
{
|
||||
u32 index = slot;
|
||||
u8 existing = thread->interned.nodes.pointer[index];
|
||||
u8 existing = thread->interned.nodes.pointer[index] != 0;
|
||||
auto result = intern_pool_put_node_at_assume_not_existent_assume_capacity(thread, node_index, index);
|
||||
return (NodeGetOrPut) {
|
||||
.index = result,
|
||||
@ -2842,6 +2893,40 @@ fn NodeGetOrPut intern_pool_get_or_put_node(Thread* thread, NodeIndex node_index
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Removes `node_index` from the thread's interned-node table and returns the
// node index that was stored in the slot that got freed.
//
// The slot is found by hashing the node and handing the masked hash to
// intern_pool_find_node_slot(). Traps when that lookup yields -1.
fn NodeIndex intern_pool_remove_node(Thread* thread, NodeIndex node_index)
{
    auto capacity = thread->interned.nodes.capacity;
    auto mask = capacity - 1;
    auto* node = thread_node_get(thread, node_index);
    auto home_index = hash_node(node) & mask;
    auto slot = intern_pool_find_node_slot(thread, home_index, node_index);

    if (slot == -1)
    {
        trap();
    }
    else
    {
        auto* entry = &thread->interned.nodes.pointer[(u32)slot];
        auto removed_node_index = *(NodeIndex*)entry;
        *entry = 0;

        // Walk the slots that follow the freed one (wrapping around) until an
        // empty slot is reached. Any occupied slot met on the way traps.
        // NOTE(review): this looks like a placeholder for re-seating entries
        // displaced by probing — confirm the intended follow-up.
        for (u32 probe = ((u32)slot + 1) & mask; thread->interned.nodes.pointer[probe] != 0; probe = (probe + 1) & mask)
        {
            trap();
        }

        return removed_node_index;
    }
}
|
||||
|
||||
|
||||
global String test_files[] = {
|
||||
strlit("tests/first/main.nat"),
|
||||
};
|
||||
@ -3762,7 +3847,7 @@ fn NodeIndex node_walk(Thread* thread, NodeIndex node_index, NodeCallback* callb
|
||||
{
|
||||
assert(thread->worklist.visited.length == 0);
|
||||
NodeIndex result = node_walk_internal(thread, node_index, callback);
|
||||
thread->worklist.visited.length = 0;
|
||||
bitset_clear(&thread->worklist.visited);
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -3797,6 +3882,190 @@ fn void iterate_peepholes(Thread* thread, NodeIndex stop_node_index)
|
||||
{
|
||||
trap();
|
||||
}
|
||||
|
||||
thread_worklist_clear(thread);
|
||||
}
|
||||
|
||||
// Reports whether `node` is a control-flow node: 1 for start, stop, return,
// control-projection and dead-control nodes, 0 for constants.
// Every other node id traps.
fn u8 node_is_cfg(Node* node)
{
    switch (node->id)
    {
        // Data nodes.
        case NODE_CONSTANT:
            return 0;

        // Control-flow nodes.
        case NODE_START:
        case NODE_STOP:
        case NODE_RETURN:
        case NODE_CONTROL_PROJECTION:
        case NODE_DEAD_CONTROL:
            return 1;

        default:
            trap();
    }
}
|
||||
|
||||
// Depth-first walk over the control-flow nodes reachable from `node_index`
// through node outputs.
//
// A node is appended to `thread->worklist.nodes` only after all of its valid
// outputs have been walked, so the list ends up in post-order. Nodes that are
// not control-flow nodes, or that are already marked in
// `thread->worklist.visited`, are ignored.
fn void rpo_cfg(Thread* thread, NodeIndex node_index)
{
    auto* node = thread_node_get(thread, node_index);
    if (!node_is_cfg(node))
    {
        return;
    }

    auto* visited = &thread->worklist.visited;
    auto visited_bit = geti(node_index);
    if (bitset_get(visited, visited_bit))
    {
        return;
    }

    // Mark before descending so a node reached again is skipped.
    bitset_set_assert_unset(visited, visited_bit);

    auto outputs = node_get_outputs(thread, node);
    for (u64 output_i = 0; output_i < outputs.length; output_i += 1)
    {
        auto output = outputs.pointer[output_i];
        if (!validi(output))
        {
            continue;
        }

        rpo_cfg(thread, output);
    }

    *vb_add(&thread->worklist.nodes, 1) = node_index;
}
|
||||
|
||||
// Returns the loop depth of a control-flow node, computing and caching it in
// the node's `cfg.loop_depth` field on first use (0 means "not cached yet").
//
// Start and stop nodes get depth 1. Return, control-projection and
// dead-control nodes take the depth of the node at their input 0, computed
// recursively. Any other node id traps; `node` must satisfy node_is_cfg().
fn s32 node_loop_depth(Thread* thread, Node* node)
{
    assert(node_is_cfg(node));

    // Where this node kind caches its depth, and whether the depth is the
    // fixed value 1 instead of being inherited from input 0.
    s32* cached_depth = 0;
    u8 has_fixed_depth = 0;

    switch (node->id)
    {
        case NODE_START:
            cached_depth = &node->start.cfg.loop_depth;
            has_fixed_depth = 1;
            break;
        case NODE_STOP:
            cached_depth = &node->stop.cfg.loop_depth;
            has_fixed_depth = 1;
            break;
        case NODE_RETURN:
            cached_depth = &node->return_node.cfg.loop_depth;
            break;
        case NODE_CONTROL_PROJECTION:
            cached_depth = &node->control_projection.cfg.loop_depth;
            break;
        case NODE_DEAD_CONTROL:
            cached_depth = &node->dead_control.cfg.loop_depth;
            break;
        default:
            trap();
    }

    if (!*cached_depth)
    {
        if (has_fixed_depth)
        {
            *cached_depth = 1;
        }
        else
        {
            auto input_index = node_input_get(thread, node, 0);
            auto* input = thread_node_get(thread, input_index);
            *cached_depth = node_loop_depth(thread, input);
        }
    }

    return *cached_depth;
}
|
||||
|
||||
// Returns 1 when `node` is a region or a loop-region node, 0 otherwise.
fn u8 node_is_region(Node* node)
{
    switch (node->id)
    {
        case NODE_REGION:
        case NODE_REGION_LOOP:
            return 1;
        default:
            return 0;
    }
}
|
||||
|
||||
// Reports whether `node` is pinned: 1 for start nodes, 0 for constants.
// Every other node id traps.
fn u8 node_is_pinned(Node* node)
{
    switch (node->id)
    {
        case NODE_CONSTANT:
            return 0;

        case NODE_START:
            return 1;

        default:
            trap();
    }
}
|
||||
|
||||
// "Schedule early" step for a single node.
//
// Does nothing for an invalid index or a node already marked in
// `thread->worklist.visited`. Otherwise the node is marked, its inputs are
// inspected, and, when the node itself is not pinned, its input 0 is set to
// the chosen control node, which starts out as `start_node`.
//
// NOTE(review): the trap() calls below sit on paths that look unfinished —
// a valid input that is not pinned, and any input past index 0 of an unpinned
// node. Confirm they are placeholders before relying on this beyond the
// simplest graphs.
fn void schedule_early(Thread* thread, NodeIndex node_index, NodeIndex start_node)
{
    if (!validi(node_index))
    {
        return;
    }

    if (bitset_get(&thread->worklist.visited, geti(node_index)))
    {
        return;
    }

    bitset_set_assert_unset(&thread->worklist.visited, geti(node_index));

    auto* node = thread_node_get(thread, node_index);
    auto inputs = node_get_inputs(thread, node);

    for (u64 input_i = 0; input_i < inputs.length; input_i += 1)
    {
        auto input = inputs.pointer[input_i];
        if (!validi(input))
        {
            continue;
        }

        auto* input_node = thread_node_get(thread, input);
        if (!node_is_pinned(input_node))
        {
            trap();
        }
    }

    if (node_is_pinned(node))
    {
        return;
    }

    auto early = start_node;

    // Input 0 is the slot rewritten below, so the scan starts at input 1.
    for (u64 input_i = 1; input_i < inputs.length; input_i += 1)
    {
        auto* input_node = thread_node_get(thread, inputs.pointer[input_i]);
        auto control_index = node_input_get(thread, input_node, 0);
        auto* control_node = thread_node_get(thread, control_index);
        assert(node_is_cfg(control_node));
        trap();
    }

    node_set_input(thread, node_index, 0, early);
}
|
||||
|
||||
// Global-code-motion driver for one function.
//
// Collects the control-flow nodes reachable from `start_node_index` with
// rpo_cfg(), then visits them from the back of the worklist to the front.
// For each one it computes the loop depth and runs schedule_early() on every
// input. Region nodes trap. `stop_node_index` is not used by this body.
fn void gcm_build_cfg(Thread* thread, NodeIndex start_node_index, NodeIndex stop_node_index)
{
    // Fix loops
    {
        // TODO:
    }

    // Schedule early
    rpo_cfg(thread, start_node_index);

    // rpo_cfg() appended the nodes in post-order; walking the list backwards
    // visits them in reverse post-order.
    for (u32 remaining = thread->worklist.nodes.length; remaining > 0; remaining -= 1)
    {
        auto cfg_node_index = thread->worklist.nodes.pointer[remaining - 1];
        auto* cfg_node = thread_node_get(thread, cfg_node_index);

        // Called for its caching side effect; the returned depth is unused.
        node_loop_depth(thread, cfg_node);

        auto inputs = node_get_inputs(thread, cfg_node);
        for (u64 input_i = 0; input_i < inputs.length; input_i += 1)
        {
            schedule_early(thread, inputs.pointer[input_i], start_node_index);
        }

        if (node_is_region(cfg_node))
        {
            trap();
        }
    }
}
|
||||
|
||||
fn void thread_init(Thread* thread)
|
||||
@ -4080,8 +4349,10 @@ extern "C" void entry_point()
|
||||
for (u32 function_i = 0; function_i < thread->buffer.functions.length; function_i += 1)
|
||||
{
|
||||
Function* function = &thread->buffer.functions.pointer[function_i];
|
||||
NodeIndex start_node_index = function->start;
|
||||
NodeIndex stop_node_index = function->stop;
|
||||
iterate_peepholes(thread, stop_node_index);
|
||||
gcm_build_cfg(thread, start_node_index, stop_node_index);
|
||||
}
|
||||
|
||||
thread_clear(thread);
|
||||
|
Loading…
x
Reference in New Issue
Block a user