diff options
| author | Ginger Bill <bill@gingerbill.org> | 2017-06-08 12:03:40 +0100 |
|---|---|---|
| committer | Ginger Bill <bill@gingerbill.org> | 2017-06-08 12:03:40 +0100 |
| commit | 9b61adb97dd78e1cf04ad410e72166f684f97925 (patch) | |
| tree | ccb50b757f31c36dcd2bac161d191e2d23dcb6d1 /src/ir_opt.cpp | |
| parent | 333924cce15e10e941ee63d6fcdc19d5cb95bb3c (diff) | |
Build as C++
Diffstat (limited to 'src/ir_opt.cpp')
| -rw-r--r-- | src/ir_opt.cpp | 491 |
1 files changed, 491 insertions, 0 deletions
diff --git a/src/ir_opt.cpp b/src/ir_opt.cpp new file mode 100644 index 000000000..e9d83941d --- /dev/null +++ b/src/ir_opt.cpp @@ -0,0 +1,491 @@ +// Optimizations for the IR code + +void ir_opt_add_operands(irValueArray *ops, irInstr *i) { + switch (i->kind) { + case irInstr_Comment: + break; + case irInstr_Local: + break; + case irInstr_ZeroInit: + array_add(ops, i->ZeroInit.address); + break; + case irInstr_Store: + array_add(ops, i->Store.address); + array_add(ops, i->Store.value); + break; + case irInstr_Load: + array_add(ops, i->Load.address); + break; + case irInstr_ArrayElementPtr: + array_add(ops, i->ArrayElementPtr.address); + array_add(ops, i->ArrayElementPtr.elem_index); + break; + case irInstr_StructElementPtr: + array_add(ops, i->StructElementPtr.address); + break; + case irInstr_PtrOffset: + array_add(ops, i->PtrOffset.address); + array_add(ops, i->PtrOffset.offset); + break; + case irInstr_StructExtractValue: + array_add(ops, i->StructExtractValue.address); + break; + case irInstr_Conv: + array_add(ops, i->Conv.value); + break; + case irInstr_Jump: + break; + case irInstr_If: + array_add(ops, i->If.cond); + break; + case irInstr_Return: + if (i->Return.value != NULL) { + array_add(ops, i->Return.value); + } + break; + case irInstr_Select: + array_add(ops, i->Select.cond); + break; + case irInstr_Phi: + for_array(j, i->Phi.edges) { + array_add(ops, i->Phi.edges.e[j]); + } + break; + case irInstr_Unreachable: + break; + case irInstr_UnaryOp: + array_add(ops, i->UnaryOp.expr); + break; + case irInstr_BinaryOp: + array_add(ops, i->BinaryOp.left); + array_add(ops, i->BinaryOp.right); + break; + case irInstr_Call: + array_add(ops, i->Call.value); + for (isize j = 0; j < i->Call.arg_count; j++) { + array_add(ops, i->Call.args[j]); + } + break; + // case irInstr_VectorExtractElement: + // array_add(ops, i->VectorExtractElement.vector); + // array_add(ops, i->VectorExtractElement.index); + // break; + // case irInstr_VectorInsertElement: + // array_add(ops, i->VectorInsertElement.vector); + // array_add(ops, i->VectorInsertElement.elem); + // array_add(ops, i->VectorInsertElement.index); + // break; + // case irInstr_VectorShuffle: + // array_add(ops, i->VectorShuffle.vector); + // break; + case irInstr_StartupRuntime: + break; + case irInstr_BoundsCheck: + array_add(ops, i->BoundsCheck.index); + array_add(ops, i->BoundsCheck.len); + break; + case irInstr_SliceBoundsCheck: + array_add(ops, i->SliceBoundsCheck.low); + array_add(ops, i->SliceBoundsCheck.high); + break; + } +} + + + + + +void ir_opt_block_replace_pred(irBlock *b, irBlock *from, irBlock *to) { + for_array(i, b->preds) { + irBlock *pred = b->preds.e[i]; + if (pred == from) { + b->preds.e[i] = to; + } + } +} + +void ir_opt_block_replace_succ(irBlock *b, irBlock *from, irBlock *to) { + for_array(i, b->succs) { + irBlock *succ = b->succs.e[i]; + if (succ == from) { + b->succs.e[i] = to; + } + } +} + +bool ir_opt_block_has_phi(irBlock *b) { + return b->instrs.e[0]->Instr.kind == irInstr_Phi; +} + + + + + + + + + + +irValueArray ir_get_block_phi_nodes(irBlock *b) { + irValueArray phis = {0}; + for_array(i, b->instrs) { + irInstr *instr = &b->instrs.e[i]->Instr; + if (instr->kind != irInstr_Phi) { + phis = b->instrs; + phis.count = i; + return phis; + } + } + return phis; +} + +void ir_remove_pred(irBlock *b, irBlock *p) { + irValueArray phis = ir_get_block_phi_nodes(b); + isize i = 0; + for_array(j, b->preds) { + irBlock *pred = b->preds.e[j]; + if (pred != p) { + b->preds.e[i] = b->preds.e[j]; + for_array(k, phis) { + irInstrPhi *phi = &phis.e[k]->Instr.Phi; + phi->edges.e[i] = phi->edges.e[j]; + } + i++; + } + } + b->preds.count = i; + for_array(k, phis) { + irInstrPhi *phi = &phis.e[k]->Instr.Phi; + phi->edges.count = i; + } + +} + +void ir_remove_dead_blocks(irProcedure *proc) { + isize j = 0; + for_array(i, proc->blocks) { + irBlock *b = proc->blocks.e[i]; + if (b == NULL) { + continue; + } + // NOTE(bill): Swap order + b->index = j; + proc->blocks.e[j++] = b; + } + proc->blocks.count = j; +} + +void ir_mark_reachable(irBlock *b) { + isize const WHITE = 0; + isize const BLACK = -1; + b->index = BLACK; + for_array(i, b->succs) { + irBlock *succ = b->succs.e[i]; + if (succ->index == WHITE) { + ir_mark_reachable(succ); + } + } +} + +void ir_remove_unreachable_blocks(irProcedure *proc) { + isize const WHITE = 0; + isize const BLACK = -1; + for_array(i, proc->blocks) { + proc->blocks.e[i]->index = WHITE; + } + + ir_mark_reachable(proc->blocks.e[0]); + + for_array(i, proc->blocks) { + irBlock *b = proc->blocks.e[i]; + if (b->index == WHITE) { + for_array(j, b->succs) { + irBlock *c = b->succs.e[j]; + if (c->index == BLACK) { + ir_remove_pred(c, b); + } + } + // NOTE(bill): Mark as empty but don't actually free it + // As it's been allocated with an arena + proc->blocks.e[i] = NULL; + } + } + ir_remove_dead_blocks(proc); +} + +bool ir_opt_block_fusion(irProcedure *proc, irBlock *a) { + if (a->succs.count != 1) { + return false; + } + irBlock *b = a->succs.e[0]; + if (b->preds.count != 1) { + return false; + } + + if (ir_opt_block_has_phi(b)) { + return false; + } + + array_pop(&a->instrs); // Remove branch at end + for_array(i, b->instrs) { + array_add(&a->instrs, b->instrs.e[i]); + ir_set_instr_parent(b->instrs.e[i], a); + } + + array_clear(&a->succs); + for_array(i, b->succs) { + array_add(&a->succs, b->succs.e[i]); + } + + // Fix preds links + for_array(i, b->succs) { + ir_opt_block_replace_pred(b->succs.e[i], b, a); + } + + proc->blocks.e[b->index] = NULL; + return true; +} + +void ir_opt_blocks(irProcedure *proc) { + ir_remove_unreachable_blocks(proc); + +#if 1 + bool changed = true; + while (changed) { + changed = false; + for_array(i, proc->blocks) { + irBlock *b = proc->blocks.e[i]; + if (b == NULL) { + continue; + } + GB_ASSERT_MSG(b->index == i, "%d, %td", b->index, i); + + if (ir_opt_block_fusion(proc, b)) { + changed = true; + } + // TODO(bill): other simple block optimizations + } + } +#endif + + ir_remove_dead_blocks(proc); +} +void ir_opt_build_referrers(irProcedure *proc) { + gbTempArenaMemory tmp = gb_temp_arena_memory_begin(&proc->module->tmp_arena); + + irValueArray ops = {0}; // NOTE(bill): Act as a buffer + array_init_reserve(&ops, proc->module->tmp_allocator, 64); // HACK(bill): This _could_ overflow the temp arena + for_array(i, proc->blocks) { + irBlock *b = proc->blocks.e[i]; + for_array(j, b->instrs) { + irValue *instr = b->instrs.e[j]; + array_clear(&ops); + ir_opt_add_operands(&ops, &instr->Instr); + for_array(k, ops) { + irValue *op = ops.e[k]; + if (op == NULL) { + continue; + } + irValueArray *refs = ir_value_referrers(op); + if (refs != NULL) { + array_add(refs, instr); + } + } + } + } + + gb_temp_arena_memory_end(tmp); +} + + + + + + + +// State of Lengauer-Tarjan algorithm +// Based on this paper: http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf +typedef struct irLTState { + isize count; + // NOTE(bill): These are arrays + irBlock **sdom; // Semidominator + irBlock **parent; // Parent in DFS traversal of CFG + irBlock **ancestor; +} irLTState; + +// ยง2.2 - bottom of page +void ir_lt_link(irLTState *lt, irBlock *p, irBlock *q) { + lt->ancestor[q->index] = p; +} + +i32 ir_lt_depth_first_search(irLTState *lt, irBlock *p, i32 i, irBlock **preorder) { + preorder[i] = p; + p->dom.pre = i++; + lt->sdom[p->index] = p; + ir_lt_link(lt, NULL, p); + for_array(index, p->succs) { + irBlock *q = p->succs.e[index]; + if (lt->sdom[q->index] == NULL) { + lt->parent[q->index] = p; + i = ir_lt_depth_first_search(lt, q, i, preorder); + } + } + return i; +} + +irBlock *ir_lt_eval(irLTState *lt, irBlock *v) { + irBlock *u = v; + for (; + lt->ancestor[v->index] != NULL; + v = lt->ancestor[v->index]) { + if (lt->sdom[v->index]->dom.pre < lt->sdom[u->index]->dom.pre) { + u = v; + } + } + return u; +} + +typedef struct irDomPrePost { + i32 pre, post; +} irDomPrePost; + +irDomPrePost ir_opt_number_dom_tree(irBlock *v, i32 pre, i32 post) { + irDomPrePost result = {pre, post}; + + v->dom.pre = pre++; + for_array(i, v->dom.children) { + result = ir_opt_number_dom_tree(v->dom.children.e[i], result.pre, result.post); + } + v->dom.post = post++; + + result.pre = pre; + result.post = post; + return result; +} + + +// NOTE(bill): Requires `ir_opt_blocks` to be called before this +void ir_opt_build_dom_tree(irProcedure *proc) { + // Based on this paper: http://jgaa.info/accepted/2006/GeorgiadisTarjanWerneck2006.10.1.pdf + + gbTempArenaMemory tmp = gb_temp_arena_memory_begin(&proc->module->tmp_arena); + + isize n = proc->blocks.count; + irBlock **buf = gb_alloc_array(proc->module->tmp_allocator, irBlock *, 5*n); + + irLTState lt = {0}; + lt.count = n; + lt.sdom = &buf[0*n]; + lt.parent = &buf[1*n]; + lt.ancestor = &buf[2*n]; + + irBlock **preorder = &buf[3*n]; + irBlock **buckets = &buf[4*n]; + irBlock *root = proc->blocks.e[0]; + + // Step 1 - number vertices + i32 pre_num = ir_lt_depth_first_search(<, root, 0, preorder); + gb_memmove(buckets, preorder, n*gb_size_of(preorder[0])); + + for (i32 i = n-1; i > 0; i--) { + irBlock *w = preorder[i]; + + // Step 3 - Implicitly define idom for nodes + for (irBlock *v = buckets[i]; v != w; v = buckets[v->dom.pre]) { + irBlock *u = ir_lt_eval(<, v); + if (lt.sdom[u->index]->dom.pre < i) { + v->dom.idom = u; + } else { + v->dom.idom = w; + } + } + + // Step 2 - Compute all sdoms + lt.sdom[w->index] = lt.parent[w->index]; + for_array(pred_index, w->preds) { + irBlock *v = w->preds.e[pred_index]; + irBlock *u = ir_lt_eval(<, v); + if (lt.sdom[u->index]->dom.pre < lt.sdom[w->index]->dom.pre) { + lt.sdom[w->index] = lt.sdom[u->index]; + } + } + + ir_lt_link(<, lt.parent[w->index], w); + + if (lt.parent[w->index] == lt.sdom[w->index]) { + w->dom.idom = lt.parent[w->index]; + } else { + buckets[i] = buckets[lt.sdom[w->index]->dom.pre]; + buckets[lt.sdom[w->index]->dom.pre] = w; + } + } + + // The rest of Step 3 + for (irBlock *v = buckets[0]; v != root; v = buckets[v->dom.pre]) { + v->dom.idom = root; + } + + // Step 4 - Explicitly define idom for nodes (in preorder) + for (isize i = 1; i < n; i++) { + irBlock *w = preorder[i]; + if (w == root) { + w->dom.idom = NULL; + } else { + // Weird tree relationships here! + + if (w->dom.idom != lt.sdom[w->index]) { + w->dom.idom = w->dom.idom->dom.idom; + } + + // Calculate children relation as inverse of idom + if (w->dom.idom->dom.children.e == NULL) { + // TODO(bill): Is this good enough for memory allocations? + array_init(&w->dom.idom->dom.children, heap_allocator()); + } + array_add(&w->dom.idom->dom.children, w); + } + } + + ir_opt_number_dom_tree(root, 0, 0); + + gb_temp_arena_memory_end(tmp); +} + +void ir_opt_mem2reg(irProcedure *proc) { + // TODO(bill): ir_opt_mem2reg +} + + + +void ir_opt_tree(irGen *s) { + s->opt_called = true; + + for_array(member_index, s->module.procs) { + irProcedure *proc = s->module.procs.e[member_index]; + if (proc->blocks.count == 0) { // Prototype/external procedure + continue; + } + + ir_opt_blocks(proc); + #if 0 + ir_opt_build_referrers(proc); + ir_opt_build_dom_tree(proc); + + // TODO(bill): ir optimization + // [ ] cse (common-subexpression) elim + // [ ] copy elim + // [ ] dead code elim + // [ ] dead store/load elim + // [ ] phi elim + // [ ] short circuit elim + // [ ] bounds check elim + // [ ] lift/mem2reg + // [ ] lift/mem2reg + + ir_opt_mem2reg(proc); + #endif + + GB_ASSERT(proc->blocks.count > 0); + ir_number_proc_registers(proc); + } +} |