diff options
| author | gingerBill <bill@gingerbill.org> | 2023-09-11 23:36:39 +0100 |
|---|---|---|
| committer | gingerBill <bill@gingerbill.org> | 2023-09-11 23:36:39 +0100 |
| commit | 984a95b8c76cc5a42b401d9a1761c52ede344754 (patch) | |
| tree | 66c8d4cfbe1bb57bb12de9776fd10af3673b9f88 /src/tilde | |
| parent | 72118fcc6ac4e03710cc32659d7978a7496ea027 (diff) | |
Update Tilde
Diffstat (limited to 'src/tilde')
| -rw-r--r-- | src/tilde/tb.h | 305 | ||||
| -rw-r--r-- | src/tilde/tb.lib | bin | 4229318 -> 4265424 bytes | |||
| -rw-r--r-- | src/tilde/tb_arena.h | 6 |
3 files changed, 211 insertions, 100 deletions
diff --git a/src/tilde/tb.h b/src/tilde/tb.h index 081a84995..5bb98fe70 100644 --- a/src/tilde/tb.h +++ b/src/tilde/tb.h @@ -1,3 +1,12 @@ +// Glossary (because i don't know where else to put it) +// IR - intermediate representation +// SoN - sea of nodes (https://www.oracle.com/technetwork/java/javase/tech/c2-ir95-150110.pdf) +// SSA - single static assignment +// GVN - global value numbering +// CSE - common subexpression elimination +// DSE - dead store elimination +// GCM - global code motion +// SROA - scalar replacement of aggregates #ifndef TB_CORE_H #define TB_CORE_H @@ -150,18 +159,19 @@ typedef enum TB_ISelMode { typedef enum TB_DataTypeEnum { // Integers, note void is an i0 and bool is an i1 - // i(0-2047) + // i(0-64) TB_INT, // Floating point numbers // f{32,64} TB_FLOAT, // Pointers - // ptr(0-2047) TB_PTR, // Tuples, these cannot be used in memory ops, just accessed via projections TB_TUPLE, - // represents control flow as a kind of data + // represents control flow for REGION, BRANCH TB_CONTROL, + // represents memory (and I/O) + TB_MEMORY, } TB_DataTypeEnum; typedef enum TB_FloatFormat { @@ -179,6 +189,7 @@ typedef union TB_DataType { }; uint32_t raw; } TB_DataType; +static_assert(sizeof(TB_DataType) == 4, "im expecting this to be a uint32_t"); // classify data types #define TB_IS_VOID_TYPE(x) ((x).type == TB_INT && (x).data == 0) @@ -192,75 +203,136 @@ typedef union TB_DataType { #define TB_GET_FLOAT_FORMAT(x) ((x).data) #define TB_GET_PTR_ADDRSPACE(x) ((x).data) +//////////////////////////////// +// ANNOTATIONS +//////////////////////////////// +// +// (A, B) -> (C, D) +// +// node takes A and B, produces C, D. if there's multiple +// results we need to use projections and the indices are +// based on the order seen here, proj0 is C, proj1 is D. +// +// (A, B) & C -> Int +// +// nodes takes A and B along with C in it's extra data. this is +// where non-node inputs fit. +// typedef enum TB_NodeTypeEnum { TB_NULL = 0, - // Immediates + //////////////////////////////// + // CONSTANTS + //////////////////////////////// TB_INTEGER_CONST, TB_FLOAT32_CONST, TB_FLOAT64_CONST, - // only one per function - TB_START, // fn() - - // regions represent the begining of BBs - TB_REGION, // fn(preds: []region) - - // projection - TB_PROJ, - - TB_CALL, // normal call - TB_SYSCALL, // system call - - // Managed ops - TB_SAFEPOINT, - - // Memory operations - TB_STORE, // fn(r: control, addr: data, src: data) - TB_MEMCPY, - TB_MEMSET, - - // Atomics - TB_ATOMIC_TEST_AND_SET, - TB_ATOMIC_CLEAR, - - TB_ATOMIC_LOAD, - TB_ATOMIC_XCHG, - TB_ATOMIC_ADD, - TB_ATOMIC_SUB, - TB_ATOMIC_AND, - TB_ATOMIC_XOR, - TB_ATOMIC_OR, - - TB_ATOMIC_CMPXCHG, - TB_DEBUGBREAK, - - // Terminators - TB_BRANCH, - TB_RET, - TB_UNREACHABLE, - TB_TRAP, - - TB_POISON, - - // Load - TB_LOAD, - - // Pointers - TB_LOCAL, - - TB_GET_SYMBOL_ADDRESS, - - TB_MEMBER_ACCESS, - TB_ARRAY_ACCESS, + //////////////////////////////// + // MISCELLANEOUS + //////////////////////////////// + // this is an unspecified value, usually generated by the optimizer + // when malformed input is folded into an operation. + TB_POISON, // () -> Any + // projections just extract a single field of a tuple + TB_PROJ, // Tuple & Int -> Any + // this is a simple way to embed machine code into the code + TB_MACHINE_OP, // (Control, Memory) & Buffer -> (Control, Memory) + // reads the TSC on x64 + TB_CYCLE_COUNTER, // (Control) -> Int64 + + //////////////////////////////// + // CONTROL + //////////////////////////////// + // there's only one START and STOP per function + TB_START, // () -> (Control, Memory, Data...) + TB_END, // (Control, Memory, Data?) -> () + // regions are used to represent paths which have multiple entries. + // each input is a predecessor. + TB_REGION, // (Control...) -> (Control) + // phi nodes work the same as in SSA CFG, the value is based on which predecessor was taken. + // each input lines up with the regions such that region.in[i] will use phi.in[i+1] as the + // subsequent data. + TB_PHI, // (Control, Data...) -> Data + // branch is used to implement most control flow, it acts like a switch + // statement in C usually. they take a key and match against some cases, + // if they match, it'll jump to that successor, if none match it'll take + // the default successor. + // + // if (cond) { A; } else { B; } is just switch (cond) { case 0: B; default: A; } + // + // it's possible to not pass a key and the default successor is always called, this is + // a GOTO. tb_inst_goto, tb_inst_if can handle common cases for you. + TB_BRANCH, // (Control, Data?) -> (Control...) + // debugbreak will trap in a continuable manner. + TB_DEBUGBREAK, // (Control, Memory) -> (Control) + // trap will not be continuable but will stop execution. + TB_TRAP, // (Control) -> (Control) + // unreachable means it won't trap or be continuable. + TB_UNREACHABLE, // (Control) -> (Control) + + //////////////////////////////// + // CONTROL + MEMORY + //////////////////////////////// + // nothing special, it's just a function call, 3rd argument here is the + // target pointer (or syscall number) and the rest are just data args. + TB_CALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data) + TB_SYSCALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data) + // safepoint polls are the same except they only trigger if the poll site + // says to (platform specific but almost always just the page being made + // unmapped/guard), 3rd argument is the poll site. + TB_SAFEPOINT_POLL, // (Control, Memory, Ptr, Data...) -> (Control) + + //////////////////////////////// + // MEMORY + //////////////////////////////// + // LOAD and STORE are standard memory accesses, they can be folded away. + TB_LOAD, // (Memory, Ptr) -> Data + TB_STORE, // (Memory, Ptr, Data) -> Memory + // bulk memory ops. + TB_MEMCPY, // (Memory, Ptr, Ptr, Size) -> Memory + TB_MEMSET, // (Memory, Ptr, Int8, Size) -> Memory + // these memory accesses represent "volatile" which means + // they may produce side effects and thus cannot be eliminated. + TB_READ, // (Memory, Ptr) -> (Memory, Data) + TB_WRITE, // (Memory, Ptr, Data) -> (Memory, Data) + // atomics have multiple observers (if not they wouldn't need to + // be atomic) and thus produce side effects everywhere just like + // volatiles except they have synchronization guarentees. the atomic + // data ops will return the value before the operation is performed. + // Atomic CAS return the old value and a boolean for success (true if + // the value was changed) + TB_ATOMIC_LOAD, // (Memory, Ptr) -> (Memory, Data) + TB_ATOMIC_XCHG, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_ADD, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_SUB, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_AND, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_XOR, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_OR, // (Memory, Ptr, Data) -> (Memory, Data) + TB_ATOMIC_CAS, // (Memory, Data, Data) -> (Memory, Data, Bool) + + //////////////////////////////// + // POINTERS + //////////////////////////////// + // LOCAL will statically allocate stack space + TB_LOCAL, // () & (Int, Int) -> Ptr + // SYMBOL will return a pointer to a TB_Symbol + TB_SYMBOL, // () & TB_Symbol* -> Ptr + // offsets pointer by constant value + TB_MEMBER_ACCESS, // Ptr & Int -> Ptr + // arguments represent base, index, and stride respectively + // and will perform `base + index*stride` + TB_ARRAY_ACCESS, // (Ptr, Int) & Int -> Ptr + // converts an integer to a pointer + TB_INT2PTR, // Int -> Ptr + // converts a pointer to an integer + TB_PTR2INT, // Ptr -> Int // Conversions TB_TRUNCATE, TB_FLOAT_EXT, TB_SIGN_EXT, TB_ZERO_EXT, - TB_INT2PTR, - TB_PTR2INT, TB_UINT2FLOAT, TB_FLOAT2UINT, TB_INT2FLOAT, @@ -315,18 +387,17 @@ typedef enum TB_NodeTypeEnum { TB_CMP_FLE, // Special ops - // does full multiplication (64x64=128 and so on) returning - // the low and high values in separate projections + // adds two paired integers to two other paired integers and returns + // a low and high value + TB_ADDPAIR, + // does full multiplication (64x64=128 and so on) returning + // the low and high values in separate projections TB_MULPAIR, - // PHI - TB_PHI, // fn(r: region, x: []data) - // variadic TB_VA_START, // x86 intrinsics - TB_X86INTRIN_RDTSC, TB_X86INTRIN_LDMXCSR, TB_X86INTRIN_STMXCSR, TB_X86INTRIN_SQRT, @@ -372,6 +443,9 @@ typedef struct TB_FunctionPrototype TB_FunctionPrototype; typedef struct TB_Attrib TB_Attrib; +// target-specific, just a unique ID for the registers +typedef int TB_PhysicalReg; + // Refers generically to objects within a module // // TB_Function, TB_Global, and TB_External are all subtypes of TB_Symbol @@ -412,12 +486,11 @@ typedef struct TB_Symbol { typedef struct TB_Node TB_Node; struct TB_Node { TB_NodeType type; + uint16_t input_count; // number of node inputs. TB_DataType dt; - uint16_t input_count; // number of node inputs - uint16_t extra_count; // number of bytes for extra operand data - // local to the TB_Passes - uint32_t lattice_id; + // makes it easier to track in graph walks + size_t gvn; TB_Attrib* attribs; TB_Node** inputs; @@ -442,9 +515,8 @@ typedef struct { // TB_PROJ int index; } TB_NodeProj; -typedef struct { // TB_INT - uint64_t num_words; - uint64_t words[]; +typedef struct { // TB_INTEGER_CONST + uint64_t value; } TB_NodeInt; typedef struct { // any compare operator @@ -457,11 +529,10 @@ typedef struct { // any integer binary operator typedef struct { // TB_MULPAIR TB_Node *lo, *hi; -} TB_NodeMulPair; +} TB_NodeArithPair; typedef struct { TB_CharUnits align; - bool is_volatile; } TB_NodeMemAccess; typedef struct { @@ -469,6 +540,16 @@ typedef struct { } TB_NodeLocal; typedef struct { + // this is the raw buffer + size_t length; + const uint8_t* data; + + // represents the outputs, inputs and temporaries in that order + size_t outs, ins, tmps; + TB_PhysicalReg regs[]; +} TB_NodeMachineOp; + +typedef struct { float value; } TB_NodeFloat32; @@ -491,6 +572,8 @@ typedef struct { typedef struct { TB_MemoryOrder order; TB_MemoryOrder order2; + TB_Node* proj0; + TB_Node* proj1; } TB_NodeAtomic; typedef struct { @@ -506,9 +589,18 @@ typedef struct { TB_Node* end; const char* tag; + // position in a postorder walk + int postorder_id; // immediate dominator (can be approximate) int dom_depth; TB_Node* dom; + + // used for IR building only, stale after that. + // + // this represents the first and last memory values within a region, + // if a region ever has multiple predecessors we apply a join on these + // memory. + TB_Node *mem_in, *mem_out; } TB_NodeRegion; typedef struct TB_MultiOutput { @@ -558,7 +650,7 @@ typedef struct { #define TB_TYPE_F64 TB_DataType{ { TB_FLOAT, 0, TB_FLT_64 } } #define TB_TYPE_BOOL TB_DataType{ { TB_INT, 0, 1 } } #define TB_TYPE_PTR TB_DataType{ { TB_PTR, 0, 0 } } - +#define TB_TYPE_MEMORY TB_DataType{ { TB_MEMORY,0, 0 } } #define TB_TYPE_INTN(N) TB_DataType{ { TB_INT, 0, (N) } } #define TB_TYPE_PTRN(N) TB_DataType{ { TB_PTR, 0, (N) } } @@ -575,8 +667,9 @@ typedef struct { #define TB_TYPE_F64 (TB_DataType){ { TB_FLOAT, 0, TB_FLT_64 } } #define TB_TYPE_BOOL (TB_DataType){ { TB_INT, 0, 1 } } #define TB_TYPE_PTR (TB_DataType){ { TB_PTR, 0, 0 } } -#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } } -#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } } +#define TB_TYPE_MEMORY (TB_DataType){ { TB_MEMORY,0, 0 } } +#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } } +#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } } #endif @@ -737,7 +830,7 @@ TB_API TB_External* tb_next_external(TB_External* e); // this is used JIT scenarios to tell the compiler what externals map to TB_API TB_ExternalType tb_extern_get_type(TB_External* e); -TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage); +TB_API TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage); TB_API TB_External* tb_extern_create(TB_Module* m, ptrdiff_t len, const char* name, TB_ExternalType type); @@ -884,6 +977,11 @@ TB_API void tb_default_print_callback(void* user_data, const char* fmt, ...); TB_API void tb_inst_set_location(TB_Function* f, TB_SourceFile* file, int line, int column); TB_API void tb_inst_reset_location(TB_Function* f); +// this is where the STOP will be +TB_API void tb_inst_set_exit_location(TB_Function* f, TB_SourceFile* file, int line, int column); + +TB_API bool tb_has_effects(TB_Node* n); + // if section is NULL, default to .text TB_API TB_Function* tb_function_create(TB_Module* m, ptrdiff_t len, const char* name, TB_Linkage linkage, TB_ComdatType comdat); @@ -927,9 +1025,12 @@ TB_API TB_Node* tb_inst_float2int(TB_Function* f, TB_Node* src, TB_DataType dt, TB_API TB_Node* tb_inst_bitcast(TB_Function* f, TB_Node* src, TB_DataType dt); TB_API TB_Node* tb_inst_local(TB_Function* f, TB_CharUnits size, TB_CharUnits align); + TB_API TB_Node* tb_inst_load(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_CharUnits align, bool is_volatile); TB_API void tb_inst_store(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_Node* val, TB_CharUnits align, bool is_volatile); +TB_API void tb_inst_safepoint_poll(TB_Function* f, TB_Node* addr, int input_count, TB_Node** inputs); + TB_API TB_Node* tb_inst_bool(TB_Function* f, bool imm); TB_API TB_Node* tb_inst_sint(TB_Function* f, TB_DataType dt, int64_t imm); TB_API TB_Node* tb_inst_uint(TB_Function* f, TB_DataType dt, uint64_t imm); @@ -939,14 +1040,14 @@ TB_API TB_Node* tb_inst_cstring(TB_Function* f, const char* str); TB_API TB_Node* tb_inst_string(TB_Function* f, size_t len, const char* str); // write 'val' over 'count' bytes on 'dst' -TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align, bool is_volatile); +TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align); // zero 'count' bytes on 'dst' -TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align, bool is_volatile); +TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align); // performs a copy of 'count' elements from one memory location to another // both locations cannot overlap. -TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align, bool is_volatile); +TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align); // result = base + (index * stride) TB_API TB_Node* tb_inst_array_access(TB_Function* f, TB_Node* base, TB_Node* index, int64_t stride); @@ -1033,9 +1134,9 @@ TB_API TB_Node* tb_inst_cmp_fge(TB_Function* f, TB_Node* a, TB_Node* b); // General intrinsics TB_API TB_Node* tb_inst_va_start(TB_Function* f, TB_Node* a); +TB_API TB_Node* tb_inst_cycle_counter(TB_Function* f); // x86 Intrinsics -TB_API TB_Node* tb_inst_x86_rdtsc(TB_Function* f); TB_API TB_Node* tb_inst_x86_ldmxcsr(TB_Function* f, TB_Node* a); TB_API TB_Node* tb_inst_x86_stmxcsr(TB_Function* f); TB_API TB_Node* tb_inst_x86_sqrt(TB_Function* f, TB_Node* a); @@ -1061,6 +1162,19 @@ TB_API void tb_inst_ret(TB_Function* f, size_t count, TB_Node** values); //////////////////////////////// // Passes //////////////////////////////// +typedef enum { + // allowed to remove PHIs nodes, this is + // helpful because the default IR building + // will produce tons of useless memory PHIs. + TB_PEEPHOLE_PHI = 1, + + // it's allowed to fold memory operations (store or load elimination) + TB_PEEPHOLE_MEMORY = 2, + + // just do every reduction rule i can provide you + TB_PEEPHOLE_ALL = 7, +} TB_PeepholeFlags; + // Function analysis, optimizations, and codegen are all part of this typedef struct TB_Passes TB_Passes; @@ -1069,35 +1183,32 @@ TB_API TB_Passes* tb_pass_enter(TB_Function* f, TB_Arena* arena); TB_API void tb_pass_exit(TB_Passes* opt); // transformation passes: -// peephole: runs most simple reductions on the code, -// should be run after any bigger passes (it's incremental -// so it's not that bad) +// peephole: 99% of the optimizer, i'm sea of nodes pilled so i +// break down most optimizations into local rewrites, it's +// incremental and recommended to run after any non-peephole +// pass. // -// mem2reg: lowers TB_LOCALs into SSA values, this makes more +// mem2reg: lowers TB_LOCALs into SoN values, this makes more // data flow analysis possible on the code and allows to codegen // to place variables into registers. // -// cfg: performs simplifications on the CFG like `a && b => select(a, b, 0)` -// or removing redundant branches. -// -// loop: NOT READY -// -TB_API bool tb_pass_peephole(TB_Passes* opt); +// SROA: splits LOCALs into multiple to allow for more dataflow +// analysis later on. +TB_API void tb_pass_peephole(TB_Passes* opt, TB_PeepholeFlags flags); +TB_API void tb_pass_sroa(TB_Passes* opt); TB_API bool tb_pass_mem2reg(TB_Passes* opt); -TB_API bool tb_pass_loop(TB_Passes* opt); -TB_API bool tb_pass_cfg(TB_Passes* opt); + +TB_API void tb_pass_schedule(TB_Passes* opt); // analysis // print: prints IR in a flattened text form. TB_API bool tb_pass_print(TB_Passes* opt); -TB_API void tb_pass_schedule(TB_Passes* opt); - // codegen TB_API TB_FunctionOutput* tb_pass_codegen(TB_Passes* opt, bool emit_asm); TB_API void tb_pass_kill_node(TB_Passes* opt, TB_Node* n); -TB_API bool tb_pass_mark(TB_Passes* opt, TB_Node* n); +TB_API void tb_pass_mark(TB_Passes* opt, TB_Node* n); TB_API void tb_pass_mark_users(TB_Passes* opt, TB_Node* n); //////////////////////////////// diff --git a/src/tilde/tb.lib b/src/tilde/tb.lib Binary files differindex 2ec6ae708..4b3cdb9b0 100644 --- a/src/tilde/tb.lib +++ b/src/tilde/tb.lib diff --git a/src/tilde/tb_arena.h b/src/tilde/tb_arena.h index 548427139..67ec0e181 100644 --- a/src/tilde/tb_arena.h +++ b/src/tilde/tb_arena.h @@ -20,9 +20,9 @@ #endif enum { - TB_ARENA_SMALL_CHUNK_SIZE = 4 * 1024, - TB_ARENA_MEDIUM_CHUNK_SIZE = 512 * 1024, - TB_ARENA_LARGE_CHUNK_SIZE = 2 * 1024 * 1024, + TB_ARENA_SMALL_CHUNK_SIZE = 4 * 1024, + TB_ARENA_MEDIUM_CHUNK_SIZE = 512 * 1024, + TB_ARENA_LARGE_CHUNK_SIZE = 16 * 1024 * 1024, TB_ARENA_ALIGNMENT = 16, }; |