aboutsummaryrefslogtreecommitdiff
path: root/src/tilde
diff options
context:
space:
mode:
authorgingerBill <bill@gingerbill.org>2023-09-19 15:13:05 +0100
committergingerBill <bill@gingerbill.org>2023-09-19 15:13:05 +0100
commit6257d0e1a9d9107043e13af410228a298d14abcb (patch)
tree35677554f24da2d66c8fb0c96893a2fc04189b00 /src/tilde
parentb2f1c58321a21c3376e6ba329bdf928da5fabc94 (diff)
parentecde06e3a31179bd8f86383fd65cfbce31ab6d9a (diff)
Merge branch 'master' into windows-llvm-11.1.0windows-llvm-11.1.0
Diffstat (limited to 'src/tilde')
-rw-r--r--src/tilde/tb.h305
-rw-r--r--src/tilde/tb.libbin4229318 -> 4265424 bytes
-rw-r--r--src/tilde/tb_arena.h6
3 files changed, 211 insertions, 100 deletions
diff --git a/src/tilde/tb.h b/src/tilde/tb.h
index 081a84995..5bb98fe70 100644
--- a/src/tilde/tb.h
+++ b/src/tilde/tb.h
@@ -1,3 +1,12 @@
+// Glossary (because i don't know where else to put it)
+// IR - intermediate representation
+// SoN - sea of nodes (https://www.oracle.com/technetwork/java/javase/tech/c2-ir95-150110.pdf)
+// SSA - single static assignment
+// GVN - global value numbering
+// CSE - common subexpression elimination
+// DSE - dead store elimination
+// GCM - global code motion
+// SROA - scalar replacement of aggregates
#ifndef TB_CORE_H
#define TB_CORE_H
@@ -150,18 +159,19 @@ typedef enum TB_ISelMode {
typedef enum TB_DataTypeEnum {
// Integers, note void is an i0 and bool is an i1
- // i(0-2047)
+ // i(0-64)
TB_INT,
// Floating point numbers
// f{32,64}
TB_FLOAT,
// Pointers
- // ptr(0-2047)
TB_PTR,
// Tuples, these cannot be used in memory ops, just accessed via projections
TB_TUPLE,
- // represents control flow as a kind of data
+ // represents control flow for REGION, BRANCH
TB_CONTROL,
+ // represents memory (and I/O)
+ TB_MEMORY,
} TB_DataTypeEnum;
typedef enum TB_FloatFormat {
@@ -179,6 +189,7 @@ typedef union TB_DataType {
};
uint32_t raw;
} TB_DataType;
+static_assert(sizeof(TB_DataType) == 4, "im expecting this to be a uint32_t");
// classify data types
#define TB_IS_VOID_TYPE(x) ((x).type == TB_INT && (x).data == 0)
@@ -192,75 +203,136 @@ typedef union TB_DataType {
#define TB_GET_FLOAT_FORMAT(x) ((x).data)
#define TB_GET_PTR_ADDRSPACE(x) ((x).data)
+////////////////////////////////
+// ANNOTATIONS
+////////////////////////////////
+//
+// (A, B) -> (C, D)
+//
+// node takes A and B, produces C, D. if there's multiple
+// results we need to use projections and the indices are
+// based on the order seen here, proj0 is C, proj1 is D.
+//
+// (A, B) & C -> Int
+//
+// nodes takes A and B along with C in it's extra data. this is
+// where non-node inputs fit.
+//
typedef enum TB_NodeTypeEnum {
TB_NULL = 0,
- // Immediates
+ ////////////////////////////////
+ // CONSTANTS
+ ////////////////////////////////
TB_INTEGER_CONST,
TB_FLOAT32_CONST,
TB_FLOAT64_CONST,
- // only one per function
- TB_START, // fn()
-
- // regions represent the begining of BBs
- TB_REGION, // fn(preds: []region)
-
- // projection
- TB_PROJ,
-
- TB_CALL, // normal call
- TB_SYSCALL, // system call
-
- // Managed ops
- TB_SAFEPOINT,
-
- // Memory operations
- TB_STORE, // fn(r: control, addr: data, src: data)
- TB_MEMCPY,
- TB_MEMSET,
-
- // Atomics
- TB_ATOMIC_TEST_AND_SET,
- TB_ATOMIC_CLEAR,
-
- TB_ATOMIC_LOAD,
- TB_ATOMIC_XCHG,
- TB_ATOMIC_ADD,
- TB_ATOMIC_SUB,
- TB_ATOMIC_AND,
- TB_ATOMIC_XOR,
- TB_ATOMIC_OR,
-
- TB_ATOMIC_CMPXCHG,
- TB_DEBUGBREAK,
-
- // Terminators
- TB_BRANCH,
- TB_RET,
- TB_UNREACHABLE,
- TB_TRAP,
-
- TB_POISON,
-
- // Load
- TB_LOAD,
-
- // Pointers
- TB_LOCAL,
-
- TB_GET_SYMBOL_ADDRESS,
-
- TB_MEMBER_ACCESS,
- TB_ARRAY_ACCESS,
+ ////////////////////////////////
+ // MISCELLANEOUS
+ ////////////////////////////////
+ // this is an unspecified value, usually generated by the optimizer
+ // when malformed input is folded into an operation.
+ TB_POISON, // () -> Any
+ // projections just extract a single field of a tuple
+ TB_PROJ, // Tuple & Int -> Any
+ // this is a simple way to embed machine code into the code
+ TB_MACHINE_OP, // (Control, Memory) & Buffer -> (Control, Memory)
+ // reads the TSC on x64
+ TB_CYCLE_COUNTER, // (Control) -> Int64
+
+ ////////////////////////////////
+ // CONTROL
+ ////////////////////////////////
+ // there's only one START and STOP per function
+ TB_START, // () -> (Control, Memory, Data...)
+ TB_END, // (Control, Memory, Data?) -> ()
+ // regions are used to represent paths which have multiple entries.
+ // each input is a predecessor.
+ TB_REGION, // (Control...) -> (Control)
+ // phi nodes work the same as in SSA CFG, the value is based on which predecessor was taken.
+ // each input lines up with the regions such that region.in[i] will use phi.in[i+1] as the
+ // subsequent data.
+ TB_PHI, // (Control, Data...) -> Data
+ // branch is used to implement most control flow, it acts like a switch
+ // statement in C usually. they take a key and match against some cases,
+ // if they match, it'll jump to that successor, if none match it'll take
+ // the default successor.
+ //
+ // if (cond) { A; } else { B; } is just switch (cond) { case 0: B; default: A; }
+ //
+ // it's possible to not pass a key and the default successor is always called, this is
+ // a GOTO. tb_inst_goto, tb_inst_if can handle common cases for you.
+ TB_BRANCH, // (Control, Data?) -> (Control...)
+ // debugbreak will trap in a continuable manner.
+ TB_DEBUGBREAK, // (Control, Memory) -> (Control)
+ // trap will not be continuable but will stop execution.
+ TB_TRAP, // (Control) -> (Control)
+ // unreachable means it won't trap or be continuable.
+ TB_UNREACHABLE, // (Control) -> (Control)
+
+ ////////////////////////////////
+ // CONTROL + MEMORY
+ ////////////////////////////////
+ // nothing special, it's just a function call, 3rd argument here is the
+ // target pointer (or syscall number) and the rest are just data args.
+ TB_CALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data)
+ TB_SYSCALL, // (Control, Memory, Data, Data...) -> (Control, Memory, Data)
+ // safepoint polls are the same except they only trigger if the poll site
+ // says to (platform specific but almost always just the page being made
+ // unmapped/guard), 3rd argument is the poll site.
+ TB_SAFEPOINT_POLL, // (Control, Memory, Ptr, Data...) -> (Control)
+
+ ////////////////////////////////
+ // MEMORY
+ ////////////////////////////////
+ // LOAD and STORE are standard memory accesses, they can be folded away.
+ TB_LOAD, // (Memory, Ptr) -> Data
+ TB_STORE, // (Memory, Ptr, Data) -> Memory
+ // bulk memory ops.
+ TB_MEMCPY, // (Memory, Ptr, Ptr, Size) -> Memory
+ TB_MEMSET, // (Memory, Ptr, Int8, Size) -> Memory
+ // these memory accesses represent "volatile" which means
+ // they may produce side effects and thus cannot be eliminated.
+ TB_READ, // (Memory, Ptr) -> (Memory, Data)
+ TB_WRITE, // (Memory, Ptr, Data) -> (Memory, Data)
+ // atomics have multiple observers (if not they wouldn't need to
+ // be atomic) and thus produce side effects everywhere just like
+ // volatiles except they have synchronization guarentees. the atomic
+ // data ops will return the value before the operation is performed.
+ // Atomic CAS return the old value and a boolean for success (true if
+ // the value was changed)
+ TB_ATOMIC_LOAD, // (Memory, Ptr) -> (Memory, Data)
+ TB_ATOMIC_XCHG, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_ADD, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_SUB, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_AND, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_XOR, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_OR, // (Memory, Ptr, Data) -> (Memory, Data)
+ TB_ATOMIC_CAS, // (Memory, Data, Data) -> (Memory, Data, Bool)
+
+ ////////////////////////////////
+ // POINTERS
+ ////////////////////////////////
+ // LOCAL will statically allocate stack space
+ TB_LOCAL, // () & (Int, Int) -> Ptr
+ // SYMBOL will return a pointer to a TB_Symbol
+ TB_SYMBOL, // () & TB_Symbol* -> Ptr
+ // offsets pointer by constant value
+ TB_MEMBER_ACCESS, // Ptr & Int -> Ptr
+ // arguments represent base, index, and stride respectively
+ // and will perform `base + index*stride`
+ TB_ARRAY_ACCESS, // (Ptr, Int) & Int -> Ptr
+ // converts an integer to a pointer
+ TB_INT2PTR, // Int -> Ptr
+ // converts a pointer to an integer
+ TB_PTR2INT, // Ptr -> Int
// Conversions
TB_TRUNCATE,
TB_FLOAT_EXT,
TB_SIGN_EXT,
TB_ZERO_EXT,
- TB_INT2PTR,
- TB_PTR2INT,
TB_UINT2FLOAT,
TB_FLOAT2UINT,
TB_INT2FLOAT,
@@ -315,18 +387,17 @@ typedef enum TB_NodeTypeEnum {
TB_CMP_FLE,
// Special ops
- // does full multiplication (64x64=128 and so on) returning
- // the low and high values in separate projections
+ // adds two paired integers to two other paired integers and returns
+ // a low and high value
+ TB_ADDPAIR,
+ // does full multiplication (64x64=128 and so on) returning
+ // the low and high values in separate projections
TB_MULPAIR,
- // PHI
- TB_PHI, // fn(r: region, x: []data)
-
// variadic
TB_VA_START,
// x86 intrinsics
- TB_X86INTRIN_RDTSC,
TB_X86INTRIN_LDMXCSR,
TB_X86INTRIN_STMXCSR,
TB_X86INTRIN_SQRT,
@@ -372,6 +443,9 @@ typedef struct TB_FunctionPrototype TB_FunctionPrototype;
typedef struct TB_Attrib TB_Attrib;
+// target-specific, just a unique ID for the registers
+typedef int TB_PhysicalReg;
+
// Refers generically to objects within a module
//
// TB_Function, TB_Global, and TB_External are all subtypes of TB_Symbol
@@ -412,12 +486,11 @@ typedef struct TB_Symbol {
typedef struct TB_Node TB_Node;
struct TB_Node {
TB_NodeType type;
+ uint16_t input_count; // number of node inputs.
TB_DataType dt;
- uint16_t input_count; // number of node inputs
- uint16_t extra_count; // number of bytes for extra operand data
- // local to the TB_Passes
- uint32_t lattice_id;
+ // makes it easier to track in graph walks
+ size_t gvn;
TB_Attrib* attribs;
TB_Node** inputs;
@@ -442,9 +515,8 @@ typedef struct { // TB_PROJ
int index;
} TB_NodeProj;
-typedef struct { // TB_INT
- uint64_t num_words;
- uint64_t words[];
+typedef struct { // TB_INTEGER_CONST
+ uint64_t value;
} TB_NodeInt;
typedef struct { // any compare operator
@@ -457,11 +529,10 @@ typedef struct { // any integer binary operator
typedef struct { // TB_MULPAIR
TB_Node *lo, *hi;
-} TB_NodeMulPair;
+} TB_NodeArithPair;
typedef struct {
TB_CharUnits align;
- bool is_volatile;
} TB_NodeMemAccess;
typedef struct {
@@ -469,6 +540,16 @@ typedef struct {
} TB_NodeLocal;
typedef struct {
+ // this is the raw buffer
+ size_t length;
+ const uint8_t* data;
+
+ // represents the outputs, inputs and temporaries in that order
+ size_t outs, ins, tmps;
+ TB_PhysicalReg regs[];
+} TB_NodeMachineOp;
+
+typedef struct {
float value;
} TB_NodeFloat32;
@@ -491,6 +572,8 @@ typedef struct {
typedef struct {
TB_MemoryOrder order;
TB_MemoryOrder order2;
+ TB_Node* proj0;
+ TB_Node* proj1;
} TB_NodeAtomic;
typedef struct {
@@ -506,9 +589,18 @@ typedef struct {
TB_Node* end;
const char* tag;
+ // position in a postorder walk
+ int postorder_id;
// immediate dominator (can be approximate)
int dom_depth;
TB_Node* dom;
+
+ // used for IR building only, stale after that.
+ //
+ // this represents the first and last memory values within a region,
+ // if a region ever has multiple predecessors we apply a join on these
+ // memory.
+ TB_Node *mem_in, *mem_out;
} TB_NodeRegion;
typedef struct TB_MultiOutput {
@@ -558,7 +650,7 @@ typedef struct {
#define TB_TYPE_F64 TB_DataType{ { TB_FLOAT, 0, TB_FLT_64 } }
#define TB_TYPE_BOOL TB_DataType{ { TB_INT, 0, 1 } }
#define TB_TYPE_PTR TB_DataType{ { TB_PTR, 0, 0 } }
-
+#define TB_TYPE_MEMORY TB_DataType{ { TB_MEMORY,0, 0 } }
#define TB_TYPE_INTN(N) TB_DataType{ { TB_INT, 0, (N) } }
#define TB_TYPE_PTRN(N) TB_DataType{ { TB_PTR, 0, (N) } }
@@ -575,8 +667,9 @@ typedef struct {
#define TB_TYPE_F64 (TB_DataType){ { TB_FLOAT, 0, TB_FLT_64 } }
#define TB_TYPE_BOOL (TB_DataType){ { TB_INT, 0, 1 } }
#define TB_TYPE_PTR (TB_DataType){ { TB_PTR, 0, 0 } }
-#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } }
-#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } }
+#define TB_TYPE_MEMORY (TB_DataType){ { TB_MEMORY,0, 0 } }
+#define TB_TYPE_INTN(N) (TB_DataType){ { TB_INT, 0, (N) } }
+#define TB_TYPE_PTRN(N) (TB_DataType){ { TB_PTR, 0, (N) } }
#endif
@@ -737,7 +830,7 @@ TB_API TB_External* tb_next_external(TB_External* e);
// this is used JIT scenarios to tell the compiler what externals map to
TB_API TB_ExternalType tb_extern_get_type(TB_External* e);
-TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage);
+TB_API TB_Global* tb_extern_transmute(TB_External* e, TB_DebugType* dbg_type, TB_Linkage linkage);
TB_API TB_External* tb_extern_create(TB_Module* m, ptrdiff_t len, const char* name, TB_ExternalType type);
@@ -884,6 +977,11 @@ TB_API void tb_default_print_callback(void* user_data, const char* fmt, ...);
TB_API void tb_inst_set_location(TB_Function* f, TB_SourceFile* file, int line, int column);
TB_API void tb_inst_reset_location(TB_Function* f);
+// this is where the STOP will be
+TB_API void tb_inst_set_exit_location(TB_Function* f, TB_SourceFile* file, int line, int column);
+
+TB_API bool tb_has_effects(TB_Node* n);
+
// if section is NULL, default to .text
TB_API TB_Function* tb_function_create(TB_Module* m, ptrdiff_t len, const char* name, TB_Linkage linkage, TB_ComdatType comdat);
@@ -927,9 +1025,12 @@ TB_API TB_Node* tb_inst_float2int(TB_Function* f, TB_Node* src, TB_DataType dt,
TB_API TB_Node* tb_inst_bitcast(TB_Function* f, TB_Node* src, TB_DataType dt);
TB_API TB_Node* tb_inst_local(TB_Function* f, TB_CharUnits size, TB_CharUnits align);
+
TB_API TB_Node* tb_inst_load(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_CharUnits align, bool is_volatile);
TB_API void tb_inst_store(TB_Function* f, TB_DataType dt, TB_Node* addr, TB_Node* val, TB_CharUnits align, bool is_volatile);
+TB_API void tb_inst_safepoint_poll(TB_Function* f, TB_Node* addr, int input_count, TB_Node** inputs);
+
TB_API TB_Node* tb_inst_bool(TB_Function* f, bool imm);
TB_API TB_Node* tb_inst_sint(TB_Function* f, TB_DataType dt, int64_t imm);
TB_API TB_Node* tb_inst_uint(TB_Function* f, TB_DataType dt, uint64_t imm);
@@ -939,14 +1040,14 @@ TB_API TB_Node* tb_inst_cstring(TB_Function* f, const char* str);
TB_API TB_Node* tb_inst_string(TB_Function* f, size_t len, const char* str);
// write 'val' over 'count' bytes on 'dst'
-TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align, bool is_volatile);
+TB_API void tb_inst_memset(TB_Function* f, TB_Node* dst, TB_Node* val, TB_Node* count, TB_CharUnits align);
// zero 'count' bytes on 'dst'
-TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align, bool is_volatile);
+TB_API void tb_inst_memzero(TB_Function* f, TB_Node* dst, TB_Node* count, TB_CharUnits align);
// performs a copy of 'count' elements from one memory location to another
// both locations cannot overlap.
-TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align, bool is_volatile);
+TB_API void tb_inst_memcpy(TB_Function* f, TB_Node* dst, TB_Node* src, TB_Node* count, TB_CharUnits align);
// result = base + (index * stride)
TB_API TB_Node* tb_inst_array_access(TB_Function* f, TB_Node* base, TB_Node* index, int64_t stride);
@@ -1033,9 +1134,9 @@ TB_API TB_Node* tb_inst_cmp_fge(TB_Function* f, TB_Node* a, TB_Node* b);
// General intrinsics
TB_API TB_Node* tb_inst_va_start(TB_Function* f, TB_Node* a);
+TB_API TB_Node* tb_inst_cycle_counter(TB_Function* f);
// x86 Intrinsics
-TB_API TB_Node* tb_inst_x86_rdtsc(TB_Function* f);
TB_API TB_Node* tb_inst_x86_ldmxcsr(TB_Function* f, TB_Node* a);
TB_API TB_Node* tb_inst_x86_stmxcsr(TB_Function* f);
TB_API TB_Node* tb_inst_x86_sqrt(TB_Function* f, TB_Node* a);
@@ -1061,6 +1162,19 @@ TB_API void tb_inst_ret(TB_Function* f, size_t count, TB_Node** values);
////////////////////////////////
// Passes
////////////////////////////////
+typedef enum {
+ // allowed to remove PHIs nodes, this is
+ // helpful because the default IR building
+ // will produce tons of useless memory PHIs.
+ TB_PEEPHOLE_PHI = 1,
+
+ // it's allowed to fold memory operations (store or load elimination)
+ TB_PEEPHOLE_MEMORY = 2,
+
+ // just do every reduction rule i can provide you
+ TB_PEEPHOLE_ALL = 7,
+} TB_PeepholeFlags;
+
// Function analysis, optimizations, and codegen are all part of this
typedef struct TB_Passes TB_Passes;
@@ -1069,35 +1183,32 @@ TB_API TB_Passes* tb_pass_enter(TB_Function* f, TB_Arena* arena);
TB_API void tb_pass_exit(TB_Passes* opt);
// transformation passes:
-// peephole: runs most simple reductions on the code,
-// should be run after any bigger passes (it's incremental
-// so it's not that bad)
+// peephole: 99% of the optimizer, i'm sea of nodes pilled so i
+// break down most optimizations into local rewrites, it's
+// incremental and recommended to run after any non-peephole
+// pass.
//
-// mem2reg: lowers TB_LOCALs into SSA values, this makes more
+// mem2reg: lowers TB_LOCALs into SoN values, this makes more
// data flow analysis possible on the code and allows to codegen
// to place variables into registers.
//
-// cfg: performs simplifications on the CFG like `a && b => select(a, b, 0)`
-// or removing redundant branches.
-//
-// loop: NOT READY
-//
-TB_API bool tb_pass_peephole(TB_Passes* opt);
+// SROA: splits LOCALs into multiple to allow for more dataflow
+// analysis later on.
+TB_API void tb_pass_peephole(TB_Passes* opt, TB_PeepholeFlags flags);
+TB_API void tb_pass_sroa(TB_Passes* opt);
TB_API bool tb_pass_mem2reg(TB_Passes* opt);
-TB_API bool tb_pass_loop(TB_Passes* opt);
-TB_API bool tb_pass_cfg(TB_Passes* opt);
+
+TB_API void tb_pass_schedule(TB_Passes* opt);
// analysis
// print: prints IR in a flattened text form.
TB_API bool tb_pass_print(TB_Passes* opt);
-TB_API void tb_pass_schedule(TB_Passes* opt);
-
// codegen
TB_API TB_FunctionOutput* tb_pass_codegen(TB_Passes* opt, bool emit_asm);
TB_API void tb_pass_kill_node(TB_Passes* opt, TB_Node* n);
-TB_API bool tb_pass_mark(TB_Passes* opt, TB_Node* n);
+TB_API void tb_pass_mark(TB_Passes* opt, TB_Node* n);
TB_API void tb_pass_mark_users(TB_Passes* opt, TB_Node* n);
////////////////////////////////
diff --git a/src/tilde/tb.lib b/src/tilde/tb.lib
index 2ec6ae708..4b3cdb9b0 100644
--- a/src/tilde/tb.lib
+++ b/src/tilde/tb.lib
Binary files differ
diff --git a/src/tilde/tb_arena.h b/src/tilde/tb_arena.h
index 548427139..67ec0e181 100644
--- a/src/tilde/tb_arena.h
+++ b/src/tilde/tb_arena.h
@@ -20,9 +20,9 @@
#endif
enum {
- TB_ARENA_SMALL_CHUNK_SIZE = 4 * 1024,
- TB_ARENA_MEDIUM_CHUNK_SIZE = 512 * 1024,
- TB_ARENA_LARGE_CHUNK_SIZE = 2 * 1024 * 1024,
+ TB_ARENA_SMALL_CHUNK_SIZE = 4 * 1024,
+ TB_ARENA_MEDIUM_CHUNK_SIZE = 512 * 1024,
+ TB_ARENA_LARGE_CHUNK_SIZE = 16 * 1024 * 1024,
TB_ARENA_ALIGNMENT = 16,
};