From 2aaef48c5c362bb3e04d0c9cd1e722e21b3755e5 Mon Sep 17 00:00:00 2001 From: gingerBill Date: Fri, 5 Aug 2016 00:54:05 +0100 Subject: String support --- src/checker/checker.cpp | 37 ++- src/checker/expr.cpp | 5 +- src/checker/stmt.cpp | 22 +- src/codegen/codegen.cpp | 8 +- src/codegen/print.cpp | 540 ------------------------------------------- src/codegen/print_llvm.cpp | 564 +++++++++++++++++++++++++++++++++++++++++++++ src/codegen/ssa.cpp | 323 ++++++++++++++++++-------- src/common.cpp | 80 +------ src/gb/gb.h | 40 +++- src/main.cpp | 2 +- src/parser.cpp | 5 +- src/string.cpp | 262 +++++++++++++++++++++ src/tokenizer.cpp | 77 +++++-- 13 files changed, 1196 insertions(+), 769 deletions(-) delete mode 100644 src/codegen/print.cpp create mode 100644 src/codegen/print_llvm.cpp create mode 100644 src/string.cpp (limited to 'src') diff --git a/src/checker/checker.cpp b/src/checker/checker.cpp index 53ce8ac18..98e371e33 100644 --- a/src/checker/checker.cpp +++ b/src/checker/checker.cpp @@ -219,6 +219,23 @@ void destroy_scope(Scope *scope) { // NOTE(bill): No need to free scope as it "should" be allocated in an arena (except for the global scope) } +void add_scope(Checker *c, AstNode *node, Scope *scope) { + GB_ASSERT(node != NULL); + GB_ASSERT(scope != NULL); + map_set(&c->info.scopes, hash_pointer(node), scope); +} + + +void check_open_scope(Checker *c, AstNode *stmt) { + GB_ASSERT(is_ast_node_stmt(stmt) || stmt->kind == AstNode_ProcType); + Scope *scope = make_scope(c->context.scope, c->allocator); + add_scope(c, stmt, scope); + c->context.scope = scope; +} + +void check_close_scope(Checker *c) { + c->context.scope = c->context.scope->parent; +} void scope_lookup_parent_entity(Scope *s, String name, Scope **scope, Entity **entity) { u64 key = hash_string(name); @@ -359,7 +376,6 @@ void init_checker(Checker *c, Parser *parser) { c->sizes.word_size = 8; c->sizes.max_align = 8; - gb_array_init(c->procedure_stack, a); gb_array_init(c->procedures, a); @@ -487,25 +503,6 @@ void check_procedure_later(Checker *c, AstFile *file, Token token, DeclInfo *dec gb_array_append(c->procedures, info); } - - -void add_scope(Checker *c, AstNode *node, Scope *scope) { - GB_ASSERT(node != NULL); - GB_ASSERT(scope != NULL); - map_set(&c->info.scopes, hash_pointer(node), scope); -} - - -void check_open_scope(Checker *c, AstNode *statement) { - Scope *scope = make_scope(c->context.scope, c->allocator); - add_scope(c, statement, scope); - c->context.scope = scope; -} - -void check_close_scope(Checker *c) { - c->context.scope = c->context.scope->parent; -} - void check_add_deferred_stmt(Checker *c, AstNode *stmt) { GB_ASSERT(stmt != NULL); GB_ASSERT(is_ast_node_stmt(stmt)); diff --git a/src/checker/expr.cpp b/src/checker/expr.cpp index 7a1fdb70e..4129e6e73 100644 --- a/src/checker/expr.cpp +++ b/src/checker/expr.cpp @@ -954,6 +954,7 @@ b32 check_index_value(Checker *c, AstNode *index_value, i64 max_count, i64 *valu } Entity *lookup_field(Type *type, AstNode *field_node, isize *index = NULL) { + GB_ASSERT(type != NULL); GB_ASSERT(field_node->kind == AstNode_Ident); type = get_base_type(type); if (type->kind == Type_Pointer) @@ -1192,7 +1193,7 @@ b32 check_builtin_procedure(Checker *c, Operand *operand, AstNode *call, i32 id) if (is_type_string(t)) { if (operand->mode == Addressing_Constant) { mode = Addressing_Constant; - value = make_exact_value_integer(operand->value.value_string.len); + value = make_exact_value_integer(operand->value.value_string); } else { mode = Addressing_Value; } @@ -1683,7 +1684,6 @@ ExpressionKind check__expr_base(Checker *c, Operand *o, AstNode *node, Type *typ if (o->mode == Addressing_Constant) { max_count = o->value.value_string.len; } - o->mode = Addressing_Value; o->type = t_u8; } break; @@ -1743,6 +1743,7 @@ ExpressionKind check__expr_base(Checker *c, Operand *o, AstNode *node, Type *typ if (o->mode == Addressing_Constant) { max_count = o->value.value_string.len; } + o->type = t_string; o->mode = Addressing_Value; } break; diff --git a/src/checker/stmt.cpp b/src/checker/stmt.cpp index 676e74fa9..67ade56a5 100644 --- a/src/checker/stmt.cpp +++ b/src/checker/stmt.cpp @@ -408,11 +408,15 @@ void check_proc_decl(Checker *c, Entity *e, DeclInfo *d, b32 check_body_later) { e->type = proc_type; ast_node(pd, ProcDecl, d->proc_decl); -#if 1 Scope *original_curr_scope = c->context.scope; c->context.scope = c->global_scope; check_open_scope(c, pd->type); -#endif + defer ({ + check_close_scope(c); + c->context.scope = original_curr_scope; + }); + + check_procedure_type(c, proc_type, pd->type); b32 is_foreign = false; b32 is_inline = false; @@ -455,11 +459,6 @@ void check_proc_decl(Checker *c, Entity *e, DeclInfo *d, b32 check_body_later) { } } -#if 1 - check_close_scope(c); - c->context.scope = original_curr_scope; -#endif - } void check_var_decl(Checker *c, Entity *e, Entity **entities, isize entity_count, AstNode *type_expr, AstNode *init_expr) { @@ -554,8 +553,7 @@ void check_stmt(Checker *c, AstNode *node, u32 flags) { case_end; case_ast_node(ids, IncDecStmt, node); - Token op = {}; - op = ids->op; + Token op = ids->op; switch (ids->op.kind) { case Token_Increment: op.kind = Token_Add; @@ -717,9 +715,9 @@ void check_stmt(Checker *c, AstNode *node, u32 flags) { result_count = proc_type->procedure.results->tuple.variable_count; if (result_count != rs->result_count) { error(&c->error_collector, rs->token, "Expected %td return %s, got %td", - result_count, - (result_count != 1 ? "values" : "value"), - rs->result_count); + result_count, + (result_count != 1 ? "values" : "value"), + rs->result_count); } else if (result_count > 0) { auto *tuple = &proc_type->procedure.results->tuple; check_init_variables(c, tuple->variables, tuple->variable_count, diff --git a/src/codegen/codegen.cpp b/src/codegen/codegen.cpp index f799497ca..412daae3d 100644 --- a/src/codegen/codegen.cpp +++ b/src/codegen/codegen.cpp @@ -1,5 +1,5 @@ #include "ssa.cpp" -#include "print.cpp" +#include "print_llvm.cpp" struct ssaGen { ssaModule module; @@ -7,14 +7,14 @@ struct ssaGen { }; b32 ssa_gen_init(ssaGen *s, Checker *c) { - if (c->error_collector.count != 0) + if (c->error_collector.count > 0) return false; gb_for_array(i, c->parser->files) { AstFile *f = &c->parser->files[i]; - if (f->error_collector.count != 0) + if (f->error_collector.count > 0) return false; - if (f->tokenizer.error_count != 0) + if (f->tokenizer.error_count > 0) return false; } diff --git a/src/codegen/print.cpp b/src/codegen/print.cpp deleted file mode 100644 index f6f1afe42..000000000 --- a/src/codegen/print.cpp +++ /dev/null @@ -1,540 +0,0 @@ -void ssa_fprintf(gbFile *f, char *fmt, ...) { - va_list va; - va_start(va, fmt); - gb_fprintf_va(f, fmt, va); -#if 1 - gb_printf_va(fmt, va); -#endif - va_end(va); -} - - -b32 ssa_valid_char(u8 c) { - if (gb_char_is_alphanumeric(c)) - return true; - - switch (c) { - case '$': - case '-': - case '.': - case '_': - return true; - } - - return false; -} - -void ssa_print_escape_string(gbFile *f, String name) { - isize extra = 0; - for (isize i = 0; i < name.len; i++) { - u8 c = name.text[i]; - if (!ssa_valid_char(c)) - extra += 2; - } - - if (extra == 0) { - ssa_fprintf(f, "%.*s", LIT(name)); - return; - } - - char hex_table[] = "0123456789ABCDEF"; - isize buf_len = name.len + extra; - u8 *buf = gb_alloc_array(gb_heap_allocator(), u8, buf_len); - defer (gb_free(gb_heap_allocator(), buf)); - - isize j = 0; - for (isize i = 0; i < name.len; i++) { - u8 c = name.text[i]; - if (ssa_valid_char(c)) { - buf[j++] = c; - } else { - buf[j] = '\\'; - buf[j+1] = hex_table[c >> 4]; - buf[j+2] = hex_table[c & 0x0f]; - j += 3; - } - } - - gb_file_write(f, buf, buf_len); -} - - - -void ssa_print_encoded_local(gbFile *f, String name) { - ssa_fprintf(f, "%%"); - ssa_print_escape_string(f, name); -} - -void ssa_print_encoded_global(gbFile *f, String name) { - ssa_fprintf(f, "@"); - ssa_print_escape_string(f, name); -} - - -void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) { - i64 word_bits = 8*s.word_size; - GB_ASSERT_NOT_NULL(t); - t = default_type(t); - - switch (t->kind) { - case Type_Basic: - switch (t->basic.kind) { - case Basic_bool: ssa_fprintf(f, "i1"); break; - case Basic_i8: ssa_fprintf(f, "i8"); break; - case Basic_i16: ssa_fprintf(f, "i16"); break; - case Basic_i32: ssa_fprintf(f, "i32"); break; - case Basic_i64: ssa_fprintf(f, "i64"); break; - case Basic_u8: ssa_fprintf(f, "i8"); break; - case Basic_u16: ssa_fprintf(f, "i16"); break; - case Basic_u32: ssa_fprintf(f, "i32"); break; - case Basic_u64: ssa_fprintf(f, "i64"); break; - case Basic_f32: ssa_fprintf(f, "float"); break; - case Basic_f64: ssa_fprintf(f, "double"); break; - case Basic_rawptr: ssa_fprintf(f, "void*"); break; - case Basic_string: ssa_fprintf(f, "{i8*, i%lld}", word_bits); break; - case Basic_int: ssa_fprintf(f, "i%lld", word_bits); break; - case Basic_uint: ssa_fprintf(f, "i%lld", word_bits); break; - } - break; - case Type_Array: - ssa_fprintf(f, "[%lld x ", t->array.count); - ssa_print_type(f, s, t->array.element); - ssa_fprintf(f, "]"); - break; - case Type_Slice: - ssa_fprintf(f, "{"); - ssa_print_type(f, s, t->slice.element); - ssa_fprintf(f, "*, i%lld, i%lld}", word_bits, word_bits); - break; - case Type_Structure: - ssa_fprintf(f, "{"); - for (isize i = 0; i < t->structure.field_count; i++) { - if (i > 0) ssa_fprintf(f, ", "); - ssa_print_type(f, s, t->structure.fields[i]->type); - } - ssa_fprintf(f, "}"); - break; - case Type_Pointer: - ssa_print_type(f, s, t->pointer.element); - ssa_fprintf(f, "*"); - break; - case Type_Named: - ssa_print_encoded_local(f, t->named.name); - break; - case Type_Alias: - ssa_print_type(f, s, t->alias.base); - break; - case Type_Tuple: - if (t->tuple.variable_count == 1) { - ssa_print_type(f, s, t->tuple.variables[0]->type); - } else { - ssa_fprintf(f, "{"); - for (isize i = 0; i < t->tuple.variable_count; i++) { - if (i > 0) ssa_fprintf(f, ", "); - ssa_print_type(f, s, t->tuple.variables[i]->type); - } - ssa_fprintf(f, "}"); - } - break; - case Type_Procedure: - if (t->procedure.result_count == 0) - ssa_fprintf(f, "void"); - else - ssa_print_type(f, s, t->procedure.results); - ssa_fprintf(f, " ("); - for (isize i = 0; i < t->procedure.param_count; i++) { - if (i > 0) ssa_fprintf(f, ", "); - ssa_print_type(f, s, &t->procedure.params[i]); - } - ssa_fprintf(f, ")*"); - break; - } -} - -void ssa_print_exact_value(gbFile *f, ssaModule *m, ExactValue value, Type *type) { - switch (value.kind) { - case ExactValue_Bool: - ssa_fprintf(f, (value.value_bool ? "true" : "false")); - break; - case ExactValue_String: { - ssa_fprintf(f, "c\""); - // TODO(bill): Make unquote string function - String unquoted = value.value_string; - unquoted.text++; - unquoted.len -= 2; - ssa_print_escape_string(f, unquoted); - ssa_fprintf(f, "\""); - } break; - case ExactValue_Integer: - ssa_fprintf(f, "%lld", value.value_integer); - break; - case ExactValue_Float: { - u64 u = 0; - if (is_type_float(type) && type->basic.kind == Basic_f32) { - // IMPORTANT NOTE(bill): LLVM requires all floating point constants to be - // a 64 bit number if bits_of(float type) <= 64. - // To overcome this problem, fill the "bottom" 32 bits with zeros - // https://groups.google.com/forum/#!topic/llvm-dev/IlqV3TbSk6M - f32 fp = cast(f32)value.value_float; - u = *cast(u32 *)&fp; - u <<= 32; - - } else { - u = *cast(u64 *)&value.value_float; - } - ssa_fprintf(f, "0x%llx", u); - } break; - case ExactValue_Pointer: - if (value.value_float == NULL) { - ssa_fprintf(f, "null"); - } else { - GB_PANIC("TODO(bill): ExactValue_Pointer"); - } - break; - default: - GB_PANIC("Invalid ExactValue"); - break; - } -} - -void ssa_print_block_name(gbFile *f, ssaBlock *b) { - ssa_fprintf(f, "\""); - ssa_print_escape_string(f, b->label); - ssa_fprintf(f, " - %d", b->id); - ssa_fprintf(f, "\""); -} - -void ssa_print_value(gbFile *f, ssaModule *m, ssaValue *value, Type *type_hint) { - if (value == NULL) { - ssa_fprintf(f, "!!!NULL_VALUE"); - return; - } - switch (value->kind) { - case ssaValue_Constant: - ssa_print_exact_value(f, m, value->constant.value, type_hint); - break; - case ssaValue_TypeName: - ssa_print_encoded_local(f, value->type_name.entity->token.string); - break; - case ssaValue_Global: - ssa_print_encoded_global(f, value->global.entity->token.string); - break; - case ssaValue_Param: - ssa_print_encoded_local(f, value->param.entity->token.string); - break; - case ssaValue_Proc: - ssa_print_encoded_global(f, value->proc.entity->token.string); - break; - case ssaValue_Instr: - ssa_fprintf(f, "%%%d", value->id); - break; - } -} - -void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) { - GB_ASSERT(value->kind == ssaValue_Instr); - ssaInstr *instr = &value->instr; - - ssa_fprintf(f, "\t"); - switch (instr->kind) { - case ssaInstr_Local: { - Type *type = instr->local.entity->type; - ssa_fprintf(f, "%%%d = alloca ", value->id); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, ", align %lld ", type_align_of(m->sizes, gb_heap_allocator(), type)); - { - String str = instr->local.entity->token.string; - if (str.len > 0) - ssa_fprintf(f, "; %.*s", LIT(instr->local.entity->token.string)); - } - ssa_fprintf(f, "\n"); - ssa_fprintf(f, "\tstore "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, " zeroinitializer, "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, "* %%%d\n", value->id); - } break; - - case ssaInstr_Store: { - Type *type = ssa_value_type(instr->store.address); - ssa_fprintf(f, "store "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, " "); - ssa_print_value(f, m, instr->store.value, type); - ssa_fprintf(f, ", "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, "* "); - ssa_print_value(f, m, instr->store.address, type); - ssa_fprintf(f, "\n"); - } break; - - case ssaInstr_Load: { - Type *type = instr->load.type; - ssa_fprintf(f, "%%%d = load ", value->id); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, ", "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, "* "); - ssa_print_value(f, m, instr->load.address, type); - ssa_fprintf(f, "\n"); - } break; - - case ssaInstr_GetElementPtr: { - Type *et = instr->get_element_ptr.element_type; - ssa_fprintf(f, "%%%d = getelementptr ", value->id); - if (instr->get_element_ptr.inbounds) - ssa_fprintf(f, "inbounds "); - - ssa_print_type(f, m->sizes, et); - ssa_fprintf(f, ", "); - ssa_print_type(f, m->sizes, et); - ssa_fprintf(f, "* "); - ssa_print_value(f, m, instr->get_element_ptr.address, et); - for (isize i = 0; i < instr->get_element_ptr.index_count; i++) { - ssaValue *index = instr->get_element_ptr.indices[i]; - Type *t = ssa_value_type(index); - ssa_fprintf(f, ", "); - ssa_print_type(f, m->sizes, t); - ssa_fprintf(f, " "); - ssa_print_value(f, m, index, t); - } - ssa_fprintf(f, "\n"); - } break; - - case ssaInstr_Br: { - ssa_fprintf(f, "br "); - if (instr->br.cond != NULL) { - ssa_print_type(f, m->sizes, t_bool); - ssa_fprintf(f, " "); - ssa_print_value(f, m, instr->br.cond, t_bool); - ssa_fprintf(f, ", ", instr->br.cond->id); - } - ssa_fprintf(f, "label "); - ssa_fprintf(f, "%%"); ssa_print_block_name(f, instr->br.true_block); - if (instr->br.false_block != NULL) { - ssa_fprintf(f, ", label "); - ssa_fprintf(f, "%%"); ssa_print_block_name(f, instr->br.false_block); - } - ssa_fprintf(f, "\n"); - } break; - - case ssaInstr_Ret: { - auto *ret = &instr->ret; - ssa_fprintf(f, "ret "); - if (ret->value == NULL) { - ssa_fprintf(f, "void"); - } else { - Type *t = ssa_value_type(ret->value); - ssa_print_type(f, m->sizes, t); - ssa_fprintf(f, " "); - ssa_print_value(f, m, ret->value, t); - } - - ssa_fprintf(f, "\n"); - - } break; - - case ssaInstr_Unreachable: { - ssa_fprintf(f, "unreachable\n"); - } break; - - case ssaInstr_BinaryOp: { - auto *bo = &value->instr.binary_op; - Type *type = ssa_value_type(bo->left); - - ssa_fprintf(f, "%%%d = ", value->id); - - if (gb_is_between(bo->op.kind, Token__ComparisonBegin+1, Token__ComparisonEnd-1)) { - if (is_type_float(type)) { - ssa_fprintf(f, "fcmp "); - switch (bo->op.kind) { - case Token_CmpEq: ssa_fprintf(f, "oeq"); break; - case Token_NotEq: ssa_fprintf(f, "one"); break; - case Token_Lt: ssa_fprintf(f, "olt"); break; - case Token_Gt: ssa_fprintf(f, "ogt"); break; - case Token_LtEq: ssa_fprintf(f, "ole"); break; - case Token_GtEq: ssa_fprintf(f, "oge"); break; - } - } else { - ssa_fprintf(f, "icmp "); - if (bo->op.kind != Token_CmpEq && - bo->op.kind != Token_NotEq) { - if (is_type_unsigned(type)) { - ssa_fprintf(f, "u"); - } else { - ssa_fprintf(f, "s"); - } - } - switch (bo->op.kind) { - case Token_CmpEq: ssa_fprintf(f, "eq"); break; - case Token_NotEq: ssa_fprintf(f, "ne"); break; - case Token_Lt: ssa_fprintf(f, "lt"); break; - case Token_Gt: ssa_fprintf(f, "gt"); break; - case Token_LtEq: ssa_fprintf(f, "le"); break; - case Token_GtEq: ssa_fprintf(f, "ge"); break; - } - } - } else { - if (is_type_float(type)) - ssa_fprintf(f, "f"); - - switch (bo->op.kind) { - case Token_Add: ssa_fprintf(f, "add"); break; - case Token_Sub: ssa_fprintf(f, "sub"); break; - case Token_And: ssa_fprintf(f, "and"); break; - case Token_Or: ssa_fprintf(f, "or"); break; - case Token_Xor: ssa_fprintf(f, "xor"); break; - - case Token_AndNot: GB_PANIC("Token_AndNot Should never be called"); - - case Token_Mul: ssa_fprintf(f, "mul"); break; - - default: { - if (!is_type_float(type)) { - if (is_type_unsigned(type)) ssa_fprintf(f, "u"); - else ssa_fprintf(f, "s"); - } - - switch (bo->op.kind) { - case Token_Quo: ssa_fprintf(f, "div"); break; - case Token_Mod: ssa_fprintf(f, "rem"); break; - } - } break; - } - } - - ssa_fprintf(f, " "); - ssa_print_type(f, m->sizes, type); - ssa_fprintf(f, " "); - ssa_print_value(f, m, bo->left, type); - ssa_fprintf(f, ", "); - ssa_print_value(f, m, bo->right, type); - ssa_fprintf(f, "\n"); - - } break; - - case ssaInstr_Call: { - auto *call = &instr->call; - if (call->type) { - ssa_fprintf(f, "%%%d = ", value->id); - } - ssa_fprintf(f, "call "); - if (call->type) { - ssa_print_type(f, m->sizes, call->type); - } else { - ssa_fprintf(f, "void"); - } - ssa_fprintf(f, " "); - ssa_print_value(f, m, call->value, call->type); - - - ssa_fprintf(f, "("); - for (isize i = 0; i < call->arg_count; i++) { - ssaValue *arg = call->args[i]; - Type *t = ssa_value_type(arg); - if (i > 0) { - ssa_fprintf(f, ", "); - } - ssa_print_type(f, m->sizes, t); - ssa_fprintf(f, " "); - ssa_print_value(f, m, arg, t); - } - ssa_fprintf(f, ")\n"); - - } break; - - default: - ssa_fprintf(f, "; %d\n", instr->kind); - break; - } -} - -void ssa_print_llvm_ir(gbFile *f, ssaModule *m) { - if (m->layout.len > 0) { - ssa_fprintf(f, "target datalayout = %.*s\n", LIT(m->layout)); - } - - gb_for_array(member_index, m->members.entries) { - auto *entry = &m->members.entries[member_index]; - ssaValue *v = entry->value; - switch (v->kind) { - case ssaValue_TypeName: { - ssa_print_encoded_local(f, v->type_name.entity->token.string); - ssa_fprintf(f, " = type "); - ssa_print_type(f, m->sizes, get_base_type(v->type_name.type)); - ssa_fprintf(f, "\n"); - } break; - - case ssaValue_Global: { - auto *g = &v->global; - ssa_print_encoded_global(f, g->entity->token.string); - ssa_fprintf(f, " = "); - if (g->is_constant) { - ssa_fprintf(f, "private constant "); - } else { - ssa_fprintf(f, "global "); - } - - ssa_print_type(f, m->sizes, get_base_type(g->entity->type)); - ssa_fprintf(f, " "); - ssa_print_value(f, m, g->value, g->entity->type); - ssa_fprintf(f, "\n"); - } break; - - case ssaValue_Proc: { - ssaProcedure *proc = &v->proc; - if (proc->body == NULL) { - ssa_fprintf(f, "declare "); - } else { - ssa_fprintf(f, "define "); - } - - auto *proc_type = &proc->entity->type->procedure; - - if (proc_type->result_count == 0) { - ssa_fprintf(f, "void"); - } else { - ssa_print_type(f, m->sizes, proc_type->results); - } - - ssa_fprintf(f, " "); - - ssa_print_encoded_global(f, proc->name); - ssa_fprintf(f, "("); - - if (proc_type->param_count > 0) { - auto *params = &proc_type->params->tuple; - for (isize i = 0; i < params->variable_count; i++) { - Entity *e = params->variables[i]; - if (i > 0) - ssa_fprintf(f, ", "); - ssa_print_type(f, m->sizes, e->type); - ssa_fprintf(f, " %%%.*s", LIT(e->token.string)); - } - } - - ssa_fprintf(f, ") "); - - if (proc->body == NULL) { - ssa_fprintf(f, "\n"); - } else { - ssa_fprintf(f, "{\n"); - gb_for_array(i, proc->blocks) { - ssaBlock *block = proc->blocks[i]; - - if (i > 0) ssa_fprintf(f, "\n"); - ssa_print_block_name(f, block); - ssa_fprintf(f, ":\n"); - - gb_for_array(j, block->instrs) { - ssaValue *value = block->instrs[j]; - ssa_print_instr(f, m, value); - } - } - ssa_fprintf(f, "}\n\n"); - } - - } break; - } - } -} diff --git a/src/codegen/print_llvm.cpp b/src/codegen/print_llvm.cpp new file mode 100644 index 000000000..8f0d0a413 --- /dev/null +++ b/src/codegen/print_llvm.cpp @@ -0,0 +1,564 @@ +#define SSA_PRINT_TO_STDOUT 0 + +void ssa_fprintf(gbFile *f, char *fmt, ...) { + va_list va; + va_start(va, fmt); + gb_fprintf_va(f, fmt, va); +#if SSA_PRINT_TO_STDOUT + gb_printf_va(fmt, va); +#endif + va_end(va); +} + +void ssa_file_write(gbFile *f, void *data, isize len) { + gb_file_write(f, data, len); +#if SSA_PRINT_TO_STDOUT + gb_file_write(gb_file_get_standard(gbFileStandard_Output), data, len); +#endif +} + +b32 ssa_valid_char(u8 c) { + if (c >= 0x80) + return false; + + if (gb_char_is_alphanumeric(c)) + return true; + + switch (c) { + case '$': + case '-': + case '.': + case '_': + return true; + } + + return false; +} + +void ssa_print_escape_string(gbFile *f, String name) { + isize extra = 0; + for (isize i = 0; i < name.len; i++) { + u8 c = name.text[i]; + if (!ssa_valid_char(c)) + extra += 2; + } + + if (extra == 0) { + ssa_fprintf(f, "%.*s", LIT(name)); + return; + } + + char hex_table[] = "0123456789ABCDEF"; + isize buf_len = name.len + extra; + u8 *buf = gb_alloc_array(gb_heap_allocator(), u8, buf_len); + defer (gb_free(gb_heap_allocator(), buf)); + + isize j = 0; + for (isize i = 0; i < name.len; i++) { + u8 c = name.text[i]; + if (ssa_valid_char(c)) { + buf[j++] = c; + } else { + buf[j] = '\\'; + buf[j+1] = hex_table[c >> 4]; + buf[j+2] = hex_table[c & 0x0f]; + j += 3; + } + } + + ssa_file_write(f, buf, buf_len); +} + + + +void ssa_print_encoded_local(gbFile *f, String name) { + ssa_fprintf(f, "%%"); + ssa_print_escape_string(f, name); +} + +void ssa_print_encoded_global(gbFile *f, String name) { + ssa_fprintf(f, "@"); + ssa_print_escape_string(f, name); +} + + +void ssa_print_type(gbFile *f, BaseTypeSizes s, Type *t) { + i64 word_bits = 8*s.word_size; + GB_ASSERT_NOT_NULL(t); + t = default_type(t); + + switch (t->kind) { + case Type_Basic: + switch (t->basic.kind) { + case Basic_bool: ssa_fprintf(f, "i1"); break; + case Basic_i8: ssa_fprintf(f, "i8"); break; + case Basic_i16: ssa_fprintf(f, "i16"); break; + case Basic_i32: ssa_fprintf(f, "i32"); break; + case Basic_i64: ssa_fprintf(f, "i64"); break; + case Basic_u8: ssa_fprintf(f, "i8"); break; + case Basic_u16: ssa_fprintf(f, "i16"); break; + case Basic_u32: ssa_fprintf(f, "i32"); break; + case Basic_u64: ssa_fprintf(f, "i64"); break; + case Basic_f32: ssa_fprintf(f, "float"); break; + case Basic_f64: ssa_fprintf(f, "double"); break; + case Basic_rawptr: ssa_fprintf(f, "void*"); break; + case Basic_string: ssa_fprintf(f, "{i8*, i%lld}", word_bits); break; + case Basic_uint: ssa_fprintf(f, "i%lld", word_bits); break; + case Basic_int: ssa_fprintf(f, "i%lld", word_bits); break; + } + break; + case Type_Array: + ssa_fprintf(f, "[%lld x ", t->array.count); + ssa_print_type(f, s, t->array.element); + ssa_fprintf(f, "]"); + break; + case Type_Slice: + ssa_fprintf(f, "{"); + ssa_print_type(f, s, t->slice.element); + ssa_fprintf(f, "*, i%lld, i%lld}", word_bits, word_bits); + break; + case Type_Structure: + ssa_fprintf(f, "{"); + for (isize i = 0; i < t->structure.field_count; i++) { + if (i > 0) { + ssa_fprintf(f, ", "); + } + ssa_print_type(f, s, t->structure.fields[i]->type); + } + ssa_fprintf(f, "}"); + break; + case Type_Pointer: + ssa_print_type(f, s, t->pointer.element); + ssa_fprintf(f, "*"); + break; + case Type_Named: + ssa_print_encoded_local(f, t->named.name); + break; + case Type_Alias: + ssa_print_type(f, s, t->alias.base); + break; + case Type_Tuple: + if (t->tuple.variable_count == 1) { + ssa_print_type(f, s, t->tuple.variables[0]->type); + } else { + ssa_fprintf(f, "{"); + for (isize i = 0; i < t->tuple.variable_count; i++) { + if (i > 0) ssa_fprintf(f, ", "); + ssa_print_type(f, s, t->tuple.variables[i]->type); + } + ssa_fprintf(f, "}"); + } + break; + case Type_Procedure: + if (t->procedure.result_count == 0) { + ssa_fprintf(f, "void"); + } else { + ssa_print_type(f, s, t->procedure.results); + } + ssa_fprintf(f, " ("); + for (isize i = 0; i < t->procedure.param_count; i++) { + if (i > 0) { + ssa_fprintf(f, ", "); + } + ssa_print_type(f, s, &t->procedure.params[i]); + } + ssa_fprintf(f, ")*"); + break; + } +} + +void ssa_print_exact_value(gbFile *f, ssaModule *m, ExactValue value, Type *type) { + switch (value.kind) { + case ExactValue_Bool: + ssa_fprintf(f, (value.value_bool ? "true" : "false")); + break; + case ExactValue_String: { + ssa_fprintf(f, "c\""); + ssa_print_escape_string(f, value.value_string); + ssa_fprintf(f, "\""); + } break; + case ExactValue_Integer: + ssa_fprintf(f, "%lld", value.value_integer); + break; + case ExactValue_Float: { + u64 u = 0; + if (is_type_float(type) && type->basic.kind == Basic_f32) { + // IMPORTANT NOTE(bill): LLVM requires all floating point constants to be + // a 64 bit number if bits_of(float type) <= 64. + // To overcome this problem, fill the "bottom" 32 bits with zeros + // https://groups.google.com/forum/#!topic/llvm-dev/IlqV3TbSk6M + f32 fp = cast(f32)value.value_float; + u = *cast(u32 *)&fp; + u <<= 32; + + } else { + u = *cast(u64 *)&value.value_float; + } + ssa_fprintf(f, "0x%llx", u); + } break; + case ExactValue_Pointer: + if (value.value_float == NULL) { + ssa_fprintf(f, "null"); + } else { + GB_PANIC("TODO(bill): ExactValue_Pointer"); + } + break; + default: + GB_PANIC("Invalid ExactValue"); + break; + } +} + +void ssa_print_block_name(gbFile *f, ssaBlock *b) { + ssa_fprintf(f, "\""); + ssa_print_escape_string(f, b->label); + ssa_fprintf(f, " - %d", b->id); + ssa_fprintf(f, "\""); +} + +void ssa_print_value(gbFile *f, ssaModule *m, ssaValue *value, Type *type_hint) { + if (value == NULL) { + ssa_fprintf(f, "!!!NULL_VALUE"); + return; + } + switch (value->kind) { + case ssaValue_Constant: + ssa_print_exact_value(f, m, value->constant.value, type_hint); + break; + case ssaValue_TypeName: + ssa_print_encoded_local(f, value->type_name.entity->token.string); + break; + case ssaValue_Global: + ssa_print_encoded_global(f, value->global.entity->token.string); + break; + case ssaValue_Param: + ssa_print_encoded_local(f, value->param.entity->token.string); + break; + case ssaValue_Proc: + ssa_print_encoded_global(f, value->proc.entity->token.string); + break; + case ssaValue_Instr: + ssa_fprintf(f, "%%%d", value->id); + break; + } +} + +void ssa_print_instr(gbFile *f, ssaModule *m, ssaValue *value) { + GB_ASSERT(value->kind == ssaValue_Instr); + ssaInstr *instr = &value->instr; + + ssa_fprintf(f, "\t"); + switch (instr->kind) { + case ssaInstr_Local: { + Type *type = instr->local.entity->type; + ssa_fprintf(f, "%%%d = alloca ", value->id); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, ", align %lld ", type_align_of(m->sizes, gb_heap_allocator(), type)); + { + String str = instr->local.entity->token.string; + if (str.len > 0) + ssa_fprintf(f, "; %.*s", LIT(instr->local.entity->token.string)); + } + ssa_fprintf(f, "\n"); + ssa_fprintf(f, "\tstore "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, " zeroinitializer, "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, "* %%%d\n", value->id); + } break; + + case ssaInstr_Store: { + Type *type = ssa_value_type(instr->store.address); + ssa_fprintf(f, "store "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, " "); + ssa_print_value(f, m, instr->store.value, type); + ssa_fprintf(f, ", "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, "* "); + ssa_print_value(f, m, instr->store.address, type); + ssa_fprintf(f, "\n"); + } break; + + case ssaInstr_Load: { + Type *type = instr->load.type; + ssa_fprintf(f, "%%%d = load ", value->id); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, ", "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, "* "); + ssa_print_value(f, m, instr->load.address, type); + ssa_fprintf(f, "\n"); + } break; + + case ssaInstr_GetElementPtr: { + Type *et = instr->get_element_ptr.element_type; + ssa_fprintf(f, "%%%d = getelementptr ", value->id); + if (instr->get_element_ptr.inbounds) + ssa_fprintf(f, "inbounds "); + + ssa_print_type(f, m->sizes, et); + ssa_fprintf(f, ", "); + ssa_print_type(f, m->sizes, et); + ssa_fprintf(f, "* "); + ssa_print_value(f, m, instr->get_element_ptr.address, et); + for (isize i = 0; i < instr->get_element_ptr.index_count; i++) { + ssaValue *index = instr->get_element_ptr.indices[i]; + Type *t = ssa_value_type(index); + ssa_fprintf(f, ", "); + ssa_print_type(f, m->sizes, t); + ssa_fprintf(f, " "); + ssa_print_value(f, m, index, t); + } + ssa_fprintf(f, "\n"); + } break; + + case ssaInstr_Br: { + ssa_fprintf(f, "br "); + if (instr->br.cond != NULL) { + ssa_print_type(f, m->sizes, t_bool); + ssa_fprintf(f, " "); + ssa_print_value(f, m, instr->br.cond, t_bool); + ssa_fprintf(f, ", ", instr->br.cond->id); + } + ssa_fprintf(f, "label "); + ssa_fprintf(f, "%%"); ssa_print_block_name(f, instr->br.true_block); + if (instr->br.false_block != NULL) { + ssa_fprintf(f, ", label "); + ssa_fprintf(f, "%%"); ssa_print_block_name(f, instr->br.false_block); + } + ssa_fprintf(f, "\n"); + } break; + + case ssaInstr_Ret: { + auto *ret = &instr->ret; + ssa_fprintf(f, "ret "); + if (ret->value == NULL) { + ssa_fprintf(f, "void"); + } else { + Type *t = ssa_value_type(ret->value); + ssa_print_type(f, m->sizes, t); + ssa_fprintf(f, " "); + ssa_print_value(f, m, ret->value, t); + } + + ssa_fprintf(f, "\n"); + + } break; + + case ssaInstr_Conv: { + auto *c = &instr->conv; + ssa_fprintf(f, "%%%d = %.*s ", value->id, LIT(ssa_conv_strings[c->kind])); + ssa_print_type(f, m->sizes, c->from); + ssa_fprintf(f, " "); + ssa_print_value(f, m, c->value, c->from); + ssa_fprintf(f, " to "); + ssa_print_type(f, m->sizes, c->to); + ssa_fprintf(f, "\n"); + + } break; + + case ssaInstr_Unreachable: { + ssa_fprintf(f, "unreachable\n"); + } break; + + case ssaInstr_BinaryOp: { + auto *bo = &value->instr.binary_op; + Type *type = ssa_value_type(bo->left); + + ssa_fprintf(f, "%%%d = ", value->id); + + if (gb_is_between(bo->op.kind, Token__ComparisonBegin+1, Token__ComparisonEnd-1)) { + if (is_type_float(type)) { + ssa_fprintf(f, "fcmp "); + switch (bo->op.kind) { + case Token_CmpEq: ssa_fprintf(f, "oeq"); break; + case Token_NotEq: ssa_fprintf(f, "one"); break; + case Token_Lt: ssa_fprintf(f, "olt"); break; + case Token_Gt: ssa_fprintf(f, "ogt"); break; + case Token_LtEq: ssa_fprintf(f, "ole"); break; + case Token_GtEq: ssa_fprintf(f, "oge"); break; + } + } else { + ssa_fprintf(f, "icmp "); + if (bo->op.kind != Token_CmpEq && + bo->op.kind != Token_NotEq) { + if (is_type_unsigned(type)) { + ssa_fprintf(f, "u"); + } else { + ssa_fprintf(f, "s"); + } + } + switch (bo->op.kind) { + case Token_CmpEq: ssa_fprintf(f, "eq"); break; + case Token_NotEq: ssa_fprintf(f, "ne"); break; + case Token_Lt: ssa_fprintf(f, "lt"); break; + case Token_Gt: ssa_fprintf(f, "gt"); break; + case Token_LtEq: ssa_fprintf(f, "le"); break; + case Token_GtEq: ssa_fprintf(f, "ge"); break; + } + } + } else { + if (is_type_float(type)) + ssa_fprintf(f, "f"); + + switch (bo->op.kind) { + case Token_Add: ssa_fprintf(f, "add"); break; + case Token_Sub: ssa_fprintf(f, "sub"); break; + case Token_And: ssa_fprintf(f, "and"); break; + case Token_Or: ssa_fprintf(f, "or"); break; + case Token_Xor: ssa_fprintf(f, "xor"); break; + + case Token_AndNot: GB_PANIC("Token_AndNot Should never be called"); + + case Token_Mul: ssa_fprintf(f, "mul"); break; + + default: { + if (!is_type_float(type)) { + if (is_type_unsigned(type)) ssa_fprintf(f, "u"); + else ssa_fprintf(f, "s"); + } + + switch (bo->op.kind) { + case Token_Quo: ssa_fprintf(f, "div"); break; + case Token_Mod: ssa_fprintf(f, "rem"); break; + } + } break; + } + } + + ssa_fprintf(f, " "); + ssa_print_type(f, m->sizes, type); + ssa_fprintf(f, " "); + ssa_print_value(f, m, bo->left, type); + ssa_fprintf(f, ", "); + ssa_print_value(f, m, bo->right, type); + ssa_fprintf(f, "\n"); + + } break; + + case ssaInstr_Call: { + auto *call = &instr->call; + if (call->type) { + ssa_fprintf(f, "%%%d = ", value->id); + } + ssa_fprintf(f, "call "); + if (call->type) { + ssa_print_type(f, m->sizes, call->type); + } else { + ssa_fprintf(f, "void"); + } + ssa_fprintf(f, " "); + ssa_print_value(f, m, call->value, call->type); + + + ssa_fprintf(f, "("); + for (isize i = 0; i < call->arg_count; i++) { + ssaValue *arg = call->args[i]; + Type *t = ssa_value_type(arg); + if (i > 0) { + ssa_fprintf(f, ", "); + } + ssa_print_type(f, m->sizes, t); + ssa_fprintf(f, " "); + ssa_print_value(f, m, arg, t); + } + ssa_fprintf(f, ")\n"); + + } break; + + default: + ssa_fprintf(f, "; %d\n", instr->kind); + break; + } +} + +void ssa_print_llvm_ir(gbFile *f, ssaModule *m) { + if (m->layout.len > 0) { + ssa_fprintf(f, "target datalayout = %.*s\n", LIT(m->layout)); + } + + gb_for_array(member_index, m->members.entries) { + auto *entry = &m->members.entries[member_index]; + ssaValue *v = entry->value; + switch (v->kind) { + case ssaValue_TypeName: { + ssa_print_encoded_local(f, v->type_name.entity->token.string); + ssa_fprintf(f, " = type "); + ssa_print_type(f, m->sizes, get_base_type(v->type_name.type)); + ssa_fprintf(f, "\n"); + } break; + + case ssaValue_Global: { + auto *g = &v->global; + ssa_print_encoded_global(f, g->entity->token.string); + ssa_fprintf(f, " = "); + if (g->is_constant) { + ssa_fprintf(f, "private constant "); + } else { + ssa_fprintf(f, "global "); + } + + ssa_print_type(f, m->sizes, get_base_type(g->entity->type)); + ssa_fprintf(f, " "); + ssa_print_value(f, m, g->value, g->entity->type); + ssa_fprintf(f, "\n"); + } break; + + case ssaValue_Proc: { + ssaProcedure *proc = &v->proc; + if (proc->body == NULL) { + ssa_fprintf(f, "declare "); + } else { + ssa_fprintf(f, "define "); + } + + auto *proc_type = &proc->entity->type->procedure; + + if (proc_type->result_count == 0) { + ssa_fprintf(f, "void"); + } else { + ssa_print_type(f, m->sizes, proc_type->results); + } + + ssa_fprintf(f, " "); + + ssa_print_encoded_global(f, proc->name); + ssa_fprintf(f, "("); + + if (proc_type->param_count > 0) { + auto *params = &proc_type->params->tuple; + for (isize i = 0; i < params->variable_count; i++) { + Entity *e = params->variables[i]; + if (i > 0) + ssa_fprintf(f, ", "); + ssa_print_type(f, m->sizes, e->type); + ssa_fprintf(f, " %%%.*s", LIT(e->token.string)); + } + } + + ssa_fprintf(f, ") "); + + if (proc->body == NULL) { + ssa_fprintf(f, "\n"); + } else { + ssa_fprintf(f, "{\n"); + gb_for_array(i, proc->blocks) { + ssaBlock *block = proc->blocks[i]; + + if (i > 0) ssa_fprintf(f, "\n"); + ssa_print_block_name(f, block); + ssa_fprintf(f, ":\n"); + + gb_for_array(j, block->instrs) { + ssaValue *value = block->instrs[j]; + ssa_print_instr(f, m, value); + } + } + ssa_fprintf(f, "}\n\n"); + } + + } break; + } + } +} diff --git a/src/codegen/ssa.cpp b/src/codegen/ssa.cpp index b387df1fc..db91a4093 100644 --- a/src/codegen/ssa.cpp +++ b/src/codegen/ssa.cpp @@ -59,7 +59,7 @@ struct ssaProcedure { SSA_INSTR_KIND(Store), \ SSA_INSTR_KIND(Load), \ SSA_INSTR_KIND(GetElementPtr), \ - SSA_INSTR_KIND(Convert), \ + SSA_INSTR_KIND(Conv), \ SSA_INSTR_KIND(Br), \ SSA_INSTR_KIND(Ret), \ SSA_INSTR_KIND(Unreachable), \ @@ -79,20 +79,31 @@ String const ssa_instr_strings[] = { #undef SSA_INSTR_KIND }; -enum ssaConversionKind { - ssaConversion_Invalid, - - ssaConversion_ZExt, - ssaConversion_FPExt, - ssaConversion_FPToUI, - ssaConversion_FPToSI, - ssaConversion_UIToFP, - ssaConversion_SIToFP, - ssaConversion_PtrToInt, - ssaConversion_IntToPtr, - ssaConversion_BitCast, +#define SSA_CONV_KINDS \ + SSA_CONV_KIND(Invalid), \ + SSA_CONV_KIND(trunc), \ + SSA_CONV_KIND(zext), \ + SSA_CONV_KIND(fptrunc), \ + SSA_CONV_KIND(fpext), \ + SSA_CONV_KIND(fptoui), \ + SSA_CONV_KIND(fptosi), \ + SSA_CONV_KIND(uitofp), \ + SSA_CONV_KIND(sitofp), \ + SSA_CONV_KIND(ptrtoint), \ + SSA_CONV_KIND(inttoptr), \ + SSA_CONV_KIND(bitcast), \ + SSA_CONV_KIND(Count) + +enum ssaConvKind { +#define SSA_CONV_KIND(x) GB_JOIN2(ssaConv_, x) + SSA_CONV_KINDS +#undef SSA_CONV_KIND +}; - ssaConversion_Count, +String const ssa_conv_strings[] = { +#define SSA_CONV_KIND(x) {cast(u8 *)#x, gb_size_of(#x)-1} + SSA_CONV_KINDS +#undef SSA_CONV_KIND }; struct ssaInstr { @@ -124,10 +135,10 @@ struct ssaInstr { b32 inbounds; } get_element_ptr; struct { - ssaConversionKind kind; + ssaConvKind kind; ssaValue *value; Type *from, *to; - } conversion; + } conv; struct { ssaValue *cond; ssaBlock *true_block; @@ -266,6 +277,8 @@ Type *ssa_instr_type(ssaInstr *instr) { return instr->get_element_ptr.result_type; case ssaInstr_BinaryOp: return instr->binary_op.type; + case ssaInstr_Conv: + return instr->conv.to; } return NULL; } @@ -287,6 +300,9 @@ void ssa_instr_set_type(ssaInstr *instr, Type *type) { case ssaInstr_BinaryOp: instr->binary_op.type = type; break; + case ssaInstr_Conv: + instr->conv.to = type; + break; } } @@ -481,6 +497,18 @@ ssaValue *ssa_make_instr_call(ssaProcedure *p, ssaValue *value, ssaValue **args, return v; } +ssaValue *ssa_make_instr_conv(ssaProcedure *p, ssaConvKind kind, ssaValue *value, Type *from, Type *to) { + ssaValue *v = ssa_alloc_instr(p->module->allocator, ssaInstr_Conv); + v->instr.conv.kind = kind; + v->instr.conv.value = value; + v->instr.conv.from = from; + v->instr.conv.to = to; + if (p->curr_block) { + gb_array_append(p->curr_block->values, v); + } + return v; +} + @@ -736,16 +764,73 @@ ssaValue *ssa_emit_conv(ssaProcedure *proc, ssaValue *value, Type *t) { if (are_types_identical(t, src_type)) return value; - Type *dst = get_base_type(t); Type *src = get_base_type(src_type); + Type *dst = get_base_type(t); if (value->kind == ssaValue_Constant) { if (dst->kind == Type_Basic) return ssa_make_value_constant(proc->module->allocator, t, value->constant.value); } + // integer -> integer + if (is_type_integer(src) && is_type_integer(dst)) { + i64 sz = basic_type_sizes[src->basic.kind]; + i64 dz = basic_type_sizes[dst->basic.kind]; + ssaConvKind kind = ssaConv_trunc; + if (dz >= sz) { + kind = ssaConv_zext; + } + return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst)); + } + + // float -> float + if (is_type_float(src) && is_type_float(dst)) { + i64 sz = basic_type_sizes[src->basic.kind]; + i64 dz = basic_type_sizes[dst->basic.kind]; + ssaConvKind kind = ssaConv_fptrunc; + if (dz >= sz) { + kind = ssaConv_fpext; + } + return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst)); + } + + // float -> integer + if (is_type_float(src) && is_type_integer(dst)) { + ssaConvKind kind = ssaConv_fptosi; + if (is_type_unsigned(dst)) { + kind = ssaConv_fptoui; + } + return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst)); + } + + // integer -> float + if (is_type_integer(src) && is_type_float(dst)) { + ssaConvKind kind = ssaConv_sitofp; + if (is_type_unsigned(dst)) { + kind = ssaConv_uitofp; + } + return ssa_emit(proc, ssa_make_instr_conv(proc, kind, value, src, dst)); + } + + // Pointer to int + if (is_type_pointer(src) && is_type_integer(dst)) { + return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_ptrtoint, value, src, dst)); + } + + // int to Pointer + if (is_type_integer(src) && is_type_pointer(dst)) { + return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_inttoptr, value, src, dst)); + } + + // Pointer to Pointer + if (is_type_pointer(src) && is_type_pointer(dst)) { + return ssa_emit(proc, ssa_make_instr_conv(proc, ssaConv_bitcast, value, src, dst)); + } + GB_PANIC("TODO(bill): ssa_emit_conv"); + GB_PANIC("TODO(bill): string -> []byte"); + GB_PANIC("TODO(bill): []byte -> string"); return NULL; } @@ -930,6 +1015,32 @@ ssaValue *ssa_emit_slice(ssaProcedure *proc, Type *slice_type, ssaValue *base, s return ssa_emit_load(proc, slice); } +ssaValue *ssa_emit_substring(ssaProcedure *proc, ssaValue *base, ssaValue *low, ssaValue *high) { + Type *bt = get_base_type(ssa_value_type(base)); + GB_ASSERT(bt == t_string); + if (low == NULL) { + low = v_zero; + } + if (high == NULL) { + high = ssa_string_len(proc, base); + } + + Token op_sub = {Token_Sub}; + ssaValue *len = ssa_emit_arith(proc, op_sub, high, low, t_int); + + ssaValue *elem = ssa_string_elem(proc, base); + elem = ssa_emit_ptr_offset(proc, elem, low); + + ssaValue *str = ssa_add_local_generated(proc, t_string); + ssaValue *gep = NULL; + gep = ssa_emit_struct_gep(proc, str, v_zero32, ssa_value_type(elem)); + ssa_emit_store(proc, gep, elem); + + gep = ssa_emit_struct_gep(proc, str, v_one32, t_int); + ssa_emit_store(proc, gep, len); + + return ssa_emit_load(proc, str); +} ssaValue *ssa_add_global_string_array(ssaProcedure *proc, ExactValue value) { @@ -944,13 +1055,11 @@ ssaValue *ssa_add_global_string_array(ssaProcedure *proc, ExactValue value) { String name = make_string(str, len-1); Token token = {Token_String}; token.string = name; - // TODO(bill): unquote function - Type *type = make_type_array(a, t_u8, value.value_string.len-2); + Type *type = make_type_array(a, t_u8, value.value_string.len); Entity *entity = make_entity_constant(a, NULL, token, type, value); ssaValue *v = ssa_make_value_constant(a, type, value); ssaValue *g = ssa_make_value_global(a, entity, v); - g->global.is_constant = true; map_set(&proc->module->values, hash_pointer(entity), g); map_set(&proc->module->members, hash_string(name), g); @@ -971,39 +1080,6 @@ ssaValue *ssa_emit_string(ssaProcedure *proc, ssaValue *elem, ssaValue *len) { return ssa_emit_load(proc, str); } -ssaValue *ssa_emit_call(ssaProcedure *proc, AstNode *expr, Type *result_type) { - ast_node(ce, CallExpr, expr); - - ssaValue *value = ssa_build_expr(proc, ce->proc); - Type *proc_type_ = ssa_value_type(value); - GB_ASSERT(proc_type_->kind == Type_Procedure); - auto *type = &proc_type_->procedure; - - isize arg_index = 0; - isize arg_count = type->param_count; - ssaValue **args = gb_alloc_array(proc->module->allocator, ssaValue *, arg_count); - - for (AstNode *arg = ce->arg_list; arg != NULL; arg = arg->next) { - ssaValue *a = ssa_build_expr(proc, arg); - Type *at = ssa_value_type(a); - if (at->kind == Type_Tuple) { - GB_PANIC("TODO(bill): tuple call arguments"); - } else { - args[arg_index++] = a; - } - } - - for (isize i = 0; i < arg_count; i++) { - Entity *e = type->params->tuple.variables[i]; - args[i] = ssa_emit_conv(proc, args[i], e->type); - } - - ssaValue *call = ssa_make_instr_call(proc, value, args, arg_count, result_type); - return ssa_emit(proc, call); -} - - - ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue *tv) { switch (expr->kind) { @@ -1105,7 +1181,8 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue case_end; case_ast_node(ce, CastExpr, expr); - GB_PANIC("TODO(bill): ssa_build_single_expr CastExpr"); + ssaValue *v = ssa_build_expr(proc, ce->expr); + return ssa_emit_conv(proc, v, tv->type); case_end; case_ast_node(ce, CallExpr, expr); @@ -1113,33 +1190,90 @@ ssaValue *ssa_build_single_expr(ssaProcedure *proc, AstNode *expr, TypeAndValue if (p->kind == AstNode_Ident) { Entity **found = map_get(&proc->module->info->uses, hash_pointer(p)); if (found && (*found)->kind == Entity_Builtin) { - GB_PANIC("TODO(bill): CallExpr Builtin"); + Entity *e = *found; + switch (e->builtin.id) { + case BuiltinProcedure_len: { + ssaValue *v = ssa_lvalue_address(ssa_build_addr(proc, ce->arg_list), proc); + Type *t = get_base_type(ssa_value_type(v)); + if (t == t_string) + return ssa_string_len(proc, v); + else if (t->kind == Type_Slice) + return ssa_slice_len(proc, v); + } break; + case BuiltinProcedure_cap: { + ssaValue *v = ssa_lvalue_address(ssa_build_addr(proc, ce->arg_list), proc); + Type *t = get_base_type(ssa_value_type(v)); + if (t == t_string) + return ssa_string_cap(proc, v); + else if (t->kind == Type_Slice) + return ssa_slice_cap(proc, v); + } break; + case BuiltinProcedure_copy: { + GB_PANIC("TODO(bill): BuiltinProcedure_copy"); + } break; + case BuiltinProcedure_print: { + GB_PANIC("TODO(bill): BuiltinProcedure_print"); + } break; + case BuiltinProcedure_println: { + GB_PANIC("TODO(bill): BuiltinProcedure_println"); + } break; + } } } - return ssa_emit_call(proc, expr, tv->type); + + // NOTE(bill): Regular call + ssaValue *value = ssa_build_expr(proc, ce->proc); + Type *proc_type_ = ssa_value_type(value); + GB_ASSERT(proc_type_->kind == Type_Procedure); + auto *type = &proc_type_->procedure; + + isize arg_index = 0; + isize arg_count = type->param_count; + ssaValue **args = gb_alloc_array(proc->module->allocator, ssaValue *, arg_count); + + for (AstNode *arg = ce->arg_list; arg != NULL; arg = arg->next) { + ssaValue *a = ssa_build_expr(proc, arg); + Type *at = ssa_value_type(a); + if (at->kind == Type_Tuple) { + GB_PANIC("TODO(bill): tuple call arguments"); + } else { + args[arg_index++] = a; + } + } + + for (isize i = 0; i < arg_count; i++) { + Entity *e = type->params->tuple.variables[i]; + args[i] = ssa_emit_conv(proc, args[i], e->type); + } + + ssaValue *call = ssa_make_instr_call(proc, value, args, arg_count, tv->type); + return ssa_emit(proc, call); case_end; case_ast_node(se, SliceExpr, expr); - ssaValue *base = NULL; ssaValue *low = NULL; ssaValue *high = NULL; ssaValue *max = NULL; - switch (tv->type->kind) { - case Type_Slice: - case Type_Array: - base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc); - break; - case Type_Basic: - GB_PANIC("SliceExpr Type_Basic"); - break; - } if (se->low != NULL) low = ssa_build_expr(proc, se->low); if (se->high != NULL) high = ssa_build_expr(proc, se->high); if (se->triple_indexed) max = ssa_build_expr(proc, se->max); - return ssa_emit_slice(proc, tv->type, base, low, high, max); + switch (tv->type->kind) { + case Type_Slice: + case Type_Array: { + ssaValue *base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc); + return ssa_emit_slice(proc, tv->type, base, low, high, max); + } break; + case Type_Basic: { + // NOTE(bill): max is not needed + ssaValue *base = ssa_lvalue_address(ssa_build_addr(proc, se->expr), proc); + return ssa_emit_substring(proc, base, low, high); + } break; + } + + GB_PANIC("Unknown slicable type"); case_end; case_ast_node(ie, IndexExpr, expr); @@ -1234,17 +1368,17 @@ ssaLvalue ssa_build_addr(ssaProcedure *proc, AstNode *expr) { ssaValue *elem = ssa_slice_elem(proc, slice); v = ssa_emit_ptr_offset(proc, elem, index); } break; - case Type_Pointer: { - ssaValue *ptr = ssa_emit_load(proc, ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc)); - ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int); - v = ssa_emit_ptr_offset(proc, ptr, index); - } break; - case Type_Basic: { // string + case Type_Basic: { // Basic_string ssaValue *str = ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc); ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int); ssaValue *elem = ssa_string_elem(proc, str); v = ssa_emit_ptr_offset(proc, elem, index); } break; + case Type_Pointer: { + ssaValue *ptr = ssa_emit_load(proc, ssa_lvalue_address(ssa_build_addr(proc, ie->expr), proc)); + ssaValue *index = ssa_emit_conv(proc, ssa_build_expr(proc, ie->index), t_int); + v = ssa_emit_ptr_offset(proc, ptr, index); + } break; } // NOTE(bill): lvalue address encodes the pointer, thus the deref @@ -1318,12 +1452,12 @@ void ssa_build_stmt_list(ssaProcedure *proc, AstNode *list) { ssa_build_stmt(proc, stmt); } -void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { - switch (s->kind) { - case_ast_node(bs, EmptyStmt, s); +void ssa_build_stmt(ssaProcedure *proc, AstNode *node) { + switch (node->kind) { + case_ast_node(bs, EmptyStmt, node); case_end; - case_ast_node(vd, VarDecl, s); + case_ast_node(vd, VarDecl, node); if (vd->kind == Declaration_Mutable) { if (vd->name_count == vd->value_count) { // 1:1 assigment gbArray(ssaLvalue) lvals; @@ -1365,7 +1499,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { } case_end; - case_ast_node(ids, IncDecStmt, s); + case_ast_node(ids, IncDecStmt, node); Token op = ids->op; if (op.kind == Token_Increment) { op.kind = Token_Add; @@ -1378,7 +1512,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { case_end; - case_ast_node(as, AssignStmt, s); + case_ast_node(as, AssignStmt, node); switch (as->op.kind) { case Token_Eq: { gbArray(ssaLvalue) lvals; @@ -1397,7 +1531,6 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { if (as->lhs_count == as->rhs_count) { if (as->lhs_count == 1) { - AstNode *lhs = as->lhs_list; AstNode *rhs = as->rhs_list; ssaValue *init = ssa_build_expr(proc, rhs); ssa_lvalue_store(lvals[0], proc, init); @@ -1435,19 +1568,20 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { } case_end; - case_ast_node(es, ExprStmt, s); - ssaValue *value = ssa_build_expr(proc, es->expr); + case_ast_node(es, ExprStmt, node); + // NOTE(bill): No need to use return value + ssa_build_expr(proc, es->expr); case_end; - case_ast_node(bs, BlockStmt, s) + case_ast_node(bs, BlockStmt, node); ssa_build_stmt_list(proc, bs->list); case_end; - case_ast_node(bs, DeferStmt, s); + case_ast_node(bs, DeferStmt, node); GB_PANIC("DeferStmt"); case_end; - case_ast_node(rs, ReturnStmt, s); + case_ast_node(rs, ReturnStmt, node); ssaValue *v = NULL; auto *return_type_tuple = &proc->type->procedure.results->tuple; isize return_count = proc->type->procedure.result_count; @@ -1482,12 +1616,12 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { case_end; - case_ast_node(is, IfStmt, s); + case_ast_node(is, IfStmt, node); if (is->init != NULL) { ssa_build_stmt(proc, is->init); } - ssaBlock *then = ssa_add_block(proc, s, make_string("if.then")); - ssaBlock *done = ssa__make_block(proc, s, make_string("if.done")); // NOTE(bill): Append later + ssaBlock *then = ssa_add_block(proc, node, make_string("if.then")); + ssaBlock *done = ssa__make_block(proc, node, make_string("if.done")); // NOTE(bill): Append later ssaBlock *else_ = done; if (is->else_stmt != NULL) { else_ = ssa_add_block(proc, is->else_stmt, make_string("if.else")); @@ -1507,21 +1641,21 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { proc->curr_block = done; case_end; - case_ast_node(fs, ForStmt, s); + case_ast_node(fs, ForStmt, node); if (fs->init != NULL) { ssa_build_stmt(proc, fs->init); } - ssaBlock *body = ssa_add_block(proc, s, make_string("for.body")); - ssaBlock *done = ssa__make_block(proc, s, make_string("for.done")); // NOTE(bill): Append later + ssaBlock *body = ssa_add_block(proc, node, make_string("for.body")); + ssaBlock *done = ssa__make_block(proc, node, make_string("for.done")); // NOTE(bill): Append later ssaBlock *loop = body; if (fs->cond != NULL) { - loop = ssa_add_block(proc, fs->cond, make_string("for.loop")); + loop = ssa_add_block(proc, node, make_string("for.loop")); } ssaBlock *cont = loop; if (fs->post != NULL) { - cont = ssa_add_block(proc, fs->cond, make_string("for.post")); + cont = ssa_add_block(proc, node, make_string("for.post")); } ssa_emit_jump(proc, loop); proc->curr_block = loop; @@ -1545,7 +1679,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { case_end; - case_ast_node(bs, BranchStmt, s); + case_ast_node(bs, BranchStmt, node); ssaBlock *block = NULL; switch (bs->token.kind) { #define BRANCH_GET_BLOCK(kind_) \ @@ -1557,6 +1691,7 @@ void ssa_build_stmt(ssaProcedure *proc, AstNode *s) { BRANCH_GET_BLOCK(break); BRANCH_GET_BLOCK(continue); BRANCH_GET_BLOCK(fallthrough); + #undef BRANCH_GET_BLOCK } ssa_emit_jump(proc, block); ssa_emit_unreachable(proc); diff --git a/src/common.cpp b/src/common.cpp index fa99247c3..d1905f1e8 100644 --- a/src/common.cpp +++ b/src/common.cpp @@ -2,78 +2,7 @@ #define GB_IMPLEMENTATION #include "gb/gb.h" -// NOTE(bill): Used for UTF-8 strings -typedef struct String { - u8 *text; - isize len; -} String; -// NOTE(bill): used for printf style arguments -#define LIT(x) (x).len, (x).text - - - - -gb_inline String make_string(u8 *text, isize len) { - String s; - s.text = text; - if (len < 0) - len = gb_strlen(cast(char *)text); - s.len = len; - return s; -} - -gb_inline String make_string(char *text) { - return make_string(cast(u8 *)cast(void *)text, gb_strlen(text)); -} - -gb_inline b32 are_strings_equal(String a, String b) { - if (a.len == b.len) { - return gb_memcompare(a.text, b.text, a.len) == 0; - } - return false; -} - -gb_inline b32 are_strings_equal_ignore_case(String a, String b) { - if (a.len == b.len) { - for (isize i = 0; i < a.len; i++) { - char x = cast(char)a.text[i]; - char y = cast(char)b.text[i]; - if (gb_char_to_lower(x) != gb_char_to_lower(y)) - return false; - } - return true; - } - return false; -} - - -gb_inline isize string_extension_position(String str) { - isize dot_pos = -1; - isize i = str.len; - b32 seen_dot = false; - while (i --> 0) { - if (str.text[i] == GB_PATH_SEPARATOR) - break; - if (str.text[i] == '.') { - dot_pos = i; - break; - } - } - - return dot_pos; -} - -gb_inline b32 string_has_extension(String str, String ext) { - if (str.len > ext.len+1) { - u8 *s = str.text+str.len - ext.len-1; - if (s[0] == '.') { - s++; - return gb_memcompare(s, ext.text, ext.len) == 0; - } - return false; - } - return false; -} +#include "string.cpp" // Hasing @@ -86,7 +15,8 @@ gb_inline u64 hash_string(String s) { } gb_inline u64 hash_pointer(void *ptr) { - u64 p = cast(u64)cast(uintptr)ptr; + uintptr u = cast(uintptr)ptr; + u64 p = cast(u64)u; return p; } @@ -125,9 +55,9 @@ typedef struct MapFindResult { template struct MapEntry { - u64 key; + u64 key; isize next; - T value; + T value; }; template diff --git a/src/gb/gb.h b/src/gb/gb.h index cd14c6be1..2afc3b131 100644 --- a/src/gb/gb.h +++ b/src/gb/gb.h @@ -1370,6 +1370,7 @@ GB_DEF u8 * gb_ucs2_to_utf8_buf(u16 const *str); // NOTE(bill): Uses locally per // NOTE(bill): Returns size of codepoint in bytes GB_DEF isize gb_utf8_decode (u8 const *str, isize str_len, Rune *codepoint); GB_DEF isize gb_utf8_codepoint_size(u8 const *str, isize str_len); +GB_DEF isize gb_utf8_encode_rune (u8 buf[4], Rune r); //////////////////////////////////////////////////////////////// // @@ -5871,7 +5872,7 @@ gb_inline i32 gb_hex_digit_to_int(char c) { return c - 'a' + 10; else if (gb_is_between(c, 'A', 'F')) return c - 'A' + 10; - return 0; + return -1; } @@ -6691,6 +6692,43 @@ isize gb_utf8_codepoint_size(u8 const *str, isize str_len) { return i+1; } +isize gb_utf8_encode_rune(u8 buf[4], Rune r) { + u32 i = cast(u32)r; + u8 mask = 0x3f; + if (i <= (1<<7)-1) { + buf[0] = cast(u8)r; + return 1; + } + if (i <= (1<<11)-1) { + buf[0] = 0xc0 | cast(u8)(r>>6); + buf[1] = 0x80 | cast(u8)(r)&mask; + return 2; + } + + // Invalid or Surrogate range + if (i > GB_RUNE_MAX || + gb_is_between(i, 0xd800, 0xdfff)) { + r = GB_RUNE_INVALID; + + buf[0] = 0xe0 | cast(u8)(r>>12); + buf[1] = 0x80 | cast(u8)(r>>6)&mask; + buf[2] = 0x80 | cast(u8)(r)&mask; + return 3; + } + + if (i <= (1<<16)-1) { + buf[0] = 0xe0 | cast(u8)(r>>12); + buf[1] = 0x80 | cast(u8)(r>>6)&mask; + buf[2] = 0x80 | cast(u8)(r)&mask; + return 3; + } + + buf[0] = 0xf0 | cast(u8)(r>>18); + buf[1] = 0x80 | cast(u8)(r>>12)&mask; + buf[2] = 0x80 | cast(u8)(r>>6)&mask; + buf[3] = 0x80 | cast(u8)(r)&mask; + return 4; +} diff --git a/src/main.cpp b/src/main.cpp index c892d051a..dbcda7397 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -33,7 +33,7 @@ int main(int argc, char **argv) { check_parsed_files(&checker); ssaGen ssa = {}; - if (false && ssa_gen_init(&ssa, &checker)) { + if (ssa_gen_init(&ssa, &checker)) { defer (ssa_gen_destroy(&ssa)); ssa_gen_code(&ssa); diff --git a/src/parser.cpp b/src/parser.cpp index 184668c4b..faf462cd8 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -2063,10 +2063,7 @@ void parse_file(Parser *p, AstFile *f) { } else { if (node->kind == AstNode_ImportDecl) { auto *id = &node->ImportDecl; - String file = id->filepath.string; - String file_str = {}; - if (file.text[0] == '"') - file_str = make_string(file.text+1, file.len-2); + String file_str = id->filepath.string; char ext[] = ".odin"; isize ext_len = gb_size_of(ext)-1; diff --git a/src/string.cpp b/src/string.cpp new file mode 100644 index 000000000..44c814897 --- /dev/null +++ b/src/string.cpp @@ -0,0 +1,262 @@ + +// NOTE(bill): Used for UTF-8 strings +typedef struct String { + u8 *text; + isize len; +} String; +// NOTE(bill): used for printf style arguments +#define LIT(x) (x).len, (x).text + + + + +gb_inline String make_string(u8 *text, isize len) { + String s; + s.text = text; + if (len < 0) + len = gb_strlen(cast(char *)text); + s.len = len; + return s; +} + +gb_inline String make_string(char *text) { + return make_string(cast(u8 *)cast(void *)text, gb_strlen(text)); +} + +gb_inline b32 are_strings_equal(String a, String b) { + if (a.len == b.len) { + return gb_memcompare(a.text, b.text, a.len) == 0; + } + return false; +} + +gb_inline b32 are_strings_equal_ignore_case(String a, String b) { + if (a.len == b.len) { + for (isize i = 0; i < a.len; i++) { + char x = cast(char)a.text[i]; + char y = cast(char)b.text[i]; + if (gb_char_to_lower(x) != gb_char_to_lower(y)) + return false; + } + return true; + } + return false; +} + + +gb_inline isize string_extension_position(String str) { + isize dot_pos = -1; + isize i = str.len; + b32 seen_dot = false; + while (i --> 0) { + if (str.text[i] == GB_PATH_SEPARATOR) + break; + if (str.text[i] == '.') { + dot_pos = i; + break; + } + } + + return dot_pos; +} + +gb_inline b32 string_has_extension(String str, String ext) { + if (str.len > ext.len+1) { + u8 *s = str.text+str.len - ext.len-1; + if (s[0] == '.') { + s++; + return gb_memcompare(s, ext.text, ext.len) == 0; + } + return false; + } + return false; +} + +b32 string_contains_char(String s, u8 c) { + for (isize i = 0; i < s.len; i++) { + if (s.text[i] == c) + return true; + } + return false; +} + +b32 unquote_char(String s, u8 quote, Rune *rune, b32 *multi, String *tail_string) { + if (s.text[0] == quote && + (quote == '\'' || quote == '"')) { + return false; + } else if (s.text[0] >= 0x80) { + Rune r = -1; + isize size = gb_utf8_decode(s.text, s.len, &r); + *rune = r; + *tail_string = make_string(s.text+size, s.len-size); + return true; + } else if (s.text[0] != '\\') { + *rune = s.text[0]; + *tail_string = make_string(s.text+1, s.len-1); + return true; + } + + if (s.len <= 1) { + return false; + } + u8 c = s.text[1]; + s = make_string(s.text+2, s.len-2); + + switch (c) { + default: return false; + + case 'a': *rune = '\a'; break; + case 'b': *rune = '\b'; break; + case 'f': *rune = '\f'; break; + case 'n': *rune = '\n'; break; + case 'r': *rune = '\r'; break; + case 't': *rune = '\t'; break; + case 'v': *rune = '\v'; break; + case '\\': *rune = '\\'; break; + + + case '\'': + case '"': + if (c != quote) { + return false; + } + *rune = c; + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': { + i32 r = c - '0'; + if (s.len < 2) { + return false; + } + for (isize i = 0; i < 2; i++) { + i32 d = s.text[i] - '0'; + if (d < 0 || d > 7) { + return false; + } + r = (r<<3) | d; + } + s = make_string(s.text+2, s.len-2); + if (r > 0xff) { + return false; + } + *rune = r; + } break; + + case 'x': + case 'u': + case 'U': { + isize n = 0; + switch (c) { + case 'x': n = 2; break; + case 'u': n = 4; break; + case 'U': n = 8; break; + } + + Rune r = 0; + if (s.len < n) { + return false; + } + for (isize i = 0; i < n; i++) { + i32 d = gb_hex_digit_to_int(s.text[i]); + if (d < 0) { + return false; + } + r = (r<<4) | d; + } + s = make_string(s.text+n, s.len-n); + if (c == 'x') { + *rune = r; + break; + } + if (r > GB_RUNE_MAX) { + return false; + } + *rune = r; + *multi = true; + } break; + } + *tail_string = s; + return true; +} + + +// 0 == failure +// 1 == original memory +// 2 == new allocation +i32 unquote_string(gbAllocator a, String *s_) { + GB_ASSERT(s_ != NULL); + String s = *s_; + isize n = s.len; + if (n < 2) + return 0; + u8 quote = s.text[0]; + if (quote != s.text[n-1]) + return 0; + s.text += 1; + s.len -= 2; + + if (quote == '`') { + if (string_contains_char(s, '`')) { + return 0; + } + *s_ = s; + return 1; + } + if (quote != '"' && quote != '\'') + return 0; + + if (string_contains_char(s, '\n')) + return 0; + + if (!string_contains_char(s, '\\') && !string_contains_char(s, quote)) { + if (quote == '"') { + *s_ = s; + return 1; + } else if (quote == '\'') { + Rune r = GB_RUNE_INVALID; + isize size = gb_utf8_decode(s.text, s.len, &r); + if ((size == s.len) && (r != -1 || size != 1)) { + *s_ = s; + return 1; + } + } + } + + u8 rune_temp[4] = {}; + isize buf_len = 3*s.len / 2; + u8 *buf = gb_alloc_array(a, u8, buf_len); + isize len = 0; + while (s.len > 0) { + String tail_string = {}; + Rune r = 0; + b32 multi = false; + b32 success = unquote_char(s, quote, &r, &multi, &tail_string); + if (!success) { + gb_free(a, buf); + return 0; + } + s = tail_string; + + if (r < 0x80 || !multi) { + buf[len++] = cast(u8)r; + } else { + isize size = gb_utf8_encode_rune(rune_temp, r); + gb_memcopy(buf+len, rune_temp, size); + len += size; + } + + if (quote == '\'' && s.len != 0) { + gb_free(a, buf); + return 0; + } + } + *s_ = make_string(buf, len); + return 2; +} diff --git a/src/tokenizer.cpp b/src/tokenizer.cpp index aa59cf319..7c0565cf2 100644 --- a/src/tokenizer.cpp +++ b/src/tokenizer.cpp @@ -114,9 +114,7 @@ TOKEN_KIND(_KeywordBegin, "_KeywordBegin"), \ TOKEN_KIND(union, "union"), \ TOKEN_KIND(enum, "enum"), \ TOKEN_KIND(_KeywordEnd, "_KeywordEnd"), \ -\ - TOKEN_KIND(Count, ""), \ - + TOKEN_KIND(Count, "") enum TokenKind { #define TOKEN_KIND(e, s) GB_JOIN2(Token_, e) @@ -168,6 +166,7 @@ struct ErrorCollector { }; void error(ErrorCollector *ec, Token token, char *fmt, ...) { + ec->count++; // NOTE(bill): Duplicate error, skip it if (!token_pos_are_equal(ec->prev, token.pos)) { ec->prev = token.pos; @@ -180,7 +179,6 @@ void error(ErrorCollector *ec, Token token, char *fmt, ...) { va_end(va); } - ec->count++; } void warning(Token token, char *fmt, ...) { @@ -266,6 +264,7 @@ struct Tokenizer { isize line_count; isize error_count; + gbArray(String) allocated_strings; }; @@ -342,6 +341,9 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) { advance_to_next_rune(t); if (t->curr_rune == GB_RUNE_BOM) advance_to_next_rune(t); // Ignore BOM at file beginning + + gb_array_init(t->allocated_strings, gb_heap_allocator()); + return TokenizerInit_None; } @@ -360,12 +362,18 @@ TokenizerInitError init_tokenizer(Tokenizer *t, String fullpath) { if (gb_file_size(&f) == 0) return TokenizerInit_Empty; + + return TokenizerInit_None; } gb_inline void destroy_tokenizer(Tokenizer *t) { - if (t->start != NULL) + if (t->start != NULL) { gb_free(gb_heap_allocator(), t->start); + } + if (t->allocated_strings != NULL) { + gb_array_free(t->allocated_strings); + } } void tokenizer_skip_whitespace(Tokenizer *t) { @@ -624,21 +632,48 @@ Token tokenizer_get_token(Tokenizer *t) { case GB_RUNE_EOF: token.kind = Token_EOF; break; + + case '`': // Raw String Literal case '"': // String Literal + { + Rune quote = curr_rune; token.kind = Token_String; - for (;;) { - Rune r = t->curr_rune; - if (r == '\n' || r < 0) { - tokenizer_err(t, "String literal not terminated"); - break; + if (curr_rune == '"') { + for (;;) { + Rune r = t->curr_rune; + if (r == '\n' || r < 0) { + tokenizer_err(t, "String literal not terminated"); + break; + } + advance_to_next_rune(t); + if (r == quote) + break; + if (r == '\\') + scan_escape(t, '"'); + } + } else { + for (;;) { + Rune r = t->curr_rune; + if (r < 0) { + tokenizer_err(t, "String literal not terminated"); + break; + } + advance_to_next_rune(t); + if (r == quote) + break; } - advance_to_next_rune(t); - if (r == '"') - break; - if (r == '\\') - scan_escape(t, '"'); } - break; + token.string.len = t->curr - token.string.text; + i32 success = unquote_string(gb_heap_allocator(), &token.string); + if (success > 0) { + if (success == 2) { + gb_array_append(t->allocated_strings, token.string); + } + return token; + } else { + tokenizer_err(t, "Invalid string literal"); + } + } break; case '\'': { // Rune Literal b32 valid = true; @@ -663,6 +698,16 @@ Token tokenizer_get_token(Tokenizer *t) { if (valid && len != 1) tokenizer_err(t, "Illegal rune literal"); + token.string.len = t->curr - token.string.text; + i32 success = unquote_string(gb_heap_allocator(), &token.string); + if (success > 0) { + if (success == 2) { + gb_array_append(t->allocated_strings, token.string); + } + return token; + } else { + tokenizer_err(t, "Invalid rune literal"); + } } break; case '.': -- cgit v1.2.3