author     gingerBill <bill@gingerbill.org>           2024-11-24 12:32:10 +0000
committer  flysand7 <thebumboni@gmail.com>            2024-12-01 11:54:54 +1100
commit     d85de2e54e04709c1e73018fc0e7978a616b93d8 (patch)
tree       ae98ee3001c5acf7a6ef9c0f2a479d0133f0c7ba /core
parent     4d9a9ec3f51f71db723eec93ebef60b5202f894b (diff)
Remove `core:c/frontend`
Diffstat (limited to 'core')
-rw-r--r--  core/c/frontend/preprocessor/const_expr.odin      25
-rw-r--r--  core/c/frontend/preprocessor/preprocess.odin    1510
-rw-r--r--  core/c/frontend/preprocessor/unquote.odin        154
-rw-r--r--  core/c/frontend/tokenizer/doc.odin                31
-rw-r--r--  core/c/frontend/tokenizer/hide_set.odin           68
-rw-r--r--  core/c/frontend/tokenizer/token.odin             169
-rw-r--r--  core/c/frontend/tokenizer/tokenizer.odin         667
-rw-r--r--  core/c/frontend/tokenizer/unicode.odin           116
8 files changed, 0 insertions, 2740 deletions
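
For reference, downstream code that depended on these packages looked like the sketch below (adapted from the doc.odin example removed further down); after this commit such code must vendor the C frontend itself or drop the dependency.

	package demo

	import tokenizer "core:c/frontend/tokenizer"
	import preprocessor "core:c/frontend/preprocessor"
	import "core:fmt"

	main :: proc() {
		// Both import paths above stop resolving once `core:c/frontend` is removed.
		t := &tokenizer.Tokenizer{}
		tokenizer.init_defaults(t)

		cpp := &preprocessor.Preprocessor{}
		cpp.warn, cpp.err = t.warn, t.err
		preprocessor.init_defaults(cpp)
		cpp.include_paths = {"my/path/to/include"}

		tok := tokenizer.tokenize_file(t, "the/source/file.c", 1)
		tok = preprocessor.preprocess(cpp, tok)
		for tk := tok; tk != nil && tk.kind != .EOF; tk = tk.next {
			fmt.println(tk.lit)
		}
	}
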
diff --git a/core/c/frontend/preprocessor/const_expr.odin b/core/c/frontend/preprocessor/const_expr.odin
deleted file mode 100644
index ff13f6432..000000000
--- a/core/c/frontend/preprocessor/const_expr.odin
+++ /dev/null
@@ -1,25 +0,0 @@
-package c_frontend_preprocess
-
-import "core:c/frontend/tokenizer"
-
-const_expr :: proc(rest: ^^Token, tok: ^Token) -> i64 {
- // TODO(bill): Handle const_expr correctly
- // This is effectively a mini-parser
-
- assert(rest != nil)
- assert(tok != nil)
- rest^ = tokenizer.new_eof(tok)
- switch v in tok.val {
- case i64:
- return v
- case f64:
- return i64(v)
- case string:
- return 0
- case []u16:
- // TODO
- case []u32:
- // TODO
- }
- return 0
-}
diff --git a/core/c/frontend/preprocessor/preprocess.odin b/core/c/frontend/preprocessor/preprocess.odin
deleted file mode 100644
index b5eab0bb3..000000000
--- a/core/c/frontend/preprocessor/preprocess.odin
+++ /dev/null
@@ -1,1510 +0,0 @@
-package c_frontend_preprocess
-
-import "../tokenizer"
-
-import "core:strings"
-import "core:strconv"
-import "core:path/filepath"
-import "core:unicode/utf8"
-import "core:unicode/utf16"
-import "core:os"
-import "core:io"
-
-@(private)
-Tokenizer :: tokenizer.Tokenizer
-@(private)
-Token :: tokenizer.Token
-
-Error_Handler :: tokenizer.Error_Handler
-
-Macro_Param :: struct {
- next: ^Macro_Param,
- name: string,
-}
-
-Macro_Arg :: struct {
- next: ^Macro_Arg,
- name: string,
- tok: ^Token,
- is_va_args: bool,
-}
-
-Macro_Kind :: enum u8 {
- Function_Like,
- Value_Like,
-}
-
-Macro_Handler :: #type proc(^Preprocessor, ^Token) -> ^Token
-
-Macro :: struct {
- name: string,
- kind: Macro_Kind,
- params: ^Macro_Param,
- va_args_name: string,
- body: ^Token,
- handler: Macro_Handler,
-}
-
-Cond_Incl_State :: enum u8 {
- In_Then,
- In_Elif,
- In_Else,
-}
-
-Cond_Incl :: struct {
- next: ^Cond_Incl,
- tok: ^Token,
- state: Cond_Incl_State,
- included: bool,
-}
-
-Pragma_Handler :: #type proc(^Preprocessor, ^Token)
-
-Preprocessor :: struct {
- // Lookup tables
- macros: map[string]^Macro,
- pragma_once: map[string]bool,
- include_guards: map[string]string,
- filepath_cache: map[string]string,
-
- // Include path data
- include_paths: []string,
-
- // Counter for __COUNTER__ macro
- counter: i64,
-
- // Include information
- cond_incl: ^Cond_Incl,
- include_level: int,
- include_next_index: int,
-
- wide_char_size: int,
-
- // Mutable data
- err: Error_Handler,
- warn: Error_Handler,
- pragma_handler: Pragma_Handler,
- error_count: int,
- warning_count: int,
-}
-
-MAX_INCLUDE_LEVEL :: 1024
-
-error :: proc(cpp: ^Preprocessor, tok: ^Token, msg: string, args: ..any) {
- if cpp.err != nil {
- cpp.err(tok.pos, msg, ..args)
- }
- cpp.error_count += 1
-}
-
-warn :: proc(cpp: ^Preprocessor, tok: ^Token, msg: string, args: ..any) {
- if cpp.warn != nil {
- cpp.warn(tok.pos, msg, ..args)
- }
- cpp.warning_count += 1
-}
-
-is_hash :: proc(tok: ^Token) -> bool {
- return tok.at_bol && tok.lit == "#"
-}
-
-skip_line :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- tok := tok
- if tok.at_bol {
- return tok
- }
- warn(cpp, tok, "extra token")
- for tok.at_bol {
- tok = tok.next
- }
- return tok
-}
-
-
-append_token :: proc(a, b: ^Token) -> ^Token {
- if a.kind == .EOF {
- return b
- }
-
- head: Token
- curr := &head
-
- for tok := a; tok.kind != .EOF; tok = tok.next {
- curr.next = tokenizer.copy_token(tok)
- curr = curr.next
- }
- curr.next = b
- return head.next
-}
-
-
-is_hex_digit :: proc(x: byte) -> bool {
- switch x {
- case '0'..='9', 'a'..='f', 'A'..='F':
- return true
- }
- return false
-}
-from_hex :: proc(x: byte) -> i32 {
- switch x {
- case '0'..='9':
- return i32(x) - '0'
- case 'a'..='f':
- return i32(x) - 'a' + 10
- case 'A'..='F':
- return i32(x) - 'A' + 10
- }
- return 16
-}
-
-
-convert_pp_number :: proc(tok: ^Token) {
- convert_pp_int :: proc(tok: ^Token) -> bool {
- p := tok.lit
- base := 10
- if len(p) > 2 {
- if strings.equal_fold(p[:2], "0x") && is_hex_digit(p[2]) {
- p = p[2:]
- base = 16
- } else if strings.equal_fold(p[:2], "0b") && p[2] == '0' || p[2] == '1' {
- p = p[2:]
- base = 2
- }
- }
- if base == 10 && p[0] == '0' {
- base = 8
- }
-
-
- tok.val, _ = strconv.parse_i64_of_base(p, base)
-
- l, u: int
-
- suf: [3]byte
- suf_n := 0
- i := len(p)-1
- for /**/; i >= 0 && suf_n < len(suf); i -= 1 {
- switch p[i] {
- case 'l', 'L':
- suf[suf_n] = 'l'
- l += 1
- suf_n += 1
- case 'u', 'U':
- suf[suf_n] = 'u'
- u += 1
- suf_n += 1
- }
- }
- if i < len(p) {
- if !is_hex_digit(p[i]) && p[i] != '.' {
- return false
- }
- }
- if u > 1 {
- return false
- }
-
- if l > 2 {
- return false
- }
-
- if u == 1 {
- switch l {
- case 0: tok.type_hint = .Unsigned_Int
- case 1: tok.type_hint = .Unsigned_Long
- case 2: tok.type_hint = .Unsigned_Long_Long
- }
- } else {
- switch l {
- case 0: tok.type_hint = .Int
- case 1: tok.type_hint = .Long
- case 2: tok.type_hint = .Long_Long
- }
- }
- return true
- }
-
- if convert_pp_int(tok) {
- return
- }
-
- fval, _ := strconv.parse_f64(tok.lit)
- tok.val = fval
-
- end := tok.lit[len(tok.lit)-1]
- switch end {
- case 'f', 'F':
- tok.type_hint = .Float
- case 'l', 'L':
- tok.type_hint = .Long_Double
- case:
- tok.type_hint = .Double
- }
-
-}
-
-convert_pp_char :: proc(tok: ^Token) {
- assert(len(tok.lit) >= 2)
- r, _, _, _ := unquote_char(tok.lit, tok.lit[0])
- tok.val = i64(r)
-
- tok.type_hint = .Int
- switch tok.prefix {
- case "u": tok.type_hint = .UTF_16
- case "U": tok.type_hint = .UTF_32
- case "L": tok.type_hint = .UTF_Wide
- }
-}
-
-wide_char_size :: proc(cpp: ^Preprocessor) -> int {
- char_size := 4
- if cpp.wide_char_size > 0 {
- char_size = clamp(cpp.wide_char_size, 1, 4)
- assert(char_size & (char_size-1) == 0)
- }
- return char_size
-}
-
-convert_pp_string :: proc(cpp: ^Preprocessor, tok: ^Token) {
- assert(len(tok.lit) >= 2)
- str, _, _ := unquote_string(tok.lit)
- tok.val = str
-
- char_size := 1
-
- switch tok.prefix {
- case "u8":
- tok.type_hint = .UTF_8
- char_size = 1
- case "u":
- tok.type_hint = .UTF_16
- char_size = 2
- case "U":
- tok.type_hint = .UTF_32
- char_size = 4
- case "L":
- tok.type_hint = .UTF_Wide
- char_size = wide_char_size(cpp)
- }
-
- switch char_size {
- case 2:
- n: int
- buf := make([]u16, len(str))
- for c in str {
- ch := c
- if ch < 0x10000 {
- buf[n] = u16(ch)
- n += 1
- } else {
- ch -= 0x10000
- buf[n+0] = 0xd800 + u16((ch >> 10) & 0x3ff)
- buf[n+1] = 0xdc00 + u16(ch & 0x3ff)
- n += 2
- }
- }
- tok.val = buf[:n]
- case 4:
- n: int
- buf := make([]u32, len(str))
- for ch in str {
- buf[n] = u32(ch)
- n += 1
- }
- tok.val = buf[:n]
- }
-
-}
-
-convert_pp_token :: proc(cpp: ^Preprocessor, t: ^Token, is_keyword: tokenizer.Is_Keyword_Proc) {
- switch {
- case t.kind == .Char:
- convert_pp_char(t)
- case t.kind == .String:
- convert_pp_string(cpp, t)
- case is_keyword != nil && is_keyword(t):
- t.kind = .Keyword
- case t.kind == .PP_Number:
- convert_pp_number(t)
- }
-}
-convert_pp_tokens :: proc(cpp: ^Preprocessor, tok: ^Token, is_keyword: tokenizer.Is_Keyword_Proc) {
- for t := tok; t != nil && t.kind != .EOF; t = t.next {
- convert_pp_token(cpp, tok, is_keyword)
- }
-}
-
-join_adjacent_string_literals :: proc(cpp: ^Preprocessor, initial_tok: ^Token) {
- for tok1 := initial_tok; tok1.kind != .EOF; /**/ {
- if tok1.kind != .String || tok1.next.kind != .String {
- tok1 = tok1.next
- continue
- }
-
- type_hint := tokenizer.Token_Type_Hint.None
- char_size := 1
-
- start := tok1
- for t := tok1; t != nil && t.kind == .String; t = t.next {
- if t.val == nil {
- convert_pp_string(cpp, t)
- }
- tok1 = t.next
- if type_hint != t.type_hint {
- if t.type_hint != .None && type_hint != .None {
- error(cpp, t, "unsupported non-standard concatenation of string literals of different types")
- }
- prev_char_size := char_size
-
- #partial switch type_hint {
- case .UTF_8: char_size = max(char_size, 1)
- case .UTF_16: char_size = max(char_size, 2)
- case .UTF_32: char_size = max(char_size, 4)
- case .UTF_Wide: char_size = max(char_size, wide_char_size(cpp))
- }
-
- if type_hint == .None || prev_char_size < char_size {
- type_hint = t.type_hint
- }
- }
- }
-
-	// NOTE(bill): Verbose logic in order to correctly concatenate strings, even if they differ in type
- max_len := 0
- switch char_size {
- case 1:
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string: max_len += len(v)
- case []u16: max_len += 2*len(v)
- case []u32: max_len += 4*len(v)
- }
- }
- n := 0
- buf := make([]byte, max_len)
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string:
- n += copy(buf[n:], v)
- case []u16:
- for i := 0; i < len(v); /**/ {
- c1 := v[i]
- r: rune
- if !utf16.is_surrogate(rune(c1)) {
- r = rune(c1)
- i += 1
- } else if i+1 == len(v) {
- r = utf16.REPLACEMENT_CHAR
- i += 1
- } else {
- c2 := v[i+1]
- i += 2
- r = utf16.decode_surrogate_pair(rune(c1), rune(c2))
- }
-
- b, w := utf8.encode_rune(r)
- n += copy(buf[n:], b[:w])
- }
- case []u32:
- for r in v {
- b, w := utf8.encode_rune(rune(r))
- n += copy(buf[n:], b[:w])
- }
- }
- }
-
- new_tok := tokenizer.copy_token(start)
- new_tok.lit = ""
- new_tok.val = string(buf[:n])
- new_tok.next = tok1
- new_tok.type_hint = type_hint
- start^ = new_tok^
- case 2:
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string: max_len += len(v)
- case []u16: max_len += len(v)
- case []u32: max_len += 2*len(v)
- }
- }
- n := 0
- buf := make([]u16, max_len)
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string:
- for r in v {
- if r >= 0x10000 {
- c1, c2 := utf16.encode_surrogate_pair(r)
- buf[n+0] = u16(c1)
- buf[n+1] = u16(c2)
- n += 2
- } else {
- buf[n] = u16(r)
- n += 1
- }
- }
- case []u16:
- n += copy(buf[n:], v)
- case []u32:
- for r in v {
- if r >= 0x10000 {
- c1, c2 := utf16.encode_surrogate_pair(rune(r))
- buf[n+0] = u16(c1)
- buf[n+1] = u16(c2)
- n += 2
- } else {
- buf[n] = u16(r)
- n += 1
- }
- }
- }
- }
-
- new_tok := tokenizer.copy_token(start)
- new_tok.lit = ""
- new_tok.val = buf[:n]
- new_tok.next = tok1
- new_tok.type_hint = type_hint
- start^ = new_tok^
- case 4:
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string: max_len += len(v)
- case []u16: max_len += len(v)
- case []u32: max_len += len(v)
- }
- }
- n := 0
- buf := make([]u32, max_len)
- for t := start; t != nil && t.kind == .String; t = t.next {
- #partial switch v in t.val {
- case string:
- for r in v {
- buf[n] = u32(r)
- n += 1
- }
- case []u16:
- for i := 0; i < len(v); /**/ {
- c1 := v[i]
- if !utf16.is_surrogate(rune(c1)) {
- buf[n] = u32(c1)
- n += 1
- i += 1
- } else if i+1 == len(v) {
- buf[n] = utf16.REPLACEMENT_CHAR
- n += 1
- i += 1
- } else {
- c2 := v[i+1]
- i += 2
- r := utf16.decode_surrogate_pair(rune(c1), rune(c2))
- buf[n] = u32(r)
- n += 1
- }
- }
- case []u32:
- n += copy(buf[n:], v)
- }
- }
-
- new_tok := tokenizer.copy_token(start)
- new_tok.lit = ""
- new_tok.val = buf[:n]
- new_tok.next = tok1
- new_tok.type_hint = type_hint
- start^ = new_tok^
- }
- }
-}
-
-
-quote_string :: proc(s: string) -> []byte {
- b := strings.builder_make(0, len(s)+2)
- io.write_quoted_string(strings.to_writer(&b), s, '"')
- return b.buf[:]
-}
-
-
-_init_tokenizer_from_preprocessor :: proc(t: ^Tokenizer, cpp: ^Preprocessor) -> ^Tokenizer {
- t.warn = cpp.warn
- t.err = cpp.err
- return t
-}
-
-new_string_token :: proc(cpp: ^Preprocessor, str: string, tok: ^Token) -> ^Token {
- assert(tok != nil)
- assert(str != "")
- t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp)
- src := quote_string(str)
- return tokenizer.inline_tokenize(t, tok, src)
-}
-
-stringize :: proc(cpp: ^Preprocessor, hash, arg: ^Token) -> ^Token {
- s := join_tokens(arg, nil)
- return new_string_token(cpp, s, hash)
-}
-
-
-new_number_token :: proc(cpp: ^Preprocessor, i: i64, tok: ^Token) -> ^Token {
- t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp)
- buf: [32]byte
- n := len(strconv.append_int(buf[:], i, 10))
- src := make([]byte, n)
- copy(src, buf[:n])
- return tokenizer.inline_tokenize(t, tok, src)
-}
-
-
-find_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Macro {
- if tok.kind != .Ident {
- return nil
- }
- return cpp.macros[tok.lit]
-}
-
-add_macro :: proc(cpp: ^Preprocessor, name: string, kind: Macro_Kind, body: ^Token) -> ^Macro {
- m := new(Macro)
- m.name = name
- m.kind = kind
- m.body = body
- cpp.macros[name] = m
- return m
-}
-
-
-undef_macro :: proc(cpp: ^Preprocessor, name: string) {
- delete_key(&cpp.macros, name)
-}
-
-add_builtin :: proc(cpp: ^Preprocessor, name: string, handler: Macro_Handler) -> ^Macro {
- m := add_macro(cpp, name, .Value_Like, nil)
- m.handler = handler
- return m
-}
-
-
-skip :: proc(cpp: ^Preprocessor, tok: ^Token, op: string) -> ^Token {
- if tok.lit != op {
- error(cpp, tok, "expected '%q'", op)
- }
- return tok.next
-}
-
-consume :: proc(rest: ^^Token, tok: ^Token, lit: string) -> bool {
- if tok.lit == lit {
- rest^ = tok.next
- return true
- }
- rest^ = tok
- return false
-}
-
-read_macro_params :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (param: ^Macro_Param, va_args_name: string) {
- head: Macro_Param
- curr := &head
-
- tok := tok
- for tok.lit != ")" && tok.kind != .EOF {
- if curr != &head {
- tok = skip(cpp, tok, ",")
- }
-
- if tok.lit == "..." {
- va_args_name = "__VA_ARGS__"
- rest^ = skip(cpp, tok.next, ")")
- param = head.next
- return
- }
-
- if tok.kind != .Ident {
- error(cpp, tok, "expected an identifier")
- }
-
- if tok.next.lit == "..." {
- va_args_name = tok.lit
- rest^ = skip(cpp, tok.next.next, ")")
- param = head.next
- return
- }
-
- m := new(Macro_Param)
- m.name = tok.lit
- curr.next = m
- curr = curr.next
- tok = tok.next
- }
-
-
- rest^ = tok.next
- param = head.next
- return
-}
-
-copy_line :: proc(rest: ^^Token, tok: ^Token) -> ^Token {
- head: Token
- curr := &head
-
- tok := tok
- for ; !tok.at_bol; tok = tok.next {
- curr.next = tokenizer.copy_token(tok)
- curr = curr.next
- }
- curr.next = tokenizer.new_eof(tok)
- rest^ = tok
- return head.next
-}
-
-read_macro_definition :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) {
- tok := tok
- if tok.kind != .Ident {
- error(cpp, tok, "macro name must be an identifier")
- }
- name := tok.lit
- tok = tok.next
-
- if !tok.has_space && tok.lit == "(" {
- params, va_args_name := read_macro_params(cpp, &tok, tok.next)
-
- m := add_macro(cpp, name, .Function_Like, copy_line(rest, tok))
- m.params = params
- m.va_args_name = va_args_name
- } else {
- add_macro(cpp, name, .Value_Like, copy_line(rest, tok))
- }
-}
-
-
-join_tokens :: proc(tok, end: ^Token) -> string {
- n := 1
- for t := tok; t != end && t.kind != .EOF; t = t.next {
- if t != tok && t.has_space {
- n += 1
- }
- n += len(t.lit)
- }
-
- buf := make([]byte, n)
-
- pos := 0
- for t := tok; t != end && t.kind != .EOF; t = t.next {
- if t != tok && t.has_space {
- buf[pos] = ' '
- pos += 1
- }
- copy(buf[pos:], t.lit)
- pos += len(t.lit)
- }
-
- return string(buf[:pos])
-}
-
-read_include_filename :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (filename: string, is_quote: bool) {
- tok := tok
-
- if tok.kind == .String {
- rest^ = skip_line(cpp, tok.next)
- filename = tok.lit[1:len(tok.lit)-1]
- is_quote = true
- return
- }
-
- if tok.lit == "<" {
- start := tok
- for ; tok.kind != .EOF; tok = tok.next {
- if tok.at_bol || tok.kind == .EOF {
- error(cpp, tok, "expected '>'")
- }
- is_quote = false
- if tok.lit == ">" {
- break
- }
- }
- rest^ = skip_line(cpp, tok.next)
- filename = join_tokens(start.next, tok)
- return
- }
-
- if tok.kind == .Ident {
- tok2 := preprocess_internal(cpp, copy_line(rest, tok))
- return read_include_filename(cpp, &tok2, tok2)
- }
-
- error(cpp, tok, "expected a filename")
- return
-}
-
-skip_cond_incl :: proc(tok: ^Token) -> ^Token {
- next_skip :: proc(tok: ^Token) -> ^Token {
- tok := tok
- for tok.kind != .EOF {
- if is_hash(tok) {
- switch tok.next.lit {
- case "if", "ifdef", "ifndef":
- tok = next_skip(tok.next.next)
- continue
-
- case "endif":
- return tok.next.next
- }
- }
- tok = tok.next
- }
- return tok
- }
-
- tok := tok
-
- loop: for tok.kind != .EOF {
- if is_hash(tok) {
- switch tok.next.lit {
- case "if", "ifdef", "ifndef":
- tok = next_skip(tok.next.next)
- continue loop
-
- case "elif", "else", "endif":
- break loop
- }
- }
-
- tok = tok.next
- }
- return tok
-}
-
-check_for_include_guard :: proc(tok: ^Token) -> (guard: string, ok: bool) {
- if !is_hash(tok) || tok.next.lit != "ifndef" {
- return
- }
- tok := tok
- tok = tok.next.next
-
- if tok.kind != .Ident {
- return
- }
-
- m := tok.lit
- tok = tok.next
-
- if !is_hash(tok) || tok.next.lit != "define" || tok.next.lit != "macro" {
- return
- }
-
- for tok.kind != .EOF {
- if !is_hash(tok) {
- tok = tok.next
- continue
- }
-
- if tok.next.lit == "endif" && tok.next.next.kind == .EOF {
- return m, true
- }
-
- switch tok.lit {
- case "if", "ifdef", "ifndef":
- tok = skip_cond_incl(tok.next)
- case:
- tok = tok.next
- }
- }
- return
-}
-
-include_file :: proc(cpp: ^Preprocessor, tok: ^Token, path: string, filename_tok: ^Token) -> ^Token {
- if cpp.pragma_once[path] {
- return tok
- }
-
- guard_name, guard_name_found := cpp.include_guards[path]
- if guard_name_found && cpp.macros[guard_name] != nil {
- return tok
- }
-
- if !os.exists(path) {
- error(cpp, filename_tok, "%s: cannot open file", path)
- return tok
- }
-
- cpp.include_level += 1
- if cpp.include_level > MAX_INCLUDE_LEVEL {
- error(cpp, tok, "exceeded maximum nest amount: %d", MAX_INCLUDE_LEVEL)
- return tok
- }
-
- t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp)
- tok2 := tokenizer.tokenize_file(t, path, /*file.id*/1)
- if tok2 == nil {
- error(cpp, filename_tok, "%s: cannot open file", path)
- }
- cpp.include_level -= 1
-
- guard_name, guard_name_found = check_for_include_guard(tok2)
- if guard_name_found {
- cpp.include_guards[path] = guard_name
- }
-
- return append_token(tok2, tok)
-}
-
-find_arg :: proc(args: ^Macro_Arg, tok: ^Token) -> ^Macro_Arg {
- for ap := args; ap != nil; ap = ap.next {
- if tok.lit == ap.name {
- return ap
- }
- }
- return nil
-}
-
-paste :: proc(cpp: ^Preprocessor, lhs, rhs: ^Token) -> ^Token {
- buf := strings.concatenate({lhs.lit, rhs.lit})
- t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp)
- tok := tokenizer.inline_tokenize(t, lhs, transmute([]byte)buf)
- if tok.next.kind != .EOF {
- error(cpp, lhs, "pasting forms '%s', an invalid token", buf)
- }
- return tok
-}
-
-has_varargs :: proc(args: ^Macro_Arg) -> bool {
- for ap := args; ap != nil; ap = ap.next {
- if ap.name == "__VA_ARGS__" {
- return ap.tok.kind != .EOF
- }
- }
- return false
-}
-
-substitute_token :: proc(cpp: ^Preprocessor, tok: ^Token, args: ^Macro_Arg) -> ^Token {
- head: Token
- curr := &head
- tok := tok
- for tok.kind != .EOF {
- if tok.lit == "#" {
- arg := find_arg(args, tok.next)
- if arg == nil {
- error(cpp, tok.next, "'#' is not followed by a macro parameter")
- }
- arg_tok := arg.tok if arg != nil else tok.next
- curr.next = stringize(cpp, tok, arg_tok)
- curr = curr.next
- tok = tok.next.next
- continue
- }
-
- if tok.lit == "," && tok.next.lit == "##" {
- if arg := find_arg(args, tok.next.next); arg != nil && arg.is_va_args {
- if arg.tok.kind == .EOF {
- tok = tok.next.next.next
- } else {
- curr.next = tokenizer.copy_token(tok)
- curr = curr.next
- tok = tok.next.next
- }
- continue
- }
- }
-
- if tok.lit == "##" {
- if curr == &head {
- error(cpp, tok, "'##' cannot appear at start of macro expansion")
- }
- if tok.next.kind == .EOF {
- error(cpp, tok, "'##' cannot appear at end of macro expansion")
- }
-
- if arg := find_arg(args, tok.next); arg != nil {
- if arg.tok.kind != .EOF {
- curr^ = paste(cpp, curr, arg.tok)^
- for t := arg.tok.next; t.kind != .EOF; t = t.next {
- curr.next = tokenizer.copy_token(t)
- curr = curr.next
- }
- }
- tok = tok.next.next
- continue
- }
-
- curr^ = paste(cpp, curr, tok.next)^
- tok = tok.next.next
- continue
- }
-
- arg := find_arg(args, tok)
-
- if arg != nil && tok.next.lit == "##" {
- rhs := tok.next.next
-
- if arg.tok.kind == .EOF {
- args2 := find_arg(args, rhs)
- if args2 != nil {
- for t := args.tok; t.kind != .EOF; t = t.next {
- curr.next = tokenizer.copy_token(t)
- curr = curr.next
- }
- } else {
- curr.next = tokenizer.copy_token(rhs)
- curr = curr.next
- }
- tok = rhs.next
- continue
- }
-
- for t := arg.tok; t.kind != .EOF; t = t.next {
- curr.next = tokenizer.copy_token(t)
- curr = curr.next
- }
- tok = tok.next
- continue
- }
-
- if tok.lit == "__VA_OPT__" && tok.next.lit == "(" {
- opt_arg := read_macro_arg_one(cpp, &tok, tok.next.next, true)
- if has_varargs(args) {
- for t := opt_arg.tok; t.kind != .EOF; t = t.next {
- curr.next = t
- curr = curr.next
- }
- }
- tok = skip(cpp, tok, ")")
- continue
- }
-
- if arg != nil {
- t := preprocess_internal(cpp, arg.tok)
- t.at_bol = tok.at_bol
- t.has_space = tok.has_space
- for ; t.kind != .EOF; t = t.next {
- curr.next = tokenizer.copy_token(t)
- curr = curr.next
- }
- tok = tok.next
- continue
- }
-
- curr.next = tokenizer.copy_token(tok)
- curr = curr.next
- tok = tok.next
- continue
- }
-
- curr.next = tok
- return head.next
-}
-
-read_macro_arg_one :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token, read_rest: bool) -> ^Macro_Arg {
- tok := tok
- head: Token
- curr := &head
- level := 0
- for {
- if level == 0 && tok.lit == ")" {
- break
- }
- if level == 0 && !read_rest && tok.lit == "," {
- break
- }
-
- if tok.kind == .EOF {
- error(cpp, tok, "premature end of input")
- }
-
- switch tok.lit {
- case "(": level += 1
- case ")": level -= 1
- }
-
- curr.next = tokenizer.copy_token(tok)
- curr = curr.next
- tok = tok.next
- }
- curr.next = tokenizer.new_eof(tok)
-
- arg := new(Macro_Arg)
- arg.tok = head.next
- rest^ = tok
- return arg
-}
-
-read_macro_args :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token, params: ^Macro_Param, va_args_name: string) -> ^Macro_Arg {
- tok := tok
- start := tok
- tok = tok.next.next
-
- head: Macro_Arg
- curr := &head
-
- pp := params
- for ; pp != nil; pp = pp.next {
- if curr != &head {
- tok = skip(cpp, tok, ",")
- }
- curr.next = read_macro_arg_one(cpp, &tok, tok, false)
- curr = curr.next
- curr.name = pp.name
- }
-
- if va_args_name != "" {
- arg: ^Macro_Arg
- if tok.lit == ")" {
- arg = new(Macro_Arg)
- arg.tok = tokenizer.new_eof(tok)
- } else {
- if pp != params {
- tok = skip(cpp, tok, ",")
- }
- arg = read_macro_arg_one(cpp, &tok, tok, true)
- }
- arg.name = va_args_name
- arg.is_va_args = true
- curr.next = arg
- curr = curr.next
- } else if pp != nil {
- error(cpp, start, "too many arguments")
- }
-
- skip(cpp, tok, ")")
- rest^ = tok
- return head.next
-}
-
-expand_macro :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> bool {
- if tokenizer.hide_set_contains(tok.hide_set, tok.lit) {
- return false
- }
- tok := tok
- m := find_macro(cpp, tok)
- if m == nil {
- return false
- }
-
- if m.handler != nil {
- rest^ = m.handler(cpp, tok)
- rest^.next = tok.next
- return true
- }
-
- if m.kind == .Value_Like {
- hs := tokenizer.hide_set_union(tok.hide_set, tokenizer.new_hide_set(m.name))
- body := tokenizer.add_hide_set(m.body, hs)
- for t := body; t.kind != .EOF; t = t.next {
- t.origin = tok
- }
- rest^ = append_token(body, tok.next)
- rest^.at_bol = tok.at_bol
- rest^.has_space = tok.has_space
- return true
- }
-
- if tok.next.lit != "(" {
- return false
- }
-
- macro_token := tok
- args := read_macro_args(cpp, &tok, tok, m.params, m.va_args_name)
- close_paren := tok
-
- hs := tokenizer.hide_set_intersection(macro_token.hide_set, close_paren.hide_set)
- hs = tokenizer.hide_set_union(hs, tokenizer.new_hide_set(m.name))
-
- body := substitute_token(cpp, m.body, args)
- body = tokenizer.add_hide_set(body, hs)
- for t := body; t.kind != .EOF; t = t.next {
- t.origin = macro_token
- }
- rest^ = append_token(body, tok.next)
- rest^.at_bol = macro_token.at_bol
- rest^.has_space = macro_token.has_space
- return true
-}
-
-search_include_next :: proc(cpp: ^Preprocessor, filename: string) -> (path: string, ok: bool) {
- for ; cpp.include_next_index < len(cpp.include_paths); cpp.include_next_index += 1 {
- tpath := filepath.join({cpp.include_paths[cpp.include_next_index], filename}, allocator=context.temp_allocator)
- if os.exists(tpath) {
- return strings.clone(tpath), true
- }
- }
- return
-}
-
-search_include_paths :: proc(cpp: ^Preprocessor, filename: string) -> (path: string, ok: bool) {
- if filepath.is_abs(filename) {
- return filename, true
- }
-
- if path, ok = cpp.filepath_cache[filename]; ok {
- return
- }
-
- for include_path in cpp.include_paths {
- tpath := filepath.join({include_path, filename}, allocator=context.temp_allocator)
- if os.exists(tpath) {
- path, ok = strings.clone(tpath), true
- cpp.filepath_cache[filename] = path
- return
- }
- }
-
- return
-}
-
-read_const_expr :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> ^Token {
- tok := tok
- tok = copy_line(rest, tok)
- head: Token
- curr := &head
- for tok.kind != .EOF {
- if tok.lit == "defined" {
- start := tok
- has_paren := consume(&tok, tok.next, "(")
- if tok.kind != .Ident {
- error(cpp, start, "macro name must be an identifier")
- }
- m := find_macro(cpp, tok)
- tok = tok.next
-
- if has_paren {
- tok = skip(cpp, tok, ")")
- }
-
- curr.next = new_number_token(cpp, 1 if m != nil else 0, start)
- curr = curr.next
- continue
- }
-
- curr.next = tok
- curr = curr.next
- tok = tok.next
- }
-
- curr.next = tok
- return head.next
-}
-
-eval_const_expr :: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) -> (val: i64) {
- tok := tok
- start := tok
- expr := read_const_expr(cpp, rest, tok.next)
- expr = preprocess_internal(cpp, expr)
-
- if expr.kind == .EOF {
- error(cpp, start, "no expression")
- }
-
- for t := expr; t.kind != .EOF; t = t.next {
- if t.kind == .Ident {
- next := t.next
- t^ = new_number_token(cpp, 0, t)^
- t.next = next
- }
- }
-
- val = 1
- convert_pp_tokens(cpp, expr, tokenizer.default_is_keyword)
-
- rest2: ^Token
- val = const_expr(&rest2, expr)
- if rest2 != nil && rest2.kind != .EOF {
- error(cpp, rest2, "extra token")
- }
- return
-}
-
-push_cond_incl :: proc(cpp: ^Preprocessor, tok: ^Token, included: bool) -> ^Cond_Incl {
- ci := new(Cond_Incl)
- ci.next = cpp.cond_incl
- ci.state = .In_Then
- ci.tok = tok
- ci.included = included
- cpp.cond_incl = ci
- return ci
-}
-
-read_line_marker:: proc(cpp: ^Preprocessor, rest: ^^Token, tok: ^Token) {
- tok := tok
- start := tok
- tok = preprocess(cpp, copy_line(rest, tok))
- if tok.kind != .Number {
- error(cpp, tok, "invalid line marker")
- }
- ival, _ := tok.val.(i64)
- start.file.line_delta = int(ival - i64(start.pos.line))
- tok = tok.next
- if tok.kind == .EOF {
- return
- }
-
- if tok.kind != .String {
- error(cpp, tok, "filename expected")
- }
- start.file.display_name = tok.lit
-}
-
-preprocess_internal :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- head: Token
- curr := &head
-
- tok := tok
- for tok != nil && tok.kind != .EOF {
- if expand_macro(cpp, &tok, tok) {
- continue
- }
-
- if !is_hash(tok) {
- if tok.file != nil {
- tok.line_delta = tok.file.line_delta
- }
- curr.next = tok
- curr = curr.next
- tok = tok.next
- continue
- }
-
- start := tok
- tok = tok.next
-
- switch tok.lit {
- case "include":
- filename, is_quote := read_include_filename(cpp, &tok, tok.next)
- is_absolute := filepath.is_abs(filename)
- if is_absolute {
- tok = include_file(cpp, tok, filename, start.next.next)
- continue
- }
-
- if is_quote {
- dir := ""
- if start.file != nil {
- dir = filepath.dir(start.file.name)
- }
- path := filepath.join({dir, filename})
- if os.exists(path) {
- tok = include_file(cpp, tok, path, start.next.next)
- continue
- }
- }
-
- path, ok := search_include_paths(cpp, filename)
- if !ok {
- path = filename
- }
- tok = include_file(cpp, tok, path, start.next.next)
- continue
-
- case "include_next":
- filename, _ := read_include_filename(cpp, &tok, tok.next)
- path, ok := search_include_next(cpp, filename)
- if !ok {
- path = filename
- }
- tok = include_file(cpp, tok, path, start.next.next)
- continue
-
- case "define":
- read_macro_definition(cpp, &tok, tok.next)
- continue
-
- case "undef":
- tok = tok.next
- if tok.kind != .Ident {
- error(cpp, tok, "macro name must be an identifier")
- }
- undef_macro(cpp, tok.lit)
- tok = skip_line(cpp, tok.next)
- continue
-
- case "if":
- val := eval_const_expr(cpp, &tok, tok)
- push_cond_incl(cpp, start, val != 0)
- if val == 0 {
- tok = skip_cond_incl(tok)
- }
- continue
-
- case "ifdef":
- defined := find_macro(cpp, tok.next)
- push_cond_incl(cpp, tok, defined != nil)
- tok = skip_line(cpp, tok.next.next)
- if defined == nil {
- tok = skip_cond_incl(tok)
- }
- continue
-
- case "ifndef":
- defined := find_macro(cpp, tok.next)
- push_cond_incl(cpp, tok, defined != nil)
- tok = skip_line(cpp, tok.next.next)
- if !(defined == nil) {
- tok = skip_cond_incl(tok)
- }
- continue
-
- case "elif":
- if cpp.cond_incl == nil || cpp.cond_incl.state == .In_Else {
- error(cpp, start, "stray #elif")
- }
- if cpp.cond_incl != nil {
- cpp.cond_incl.state = .In_Elif
- }
-
- if (cpp.cond_incl != nil && !cpp.cond_incl.included) && eval_const_expr(cpp, &tok, tok) != 0 {
- cpp.cond_incl.included = true
- } else {
- tok = skip_cond_incl(tok)
- }
- continue
-
- case "else":
- if cpp.cond_incl == nil || cpp.cond_incl.state == .In_Else {
- error(cpp, start, "stray #else")
- }
- if cpp.cond_incl != nil {
- cpp.cond_incl.state = .In_Else
- }
- tok = skip_line(cpp, tok.next)
-
- if cpp.cond_incl != nil {
- tok = skip_cond_incl(tok)
- }
- continue
-
- case "endif":
- if cpp.cond_incl == nil {
- error(cpp, start, "stray #endif")
- } else {
- cpp.cond_incl = cpp.cond_incl.next
- }
- tok = skip_line(cpp, tok.next)
- continue
-
- case "line":
- read_line_marker(cpp, &tok, tok.next)
- continue
-
- case "pragma":
- if tok.next.lit == "once" {
- cpp.pragma_once[tok.pos.file] = true
- tok = skip_line(cpp, tok.next.next)
- continue
- }
-
- pragma_tok, pragma_end := tok, tok
-
- for tok != nil && tok.kind != .EOF {
- pragma_end = tok
- tok = tok.next
- if tok.at_bol {
- break
- }
- }
- pragma_end.next = tokenizer.new_eof(tok)
- if cpp.pragma_handler != nil {
- cpp.pragma_handler(cpp, pragma_tok.next)
- continue
- }
-
- continue
-
- case "error":
- error(cpp, tok, "error")
- }
-
- if tok.kind == .PP_Number {
- read_line_marker(cpp, &tok, tok)
- continue
- }
-
- if !tok.at_bol {
- error(cpp, tok, "invalid preprocessor directive")
- }
- }
-
- curr.next = tok
- return head.next
-}
-
-
-preprocess :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- tok := tok
- tok = preprocess_internal(cpp, tok)
- if cpp.cond_incl != nil {
- error(cpp, tok, "unterminated conditional directive")
- }
- convert_pp_tokens(cpp, tok, tokenizer.default_is_keyword)
- join_adjacent_string_literals(cpp, tok)
- for t := tok; t != nil; t = t.next {
- t.pos.line += t.line_delta
- }
- return tok
-}
-
-
-define_macro :: proc(cpp: ^Preprocessor, name, def: string) {
- src := transmute([]byte)def
-
- file := new(tokenizer.File)
- file.id = -1
- file.src = src
- file.name = "<built-in>"
- file.display_name = file.name
-
-
- t := _init_tokenizer_from_preprocessor(&Tokenizer{}, cpp)
- tok := tokenizer.tokenize(t, file)
- add_macro(cpp, name, .Value_Like, tok)
-}
-
-
-file_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- tok := tok
- for tok.origin != nil {
- tok = tok.origin
- }
- i := i64(tok.pos.line + tok.file.line_delta)
- return new_number_token(cpp, i, tok)
-}
-line_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- tok := tok
- for tok.origin != nil {
- tok = tok.origin
- }
- return new_string_token(cpp, tok.file.display_name, tok)
-}
-counter_macro :: proc(cpp: ^Preprocessor, tok: ^Token) -> ^Token {
- i := cpp.counter
- cpp.counter += 1
- return new_number_token(cpp, i, tok)
-}
-
-init_default_macros :: proc(cpp: ^Preprocessor) {
- define_macro(cpp, "__C99_MACRO_WITH_VA_ARGS", "1")
- define_macro(cpp, "__alignof__", "_Alignof")
- define_macro(cpp, "__const__", "const")
- define_macro(cpp, "__inline__", "inline")
- define_macro(cpp, "__signed__", "signed")
- define_macro(cpp, "__typeof__", "typeof")
- define_macro(cpp, "__volatile__", "volatile")
-
- add_builtin(cpp, "__FILE__", file_macro)
- add_builtin(cpp, "__LINE__", line_macro)
- add_builtin(cpp, "__COUNTER__", counter_macro)
-}
-
-init_lookup_tables :: proc(cpp: ^Preprocessor, allocator := context.allocator) {
- context.allocator = allocator
- reserve(&cpp.macros, max(16, cap(cpp.macros)))
- reserve(&cpp.pragma_once, max(16, cap(cpp.pragma_once)))
- reserve(&cpp.include_guards, max(16, cap(cpp.include_guards)))
- reserve(&cpp.filepath_cache, max(16, cap(cpp.filepath_cache)))
-}
-
-
-init_defaults :: proc(cpp: ^Preprocessor, lookup_tables_allocator := context.allocator) {
- if cpp.warn == nil {
- cpp.warn = tokenizer.default_warn_handler
- }
- if cpp.err == nil {
- cpp.err = tokenizer.default_error_handler
- }
- init_lookup_tables(cpp, lookup_tables_allocator)
- init_default_macros(cpp)
-}
diff --git a/core/c/frontend/preprocessor/unquote.odin b/core/c/frontend/preprocessor/unquote.odin
deleted file mode 100644
index 5869fa7ef..000000000
--- a/core/c/frontend/preprocessor/unquote.odin
+++ /dev/null
@@ -1,154 +0,0 @@
-package c_frontend_preprocess
-
-import "core:unicode/utf8"
-
-unquote_char :: proc(str: string, quote: byte) -> (r: rune, multiple_bytes: bool, tail_string: string, success: bool) {
- hex_to_int :: proc(c: byte) -> int {
- switch c {
- case '0'..='9': return int(c-'0')
- case 'a'..='f': return int(c-'a')+10
- case 'A'..='F': return int(c-'A')+10
- }
- return -1
- }
- w: int
-
- if str[0] == quote && quote == '"' {
- return
- } else if str[0] >= 0x80 {
- r, w = utf8.decode_rune_in_string(str)
- return r, true, str[w:], true
- } else if str[0] != '\\' {
- return rune(str[0]), false, str[1:], true
- }
-
- if len(str) <= 1 {
- return
- }
- s := str
- c := s[1]
- s = s[2:]
-
- switch c {
- case: r = rune(c)
-
- case 'a': r = '\a'
- case 'b': r = '\b'
- case 'e': r = '\e'
- case 'f': r = '\f'
- case 'n': r = '\n'
- case 'r': r = '\r'
- case 't': r = '\t'
- case 'v': r = '\v'
- case '\\': r = '\\'
-
- case '"': r = '"'
- case '\'': r = '\''
-
- case '0'..='7':
- v := int(c-'0')
- if len(s) < 2 {
- return
- }
- for i in 0..<len(s) {
- d := int(s[i]-'0')
- if d < 0 || d > 7 {
- return
- }
- v = (v<<3) | d
- }
- s = s[2:]
- if v > 0xff {
- return
- }
- r = rune(v)
-
- case 'x', 'u', 'U':
- count: int
- switch c {
- case 'x': count = 2
- case 'u': count = 4
- case 'U': count = 8
- }
-
- if len(s) < count {
- return
- }
-
- for i in 0..<count {
- d := hex_to_int(s[i])
- if d < 0 {
- return
- }
- r = (r<<4) | rune(d)
- }
- s = s[count:]
- if c == 'x' {
- break
- }
- if r > utf8.MAX_RUNE {
- return
- }
- multiple_bytes = true
- }
-
- success = true
- tail_string = s
- return
-}
-
-unquote_string :: proc(lit: string, allocator := context.allocator) -> (res: string, allocated, success: bool) {
- contains_rune :: proc(s: string, r: rune) -> int {
- for c, offset in s {
- if c == r {
- return offset
- }
- }
- return -1
- }
-
- assert(len(lit) >= 2)
-
- s := lit
- quote := '"'
-
- if s == `""` {
- return "", false, true
- }
-
- if contains_rune(s, '\n') >= 0 {
- return s, false, false
- }
-
- if contains_rune(s, '\\') < 0 && contains_rune(s, quote) < 0 {
- if quote == '"' {
- return s, false, true
- }
- }
- s = s[1:len(s)-1]
-
-
- buf_len := 3*len(s) / 2
- buf := make([]byte, buf_len, allocator)
- offset := 0
- for len(s) > 0 {
- r, multiple_bytes, tail_string, ok := unquote_char(s, byte(quote))
- if !ok {
- delete(buf)
- return s, false, false
- }
- s = tail_string
- if r < 0x80 || !multiple_bytes {
- buf[offset] = byte(r)
- offset += 1
- } else {
- b, w := utf8.encode_rune(r)
- copy(buf[offset:], b[:w])
- offset += w
- }
- }
-
- new_string := string(buf[:offset])
-
- return new_string, true, true
-}
diff --git a/core/c/frontend/tokenizer/doc.odin b/core/c/frontend/tokenizer/doc.odin
deleted file mode 100644
index 43747dfe8..000000000
--- a/core/c/frontend/tokenizer/doc.odin
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
-Example:
- package demo
-
- import tokenizer "core:c/frontend/tokenizer"
- import preprocessor "core:c/frontend/preprocessor"
- import "core:fmt"
-
- main :: proc() {
- t := &tokenizer.Tokenizer{};
- tokenizer.init_defaults(t);
-
- cpp := &preprocessor.Preprocessor{};
- cpp.warn, cpp.err = t.warn, t.err;
- preprocessor.init_lookup_tables(cpp);
- preprocessor.init_default_macros(cpp);
- cpp.include_paths = {"my/path/to/include"};
-
- tok := tokenizer.tokenize_file(t, "the/source/file.c", 1);
-
- tok = preprocessor.preprocess(cpp, tok);
- if tok != nil {
- for t := tok; t.kind != .EOF; t = t.next {
- fmt.println(t.lit);
- }
- }
-
- fmt.println("[Done]");
- }
-*/
-package c_frontend_tokenizer
diff --git a/core/c/frontend/tokenizer/hide_set.odin b/core/c/frontend/tokenizer/hide_set.odin
deleted file mode 100644
index ec8b77e6e..000000000
--- a/core/c/frontend/tokenizer/hide_set.odin
+++ /dev/null
@@ -1,68 +0,0 @@
-package c_frontend_tokenizer
-
-// NOTE(bill): This is a really dumb approach for a hide set,
-// but it's really simple and probably fast enough in practice
-
-
-Hide_Set :: struct {
- next: ^Hide_Set,
- name: string,
-}
-
-
-new_hide_set :: proc(name: string) -> ^Hide_Set {
- hs := new(Hide_Set)
- hs.name = name
- return hs
-}
-
-hide_set_contains :: proc(hs: ^Hide_Set, name: string) -> bool {
- for h := hs; h != nil; h = h.next {
- if h.name == name {
- return true
- }
- }
- return false
-}
-
-
-hide_set_union :: proc(a, b: ^Hide_Set) -> ^Hide_Set {
- head: Hide_Set
- curr := &head
-
- for h := a; h != nil; h = h.next {
- curr.next = new_hide_set(h.name)
- curr = curr.next
- }
- curr.next = b
- return head.next
-}
-
-
-hide_set_intersection :: proc(a, b: ^Hide_Set) -> ^Hide_Set {
- head: Hide_Set
- curr := &head
-
- for h := a; h != nil; h = h.next {
- if hide_set_contains(b, h.name) {
- curr.next = new_hide_set(h.name)
- curr = curr.next
- }
- }
- return head.next
-}
-
-
-add_hide_set :: proc(tok: ^Token, hs: ^Hide_Set) -> ^Token {
- head: Token
- curr := &head
-
- tok := tok
- for ; tok != nil; tok = tok.next {
- t := copy_token(tok)
- t.hide_set = hide_set_union(t.hide_set, hs)
- curr.next = t
- curr = curr.next
- }
- return head.next
-}
diff --git a/core/c/frontend/tokenizer/token.odin b/core/c/frontend/tokenizer/token.odin
deleted file mode 100644
index 1376a651f..000000000
--- a/core/c/frontend/tokenizer/token.odin
+++ /dev/null
@@ -1,169 +0,0 @@
-package c_frontend_tokenizer
-
-
-Pos :: struct {
- file: string,
- line: int,
- column: int,
- offset: int,
-}
-
-Token_Kind :: enum {
- Invalid,
- Ident,
- Punct,
- Keyword,
- Char,
- String,
- Number,
- PP_Number,
- Comment,
- EOF,
-}
-
-File :: struct {
- name: string,
- id: int,
- src: []byte,
-
- display_name: string,
- line_delta: int,
-}
-
-
-Token_Type_Hint :: enum u8 {
- None,
-
- Int,
- Long,
- Long_Long,
-
- Unsigned_Int,
- Unsigned_Long,
- Unsigned_Long_Long,
-
- Float,
- Double,
- Long_Double,
-
- UTF_8,
- UTF_16,
- UTF_32,
- UTF_Wide,
-}
-
-Token_Value :: union {
- i64,
- f64,
- string,
- []u16,
- []u32,
-}
-
-Token :: struct {
- kind: Token_Kind,
- next: ^Token,
- lit: string,
-
- pos: Pos,
- file: ^File,
- line_delta: int,
- at_bol: bool,
- has_space: bool,
-
- type_hint: Token_Type_Hint,
- val: Token_Value,
- prefix: string,
-
- // Preprocessor values
- hide_set: ^Hide_Set,
- origin: ^Token,
-}
-
-Is_Keyword_Proc :: #type proc(tok: ^Token) -> bool
-
-copy_token :: proc(tok: ^Token) -> ^Token {
- t, _ := new_clone(tok^)
- t.next = nil
- return t
-}
-
-new_eof :: proc(tok: ^Token) -> ^Token {
- t, _ := new_clone(tok^)
- t.kind = .EOF
- t.lit = ""
- return t
-}
-
-default_is_keyword :: proc(tok: ^Token) -> bool {
- if tok.kind == .Keyword {
- return true
- }
- if len(tok.lit) > 0 {
- return default_keyword_set[tok.lit]
- }
- return false
-}
-
-
-token_name := [Token_Kind]string {
- .Invalid = "invalid",
- .Ident = "ident",
- .Punct = "punct",
- .Keyword = "keyword",
- .Char = "char",
- .String = "string",
- .Number = "number",
- .PP_Number = "preprocessor number",
- .Comment = "comment",
- .EOF = "eof",
-}
-
-default_keyword_set := map[string]bool{
- "auto" = true,
- "break" = true,
- "case" = true,
- "char" = true,
- "const" = true,
- "continue" = true,
- "default" = true,
- "do" = true,
- "double" = true,
- "else" = true,
- "enum" = true,
- "extern" = true,
- "float" = true,
- "for" = true,
- "goto" = true,
- "if" = true,
- "int" = true,
- "long" = true,
- "register" = true,
- "restrict" = true,
- "return" = true,
- "short" = true,
- "signed" = true,
- "sizeof" = true,
- "static" = true,
- "struct" = true,
- "switch" = true,
- "typedef" = true,
- "union" = true,
- "unsigned" = true,
- "void" = true,
- "volatile" = true,
- "while" = true,
- "_Alignas" = true,
- "_Alignof" = true,
- "_Atomic" = true,
- "_Bool" = true,
- "_Generic" = true,
- "_Noreturn" = true,
- "_Thread_local" = true,
- "__restrict" = true,
- "typeof" = true,
- "asm" = true,
- "__restrict__" = true,
- "__thread" = true,
- "__attribute__" = true,
-}
diff --git a/core/c/frontend/tokenizer/tokenizer.odin b/core/c/frontend/tokenizer/tokenizer.odin
deleted file mode 100644
index 558077717..000000000
--- a/core/c/frontend/tokenizer/tokenizer.odin
+++ /dev/null
@@ -1,667 +0,0 @@
-package c_frontend_tokenizer
-
-import "core:fmt"
-import "core:os"
-import "core:strings"
-import "core:unicode/utf8"
-
-
-Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
-
-
-Tokenizer :: struct {
- // Immutable data
- path: string,
- src: []byte,
-
-
- // Tokenizing state
- ch: rune,
- offset: int,
- read_offset: int,
- line_offset: int,
- line_count: int,
-
- // Extra information for tokens
- at_bol: bool,
- has_space: bool,
-
- // Mutable data
- err: Error_Handler,
- warn: Error_Handler,
- error_count: int,
- warning_count: int,
-}
-
-init_defaults :: proc(t: ^Tokenizer, err: Error_Handler = default_error_handler, warn: Error_Handler = default_warn_handler) {
- t.err = err
- t.warn = warn
-}
-
-
-@(private)
-offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> (pos: Pos) {
- pos.file = t.path
- pos.offset = offset
- pos.line = t.line_count
- pos.column = offset - t.line_offset + 1
- return
-}
-
-default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
- fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column)
- fmt.eprintf(msg, ..args)
- fmt.eprintf("\n")
-}
-
-default_warn_handler :: proc(pos: Pos, msg: string, args: ..any) {
- fmt.eprintf("%s(%d:%d) warning: ", pos.file, pos.line, pos.column)
- fmt.eprintf(msg, ..args)
- fmt.eprintf("\n")
-}
-
-error_offset :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
- pos := offset_to_pos(t, offset)
- if t.err != nil {
- t.err(pos, msg, ..args)
- }
- t.error_count += 1
-}
-
-warn_offset :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
- pos := offset_to_pos(t, offset)
- if t.warn != nil {
- t.warn(pos, msg, ..args)
- }
- t.warning_count += 1
-}
-
-error :: proc(t: ^Tokenizer, tok: ^Token, msg: string, args: ..any) {
- pos := tok.pos
- if t.err != nil {
- t.err(pos, msg, ..args)
- }
- t.error_count += 1
-}
-
-warn :: proc(t: ^Tokenizer, tok: ^Token, msg: string, args: ..any) {
- pos := tok.pos
- if t.warn != nil {
- t.warn(pos, msg, ..args)
- }
- t.warning_count += 1
-}
-
-
-advance_rune :: proc(t: ^Tokenizer) {
- if t.read_offset < len(t.src) {
- t.offset = t.read_offset
- if t.ch == '\n' {
- t.at_bol = true
- t.line_offset = t.offset
- t.line_count += 1
- }
- r, w := rune(t.src[t.read_offset]), 1
- switch {
- case r == 0:
- error_offset(t, t.offset, "illegal character NUL")
- case r >= utf8.RUNE_SELF:
- r, w = utf8.decode_rune(t.src[t.read_offset:])
- if r == utf8.RUNE_ERROR && w == 1 {
- error_offset(t, t.offset, "illegal UTF-8 encoding")
- } else if r == utf8.RUNE_BOM && t.offset > 0 {
- error_offset(t, t.offset, "illegal byte order mark")
- }
- }
- t.read_offset += w
- t.ch = r
- } else {
- t.offset = len(t.src)
- if t.ch == '\n' {
- t.at_bol = true
- t.line_offset = t.offset
- t.line_count += 1
- }
- t.ch = -1
- }
-}
-
-advance_rune_n :: proc(t: ^Tokenizer, n: int) {
- for _ in 0..<n {
- advance_rune(t)
- }
-}
-
-is_digit :: proc(r: rune) -> bool {
- return '0' <= r && r <= '9'
-}
-
-skip_whitespace :: proc(t: ^Tokenizer) {
- for {
- switch t.ch {
- case ' ', '\t', '\r', '\v', '\f', '\n':
- t.has_space = true
- advance_rune(t)
- case:
- return
- }
- }
-}
-
-scan_comment :: proc(t: ^Tokenizer) -> string {
- offset := t.offset-1
- next := -1
- general: {
- if t.ch == '/'{ // line comments
- advance_rune(t)
- for t.ch != '\n' && t.ch >= 0 {
- advance_rune(t)
- }
-
- next = t.offset
- if t.ch == '\n' {
- next += 1
- }
- break general
- }
-
- /* style comment */
- advance_rune(t)
- for t.ch >= 0 {
- ch := t.ch
- advance_rune(t)
- if ch == '*' && t.ch == '/' {
- advance_rune(t)
- next = t.offset
- break general
- }
- }
-
- error_offset(t, offset, "comment not terminated")
- }
-
- lit := t.src[offset : t.offset]
-
- // NOTE(bill): Strip CR for line comments
- for len(lit) > 2 && lit[1] == '/' && lit[len(lit)-1] == '\r' {
- lit = lit[:len(lit)-1]
- }
-
-
- return string(lit)
-}
-
-scan_identifier :: proc(t: ^Tokenizer) -> string {
- offset := t.offset
-
- for is_ident1(t.ch) {
- advance_rune(t)
- }
-
- return string(t.src[offset : t.offset])
-}
-
-scan_string :: proc(t: ^Tokenizer) -> string {
- offset := t.offset-1
-
- for {
- ch := t.ch
- if ch == '\n' || ch < 0 {
- error_offset(t, offset, "string literal was not terminated")
- break
- }
- advance_rune(t)
- if ch == '"' {
- break
- }
- if ch == '\\' {
- scan_escape(t)
- }
- }
-
- return string(t.src[offset : t.offset])
-}
-
-digit_val :: proc(r: rune) -> int {
- switch r {
- case '0'..='9':
- return int(r-'0')
- case 'A'..='F':
- return int(r-'A' + 10)
- case 'a'..='f':
- return int(r-'a' + 10)
- }
- return 16
-}
-
-scan_escape :: proc(t: ^Tokenizer) -> bool {
- offset := t.offset
-
- esc := t.ch
- n: int
- base, max: u32
- switch esc {
- case 'a', 'b', 'e', 'f', 'n', 't', 'v', 'r', '\\', '\'', '"':
- advance_rune(t)
- return true
-
- case '0'..='7':
- for digit_val(t.ch) < 8 {
- advance_rune(t)
- }
- return true
- case 'x':
- advance_rune(t)
- for digit_val(t.ch) < 16 {
- advance_rune(t)
- }
- return true
- case 'u':
- advance_rune(t)
- n, base, max = 4, 16, utf8.MAX_RUNE
- case 'U':
- advance_rune(t)
- n, base, max = 8, 16, utf8.MAX_RUNE
- case:
- if t.ch < 0 {
- error_offset(t, offset, "escape sequence was not terminated")
- } else {
- break
- }
- return false
- }
-
- x: u32
- main_loop: for n > 0 {
- d := u32(digit_val(t.ch))
- if d >= base {
- if t.ch == '"' || t.ch == '\'' {
- break main_loop
- }
- if t.ch < 0 {
- error_offset(t, t.offset, "escape sequence was not terminated")
- } else {
- error_offset(t, t.offset, "illegal character '%r' : %d in escape sequence", t.ch, t.ch)
- }
- return false
- }
-
- x = x*base + d
- advance_rune(t)
- n -= 1
- }
-
- if x > max || 0xd800 <= x && x <= 0xdfff {
- error_offset(t, offset, "escape sequence is an invalid Unicode code point")
- return false
- }
- return true
-}
-
-scan_rune :: proc(t: ^Tokenizer) -> string {
- offset := t.offset-1
- valid := true
- n := 0
- for {
- ch := t.ch
- if ch == '\n' || ch < 0 {
- if valid {
- error_offset(t, offset, "rune literal not terminated")
- valid = false
- }
- break
- }
- advance_rune(t)
- if ch == '\'' {
- break
- }
- n += 1
- if ch == '\\' {
- if !scan_escape(t) {
- valid = false
- }
- }
- }
-
- if valid && n != 1 {
- error_offset(t, offset, "illegal rune literal")
- }
-
- return string(t.src[offset : t.offset])
-}
-
-scan_number :: proc(t: ^Tokenizer, seen_decimal_point: bool) -> (Token_Kind, string) {
- scan_mantissa :: proc(t: ^Tokenizer, base: int) {
- for digit_val(t.ch) < base {
- advance_rune(t)
- }
- }
- scan_exponent :: proc(t: ^Tokenizer) {
- if t.ch == 'e' || t.ch == 'E' || t.ch == 'p' || t.ch == 'P' {
- advance_rune(t)
- if t.ch == '-' || t.ch == '+' {
- advance_rune(t)
- }
- if digit_val(t.ch) < 10 {
- scan_mantissa(t, 10)
- } else {
- error_offset(t, t.offset, "illegal floating-point exponent")
- }
- }
- }
- scan_fraction :: proc(t: ^Tokenizer) -> (early_exit: bool) {
- if t.ch == '.' && peek(t) == '.' {
- return true
- }
- if t.ch == '.' {
- advance_rune(t)
- scan_mantissa(t, 10)
- }
- return false
- }
-
- check_end := true
-
-
- offset := t.offset
- seen_point := seen_decimal_point
-
- if seen_point {
- offset -= 1
- scan_mantissa(t, 10)
- scan_exponent(t)
- } else {
- if t.ch == '0' {
- int_base :: proc(t: ^Tokenizer, base: int, msg: string) {
- prev := t.offset
- advance_rune(t)
- scan_mantissa(t, base)
- if t.offset - prev <= 1 {
- error_offset(t, t.offset, msg)
- }
- }
-
- advance_rune(t)
- switch t.ch {
- case 'b', 'B':
- int_base(t, 2, "illegal binary integer")
- case 'x', 'X':
- int_base(t, 16, "illegal hexadecimal integer")
- case:
- seen_point = false
- scan_mantissa(t, 10)
- if t.ch == '.' {
- seen_point = true
- if scan_fraction(t) {
- check_end = false
- }
- }
- if check_end {
- scan_exponent(t)
- check_end = false
- }
- }
- }
- }
-
- if check_end {
- scan_mantissa(t, 10)
-
- if !scan_fraction(t) {
- scan_exponent(t)
- }
- }
-
- return .Number, string(t.src[offset : t.offset])
-}
-
-scan_punct :: proc(t: ^Tokenizer, ch: rune) -> (kind: Token_Kind) {
- kind = .Punct
- switch ch {
- case:
- kind = .Invalid
-
- case '<', '>':
- if t.ch == ch {
- advance_rune(t)
- }
- if t.ch == '=' {
- advance_rune(t)
- }
- case '!', '+', '-', '*', '/', '%', '^', '=':
- if t.ch == '=' {
- advance_rune(t)
- }
- case '#':
- if t.ch == '#' {
- advance_rune(t)
- }
- case '&':
- if t.ch == '=' || t.ch == '&' {
- advance_rune(t)
- }
- case '|':
- if t.ch == '=' || t.ch == '|' {
- advance_rune(t)
- }
- case '(', ')', '[', ']', '{', '}':
- // okay
- case '~', ',', ':', ';', '?':
- // okay
- case '`':
- // okay
- case '.':
- if t.ch == '.' && peek(t) == '.' {
- advance_rune(t)
- advance_rune(t) // consume last '.'
- }
- }
- return
-}
-
-peek :: proc(t: ^Tokenizer) -> byte {
- if t.read_offset < len(t.src) {
- return t.src[t.read_offset]
- }
- return 0
-}
-peek_str :: proc(t: ^Tokenizer, str: string) -> bool {
- if t.read_offset < len(t.src) {
- return strings.has_prefix(string(t.src[t.offset:]), str)
- }
- return false
-}
-
-scan_literal_prefix :: proc(t: ^Tokenizer, str: string, prefix: ^string) -> bool {
- if peek_str(t, str) {
- offset := t.offset
- for _ in str {
- advance_rune(t)
- }
- prefix^ = string(t.src[offset:][:len(str)-1])
- return true
- }
- return false
-}
-
-
-allow_next_to_be_newline :: proc(t: ^Tokenizer) -> bool {
- if t.ch == '\n' {
- advance_rune(t)
- return true
- } else if t.ch == '\r' && peek(t) == '\n' { // allow for MS-DOS style line endings
- advance_rune(t) // \r
- advance_rune(t) // \n
- return true
- }
- return false
-}
-
-scan :: proc(t: ^Tokenizer, f: ^File) -> ^Token {
- skip_whitespace(t)
-
- offset := t.offset
-
- kind: Token_Kind
- lit: string
- prefix: string
-
- switch ch := t.ch; {
- case scan_literal_prefix(t, `u8"`, &prefix):
- kind = .String
- lit = scan_string(t)
- case scan_literal_prefix(t, `u"`, &prefix):
- kind = .String
- lit = scan_string(t)
- case scan_literal_prefix(t, `L"`, &prefix):
- kind = .String
- lit = scan_string(t)
- case scan_literal_prefix(t, `U"`, &prefix):
- kind = .String
- lit = scan_string(t)
- case scan_literal_prefix(t, `u'`, &prefix):
- kind = .Char
- lit = scan_rune(t)
- case scan_literal_prefix(t, `L'`, &prefix):
- kind = .Char
- lit = scan_rune(t)
- case scan_literal_prefix(t, `U'`, &prefix):
- kind = .Char
- lit = scan_rune(t)
-
- case is_ident0(ch):
- lit = scan_identifier(t)
- kind = .Ident
- case '0' <= ch && ch <= '9':
- kind, lit = scan_number(t, false)
- case:
- advance_rune(t)
- switch ch {
- case -1:
- kind = .EOF
- case '\\':
- kind = .Punct
- if allow_next_to_be_newline(t) {
- t.at_bol = true
- t.has_space = false
- return scan(t, f)
- }
-
- case '.':
- if is_digit(t.ch) {
- kind, lit = scan_number(t, true)
- } else {
- kind = scan_punct(t, ch)
- }
- case '"':
- kind = .String
- lit = scan_string(t)
- case '\'':
- kind = .Char
- lit = scan_rune(t)
- case '/':
- if t.ch == '/' || t.ch == '*' {
- kind = .Comment
- lit = scan_comment(t)
- t.has_space = true
- break
- }
- fallthrough
- case:
- kind = scan_punct(t, ch)
- if kind == .Invalid && ch != utf8.RUNE_BOM {
- error_offset(t, t.offset, "illegal character '%r': %d", ch, ch)
- }
- }
- }
-
- if lit == "" {
- lit = string(t.src[offset : t.offset])
- }
-
- if kind == .Comment {
- return scan(t, f)
- }
-
- tok := new(Token)
- tok.kind = kind
- tok.lit = lit
- tok.pos = offset_to_pos(t, offset)
- tok.file = f
- tok.prefix = prefix
- tok.at_bol = t.at_bol
- tok.has_space = t.has_space
-
- t.at_bol, t.has_space = false, false
-
- return tok
-}
-
-tokenize :: proc(t: ^Tokenizer, f: ^File) -> ^Token {
- setup_tokenizer: {
- t.src = f.src
- t.ch = ' '
- t.offset = 0
- t.read_offset = 0
- t.line_offset = 0
- t.line_count = len(t.src) > 0 ? 1 : 0
- t.error_count = 0
- t.path = f.name
-
-
- advance_rune(t)
- if t.ch == utf8.RUNE_BOM {
- advance_rune(t)
- }
- }
-
-
- t.at_bol = true
- t.has_space = false
-
- head: Token
- curr := &head
- for {
- tok := scan(t, f)
- if tok == nil {
- break
- }
- curr.next = tok
- curr = curr.next
- if tok.kind == .EOF {
- break
- }
- }
-
- return head.next
-}
-
-add_new_file :: proc(t: ^Tokenizer, name: string, src: []byte, id: int) -> ^File {
- file := new(File)
- file.id = id
- file.src = src
- file.name = name
- file.display_name = name
- return file
-}
-
-tokenize_file :: proc(t: ^Tokenizer, path: string, id: int, loc := #caller_location) -> ^Token {
- src, ok := os.read_entire_file(path)
- if !ok {
- return nil
- }
- return tokenize(t, add_new_file(t, path, src, id))
-}
-
-
-inline_tokenize :: proc(t: ^Tokenizer, tok: ^Token, src: []byte) -> ^Token {
- file := new(File)
- file.src = src
- if tok.file != nil {
- file.id = tok.file.id
- file.name = tok.file.name
- file.display_name = tok.file.name
- }
-
- return tokenize(t, file)
-}
diff --git a/core/c/frontend/tokenizer/unicode.odin b/core/c/frontend/tokenizer/unicode.odin
deleted file mode 100644
index 317ee160e..000000000
--- a/core/c/frontend/tokenizer/unicode.odin
+++ /dev/null
@@ -1,116 +0,0 @@
-package c_frontend_tokenizer
-
-
-in_range :: proc(range: []rune, c: rune) -> bool #no_bounds_check {
- for i := 0; range[i] != -1; i += 2 {
- if range[i] <= c && c <= range[i+1] {
- return true
- }
- }
- return false
-}
-
-
-// [https://www.sigbus.info/n1570#D] C11 allows ASCII and some multibyte characters in certain Unicode ranges to be used in an identifier.
-//
-// is_ident0 returns true if a given character is acceptable as the first character of an identifier.
-is_ident0 :: proc(c: rune) -> bool {
- return in_range(_range_ident0, c)
-}
-// is_ident1 returns true if a given character is acceptable as a non-first character of an identifier.
-is_ident1 :: proc(c: rune) -> bool {
- return is_ident0(c) || in_range(_range_ident1, c)
-}
-
-// Returns the number of columns needed to display a given character in a fixed-width font.
-// Based on https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c
-char_width :: proc(c: rune) -> int {
- switch {
- case in_range(_range_width0, c):
- return 0
- case in_range(_range_width2, c):
- return 2
- }
- return 1
-}
-
-display_width :: proc(str: string) -> (w: int) {
- for c in str {
- w += char_width(c)
- }
- return
-}
-
-
-
-_range_ident0 := []rune{
- '_', '_', 'a', 'z', 'A', 'Z', '$', '$',
- 0x00A8, 0x00A8, 0x00AA, 0x00AA, 0x00AD, 0x00AD, 0x00AF, 0x00AF,
- 0x00B2, 0x00B5, 0x00B7, 0x00BA, 0x00BC, 0x00BE, 0x00C0, 0x00D6,
- 0x00D8, 0x00F6, 0x00F8, 0x00FF, 0x0100, 0x02FF, 0x0370, 0x167F,
- 0x1681, 0x180D, 0x180F, 0x1DBF, 0x1E00, 0x1FFF, 0x200B, 0x200D,
- 0x202A, 0x202E, 0x203F, 0x2040, 0x2054, 0x2054, 0x2060, 0x206F,
- 0x2070, 0x20CF, 0x2100, 0x218F, 0x2460, 0x24FF, 0x2776, 0x2793,
- 0x2C00, 0x2DFF, 0x2E80, 0x2FFF, 0x3004, 0x3007, 0x3021, 0x302F,
- 0x3031, 0x303F, 0x3040, 0xD7FF, 0xF900, 0xFD3D, 0xFD40, 0xFDCF,
- 0xFDF0, 0xFE1F, 0xFE30, 0xFE44, 0xFE47, 0xFFFD,
- 0x10000, 0x1FFFD, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000, 0x4FFFD,
- 0x50000, 0x5FFFD, 0x60000, 0x6FFFD, 0x70000, 0x7FFFD, 0x80000, 0x8FFFD,
- 0x90000, 0x9FFFD, 0xA0000, 0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD,
- 0xD0000, 0xDFFFD, 0xE0000, 0xEFFFD,
- -1,
-}
-
-_range_ident1 := []rune{
- '0', '9', '$', '$', 0x0300, 0x036F, 0x1DC0, 0x1DFF, 0x20D0, 0x20FF, 0xFE20, 0xFE2F,
- -1,
-}
-
-
-_range_width0 := []rune{
- 0x0000, 0x001F, 0x007f, 0x00a0, 0x0300, 0x036F, 0x0483, 0x0486,
- 0x0488, 0x0489, 0x0591, 0x05BD, 0x05BF, 0x05BF, 0x05C1, 0x05C2,
- 0x05C4, 0x05C5, 0x05C7, 0x05C7, 0x0600, 0x0603, 0x0610, 0x0615,
- 0x064B, 0x065E, 0x0670, 0x0670, 0x06D6, 0x06E4, 0x06E7, 0x06E8,
- 0x06EA, 0x06ED, 0x070F, 0x070F, 0x0711, 0x0711, 0x0730, 0x074A,
- 0x07A6, 0x07B0, 0x07EB, 0x07F3, 0x0901, 0x0902, 0x093C, 0x093C,
- 0x0941, 0x0948, 0x094D, 0x094D, 0x0951, 0x0954, 0x0962, 0x0963,
- 0x0981, 0x0981, 0x09BC, 0x09BC, 0x09C1, 0x09C4, 0x09CD, 0x09CD,
- 0x09E2, 0x09E3, 0x0A01, 0x0A02, 0x0A3C, 0x0A3C, 0x0A41, 0x0A42,
- 0x0A47, 0x0A48, 0x0A4B, 0x0A4D, 0x0A70, 0x0A71, 0x0A81, 0x0A82,
- 0x0ABC, 0x0ABC, 0x0AC1, 0x0AC5, 0x0AC7, 0x0AC8, 0x0ACD, 0x0ACD,
- 0x0AE2, 0x0AE3, 0x0B01, 0x0B01, 0x0B3C, 0x0B3C, 0x0B3F, 0x0B3F,
- 0x0B41, 0x0B43, 0x0B4D, 0x0B4D, 0x0B56, 0x0B56, 0x0B82, 0x0B82,
- 0x0BC0, 0x0BC0, 0x0BCD, 0x0BCD, 0x0C3E, 0x0C40, 0x0C46, 0x0C48,
- 0x0C4A, 0x0C4D, 0x0C55, 0x0C56, 0x0CBC, 0x0CBC, 0x0CBF, 0x0CBF,
- 0x0CC6, 0x0CC6, 0x0CCC, 0x0CCD, 0x0CE2, 0x0CE3, 0x0D41, 0x0D43,
- 0x0D4D, 0x0D4D, 0x0DCA, 0x0DCA, 0x0DD2, 0x0DD4, 0x0DD6, 0x0DD6,
- 0x0E31, 0x0E31, 0x0E34, 0x0E3A, 0x0E47, 0x0E4E, 0x0EB1, 0x0EB1,
- 0x0EB4, 0x0EB9, 0x0EBB, 0x0EBC, 0x0EC8, 0x0ECD, 0x0F18, 0x0F19,
- 0x0F35, 0x0F35, 0x0F37, 0x0F37, 0x0F39, 0x0F39, 0x0F71, 0x0F7E,
- 0x0F80, 0x0F84, 0x0F86, 0x0F87, 0x0F90, 0x0F97, 0x0F99, 0x0FBC,
- 0x0FC6, 0x0FC6, 0x102D, 0x1030, 0x1032, 0x1032, 0x1036, 0x1037,
- 0x1039, 0x1039, 0x1058, 0x1059, 0x1160, 0x11FF, 0x135F, 0x135F,
- 0x1712, 0x1714, 0x1732, 0x1734, 0x1752, 0x1753, 0x1772, 0x1773,
- 0x17B4, 0x17B5, 0x17B7, 0x17BD, 0x17C6, 0x17C6, 0x17C9, 0x17D3,
- 0x17DD, 0x17DD, 0x180B, 0x180D, 0x18A9, 0x18A9, 0x1920, 0x1922,
- 0x1927, 0x1928, 0x1932, 0x1932, 0x1939, 0x193B, 0x1A17, 0x1A18,
- 0x1B00, 0x1B03, 0x1B34, 0x1B34, 0x1B36, 0x1B3A, 0x1B3C, 0x1B3C,
- 0x1B42, 0x1B42, 0x1B6B, 0x1B73, 0x1DC0, 0x1DCA, 0x1DFE, 0x1DFF,
- 0x200B, 0x200F, 0x202A, 0x202E, 0x2060, 0x2063, 0x206A, 0x206F,
- 0x20D0, 0x20EF, 0x302A, 0x302F, 0x3099, 0x309A, 0xA806, 0xA806,
- 0xA80B, 0xA80B, 0xA825, 0xA826, 0xFB1E, 0xFB1E, 0xFE00, 0xFE0F,
- 0xFE20, 0xFE23, 0xFEFF, 0xFEFF, 0xFFF9, 0xFFFB, 0x10A01, 0x10A03,
- 0x10A05, 0x10A06, 0x10A0C, 0x10A0F, 0x10A38, 0x10A3A, 0x10A3F, 0x10A3F,
- 0x1D167, 0x1D169, 0x1D173, 0x1D182, 0x1D185, 0x1D18B, 0x1D1AA, 0x1D1AD,
- 0x1D242, 0x1D244, 0xE0001, 0xE0001, 0xE0020, 0xE007F, 0xE0100, 0xE01EF,
- -1,
-}
-
-_range_width2 := []rune{
- 0x1100, 0x115F, 0x2329, 0x2329, 0x232A, 0x232A, 0x2E80, 0x303E,
- 0x3040, 0xA4CF, 0xAC00, 0xD7A3, 0xF900, 0xFAFF, 0xFE10, 0xFE19,
- 0xFE30, 0xFE6F, 0xFF00, 0xFF60, 0xFFE0, 0xFFE6, 0x1F000, 0x1F644,
- 0x20000, 0x2FFFD, 0x30000, 0x3FFFD,
- -1,
-}