Diffstat (limited to 'core/encoding/csv')
-rw-r--r--  core/encoding/csv/reader.odin |  36
-rw-r--r--  core/encoding/csv/writer.odin | 147

2 files changed, 165 insertions, 18 deletions
diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin
index 9baaede24..4c28ea9f3 100644
--- a/core/encoding/csv/reader.odin
+++ b/core/encoding/csv/reader.odin
@@ -56,22 +56,22 @@ Reader :: struct {
 }
 
-Parser_Error_Kind :: enum {
+Reader_Error_Kind :: enum {
     Bare_Quote,
     Quote,
     Field_Count,
     Invalid_Delim,
 }
 
-parser_error_kind_string := [Parser_Error_Kind]string{
+reader_error_kind_string := [Reader_Error_Kind]string{
     .Bare_Quote    = "bare \" in non-quoted field",
     .Quote         = "extra or missing \" in quoted field",
     .Field_Count   = "wrong field count",
     .Invalid_Delim = "invalid delimiter",
 };
 
-Parser_Error :: struct {
-    kind:       Parser_Error_Kind,
+Reader_Error :: struct {
+    kind:       Reader_Error_Kind,
     start_line: int,
     line:       int,
     column:     int,
@@ -79,7 +79,7 @@ Parser_Error :: struct {
 }
 
 Error :: union {
-    Parser_Error,
+    Reader_Error,
     io.Error,
 }
 
@@ -182,6 +182,14 @@ read_all_from_string :: proc(input: string, records_allocator := context.allocat
     return read_all(&r, records_allocator);
 }
 
+@private
+is_valid_delim :: proc(r: rune) -> bool {
+    switch r {
+    case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
+        return false;
+    }
+    return utf8.valid_rune(r);
+}
 
 @private
 _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) {
@@ -214,14 +222,6 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
         return line, err;
     }
-    is_valid_delim :: proc(r: rune) -> bool {
-        switch r {
-        case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
-            return false;
-        }
-        return utf8.valid_rune(r);
-    }
-
     length_newline :: proc(b: []byte) -> int {
         if len(b) > 0 && b[len(b)-1] == '\n' {
             return 1;
         }
@@ -237,7 +237,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
     if r.comma == r.comment ||
        !is_valid_delim(r.comma) ||
        (r.comment != 0 && !is_valid_delim(r.comment)) {
-        err := Parser_Error{
+        err := Reader_Error{
             kind = .Invalid_Delim,
             line = r.line_count,
         };
@@ -287,7 +287,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
         if !r.lazy_quotes {
             if j := bytes.index_byte(field, '"'); j >= 0 {
                 column := utf8.rune_count(full_line[:len(full_line) - len(line[j:])]);
-                err = Parser_Error{
+                err = Reader_Error{
                     kind       = .Bare_Quote,
                     start_line = record_line,
                     line       = r.line_count,
@@ -327,7 +327,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
                 append(&r.record_buffer, '"');
             case: // invalid non-escaped quote
                 column := utf8.rune_count(full_line[:len(full_line) - len(line) - quote_len]);
-                err = Parser_Error{
+                err = Reader_Error{
                     kind       = .Quote,
                     start_line = record_line,
                     line       = r.line_count,
@@ -350,7 +350,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
             case:
                 if !r.lazy_quotes && err_read == nil {
                     column := utf8.rune_count(full_line);
-                    err = Parser_Error{
+                    err = Reader_Error{
                         kind       = .Quote,
                         start_line = record_line,
                         line       = r.line_count,
@@ -390,7 +390,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 
     if r.fields_per_record > 0 {
         if len(dst) != r.fields_per_record && err == nil {
-            err = Parser_Error{
+            err = Reader_Error{
                 kind       = .Field_Count,
                 start_line = record_line,
                 line       = r.line_count,
diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin
new file mode 100644
index 000000000..bcb6ecc0d
--- /dev/null
+++ b/core/encoding/csv/writer.odin
@@ -0,0 +1,147 @@
+package csv
+
+import "core:io"
+import "core:strings"
+import "core:unicode/utf8"
+
+// Writer is a data structure used for writing records using a CSV-encoding.
+Writer :: struct {
+    // Field delimiter (set to ',' with writer_init)
+    comma: rune,
+
+    // if set to true, \r\n will be used as the line terminator
+    use_crlf: bool,
+
+    w: io.Writer,
+}
+
+// writer_init initializes a Writer that writes to w
+writer_init :: proc(writer: ^Writer, w: io.Writer) {
+    writer.comma = ',';
+    writer.w = w;
+}
+
+// write writes a single CSV record to w with any necessary quoting.
+// A record is a slice of strings, where each string is a single field.
+//
+// If the underlying io.Writer requires flushing, make sure to call io.flush
+write :: proc(w: ^Writer, record: []string) -> io.Error {
+    CHAR_SET :: "\n\r\"";
+
+    field_needs_quoting :: proc(w: ^Writer, field: string) -> bool {
+        switch {
+        case field == "": // No need to quote empty strings
+            return false;
+        case field == `\.`: // Postgres is weird
+            return true;
+        case w.comma < utf8.RUNE_SELF: // ASCII optimization
+            for i in 0..<len(field) {
+                switch field[i] {
+                case '\n', '\r', '"', byte(w.comma):
+                    return true;
+                }
+            }
+        case:
+            if strings.contains_rune(field, w.comma) >= 0 {
+                return true;
+            }
+            if strings.contains_any(field, CHAR_SET) {
+                return true;
+            }
+        }
+
+        // Leading spaces need quoting
+        r, _ := utf8.decode_rune_in_string(field);
+        return strings.is_space(r);
+    }
+
+    if !is_valid_delim(w.comma) {
+        return .No_Progress; // TODO(bill): Is this a good error?
+    }
+
+    for _, field_idx in record {
+        // NOTE(bill): declared like this so that the field can be modified later if necessary
+        field := record[field_idx];
+
+        if field_idx > 0 {
+            if _, err := io.write_rune(w.w, w.comma); err != nil {
+                return err;
+            }
+        }
+
+        if !field_needs_quoting(w, field) {
+            if _, err := io.write_string(w.w, field); err != nil {
+                return err;
+            }
+            continue;
+        }
+
+        if err := io.write_byte(w.w, '"'); err != nil {
+            return err;
+        }
+
+        for len(field) > 0 {
+            i := strings.index_any(field, CHAR_SET);
+            if i < 0 {
+                i = len(field);
+            }
+
+            if _, err := io.write_string(w.w, field[:i]); err != nil {
+                return err;
+            }
+            field = field[i:];
+
+            if len(field) > 0 {
+                switch field[0] {
+                case '\r':
+                    if !w.use_crlf {
+                        if err := io.write_byte(w.w, '\r'); err != nil {
+                            return err;
+                        }
+                    }
+                case '\n':
+                    if w.use_crlf {
+                        if _, err := io.write_string(w.w, "\r\n"); err != nil {
+                            return err;
+                        }
+                    } else {
+                        if err := io.write_byte(w.w, '\n'); err != nil {
+                            return err;
+                        }
+                    }
+                case '"':
+                    if _, err := io.write_string(w.w, `""`); err != nil {
+                        return err;
+                    }
+                }
+                field = field[1:];
+            }
+        }
+        if err := io.write_byte(w.w, '"'); err != nil {
+            return err;
+        }
+    }
+
+    if w.use_crlf {
+        _, err := io.write_string(w.w, "\r\n");
+        return err;
+    }
+    return io.write_byte(w.w, '\n');
+}
+
+// write_all writes multiple CSV records to w using write, and then flushes (if necessary).
+write_all :: proc(w: ^Writer, records: [][]string) -> io.Error {
+    for record in records {
+        err := write(w, record);
+        if err != nil {
+            return err;
+        }
+    }
+    return writer_flush(w);
+}
+
+// writer_flush flushes the underlying io.Writer.
+// If the underlying io.Writer does not support flush, nil is returned.
+writer_flush :: proc(w: ^Writer) -> io.Error {
+    return io.flush(auto_cast w.w);
+}
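Usage sketch (editor's addition, not part of the commit): the new writer API is small — writer_init stores the io.Writer and sets comma to ',', write emits one record with whatever quoting it needs, and write_all calls write per record and then writer_flush. The sketch below is written against a later Odin core library (no semicolons); strings.builder_make and strings.to_writer are assumed helpers for obtaining an io.Writer backed by an in-memory strings.Builder, and their names may differ in the core snapshot this diff applies to.

package csv_write_example

import "core:fmt"
import "core:strings"
import "core:encoding/csv"

main :: proc() {
    // In-memory sink; wrapping a strings.Builder as an io.Writer via
    // strings.to_writer is an assumption and may vary between core versions.
    b := strings.builder_make()
    defer strings.builder_destroy(&b)

    w: csv.Writer
    csv.writer_init(&w, strings.to_writer(&b))
    // w.use_crlf = true would switch the line terminator to \r\n.

    records := [][]string{
        {"name", "note"},
        {"odin", `has "quotes" and a comma, so it gets quoted`},
    }

    // write_all calls write once per record, then flushes.
    if err := csv.write_all(&w, records); err != .None {
        fmt.eprintln("csv write failed:", err)
        return
    }
    fmt.print(strings.to_string(b))
}

Per field_needs_quoting, a field containing the delimiter, a double quote, CR/LF, or leading whitespace is quoted (with embedded quotes doubled); every other field is written verbatim.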
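Error handling sketch (also an editor's addition): the reader change is purely a rename — Parser_Error becomes Reader_Error, and the csv.Error union now holds either Reader_Error or io.Error. The hypothetical helper below (report_csv_error is a made-up name, not part of core:encoding/csv) shows how callers would switch over the renamed union; the fields used (kind, line, column) are exactly the ones visible in the diff.

package csv_error_example

import "core:fmt"
import "core:io"
import "core:encoding/csv"

// report_csv_error is a hypothetical helper for inspecting a csv.Error.
report_csv_error :: proc(err: csv.Error) {
    switch e in err {
    case csv.Reader_Error:
        fmt.eprintf("parse error %v at line %d, column %d\n", e.kind, e.line, e.column)
    case io.Error:
        fmt.eprintf("io error: %v\n", e)
    case:
        // nil: nothing to report
    }
}

main :: proc() {
    report_csv_error(csv.Reader_Error{kind = .Field_Count, line = 3, column = 7})
}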