aboutsummaryrefslogtreecommitdiff
path: root/core/encoding/csv
diff options
context:
space:
mode:
author gingerBill <bill@gingerbill.org> 2021-01-09 00:21:47 +0000
committer gingerBill <bill@gingerbill.org> 2021-01-09 00:21:47 +0000
commit 37253f2621ace59c367ecf86f362b8854026181d (patch)
tree 4a9f256ac96472698a48eddb14d71f3f52046fa5 /core/encoding/csv
parent da380d6fc41bb36de1eb8d820e15715e986710ba (diff)
Add encoding/csv Writer
Diffstat (limited to 'core/encoding/csv')
-rw-r--r-- core/encoding/csv/reader.odin 36
-rw-r--r-- core/encoding/csv/writer.odin 147
2 files changed, 165 insertions, 18 deletions
diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin
index 9baaede24..4c28ea9f3 100644
--- a/core/encoding/csv/reader.odin
+++ b/core/encoding/csv/reader.odin
@@ -56,22 +56,22 @@ Reader :: struct {
}
-Parser_Error_Kind :: enum {
+Reader_Error_Kind :: enum {
Bare_Quote,
Quote,
Field_Count,
Invalid_Delim,
}
-parser_error_kind_string := [Parser_Error_Kind]string{
+reader_error_kind_string := [Reader_Error_Kind]string{
.Bare_Quote = "bare \" in non-quoted field",
.Quote = "extra or missing \" in quoted field",
.Field_Count = "wrong field count",
.Invalid_Delim = "invalid delimiter",
};
-Parser_Error :: struct {
- kind: Parser_Error_Kind,
+Reader_Error :: struct {
+ kind: Reader_Error_Kind,
start_line: int,
line: int,
column: int,
@@ -79,7 +79,7 @@ Parser_Error :: struct {
}
Error :: union {
- Parser_Error,
+ Reader_Error,
io.Error,
}
@@ -182,6 +182,14 @@ read_all_from_string :: proc(input: string, records_allocator := context.allocat
return read_all(&r, records_allocator);
}
+// is_valid_delim reports whether r may be used as a field or comment delimiter.
+// NUL, the double quote, carriage return, line feed and utf8.RUNE_ERROR are
+// always rejected; any other rune is accepted only if utf8.valid_rune agrees.
+@private
+is_valid_delim :: proc(r: rune) -> bool {
+	if r == 0 || r == '"' || r == '\r' || r == '\n' || r == utf8.RUNE_ERROR {
+		return false;
+	}
+	return utf8.valid_rune(r);
+}
@private
_read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) {
@@ -214,14 +222,6 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
return line, err;
}
- is_valid_delim :: proc(r: rune) -> bool {
- switch r {
- case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
- return false;
- }
- return utf8.valid_rune(r);
- }
-
length_newline :: proc(b: []byte) -> int {
if len(b) > 0 && b[len(b)-1] == '\n' {
return 1;
@@ -237,7 +237,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
if r.comma == r.comment ||
!is_valid_delim(r.comma) ||
(r.comment != 0 && !is_valid_delim(r.comment)) {
- err := Parser_Error{
+ err := Reader_Error{
kind = .Invalid_Delim,
line = r.line_count,
};
@@ -287,7 +287,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
if !r.lazy_quotes {
if j := bytes.index_byte(field, '"'); j >= 0 {
column := utf8.rune_count(full_line[:len(full_line) - len(line[j:])]);
- err = Parser_Error{
+ err = Reader_Error{
kind = .Bare_Quote,
start_line = record_line,
line = r.line_count,
@@ -327,7 +327,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
append(&r.record_buffer, '"');
case: // invalid non-escaped quote
column := utf8.rune_count(full_line[:len(full_line) - len(line) - quote_len]);
- err = Parser_Error{
+ err = Reader_Error{
kind = .Quote,
start_line = record_line,
line = r.line_count,
@@ -350,7 +350,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
case:
if !r.lazy_quotes && err_read == nil {
column := utf8.rune_count(full_line);
- err = Parser_Error{
+ err = Reader_Error{
kind = .Quote,
start_line = record_line,
line = r.line_count,
@@ -390,7 +390,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
if r.fields_per_record > 0 {
if len(dst) != r.fields_per_record && err == nil {
- err = Parser_Error{
+ err = Reader_Error{
kind = .Field_Count,
start_line = record_line,
line = r.line_count,
diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin
new file mode 100644
index 000000000..bcb6ecc0d
--- /dev/null
+++ b/core/encoding/csv/writer.odin
@@ -0,0 +1,147 @@
+package csv
+
+import "core:io"
+import "core:strings"
+import "core:unicode/utf8"
+
+// Writer is a data structure used for writing records using a CSV-encoding.
+// Initialize it with writer_init, then emit records with write or write_all.
+Writer :: struct {
+ // Field delimiter (set to ',' with writer_init)
+ // write rejects the record with .No_Progress if this rune does not pass is_valid_delim.
+ comma: rune,
+
+ // if set to true, \r\n will be used as the line terminator
+ // In that mode, a '\n' inside a quoted field is also written as \r\n and a
+ // '\r' inside a quoted field is not written at all.
+ use_crlf: bool,
+
+ // Destination that write sends the encoded bytes to (set by writer_init)
+ w: io.Writer,
+}
+
+// writer_init prepares writer so that records are written to w, using ','
+// as the field delimiter. The use_crlf setting is left untouched.
+writer_init :: proc(writer: ^Writer, w: io.Writer) {
+	writer.comma, writer.w = ',', w;
+}
+
+// write writes a single CSV record to w, applying whatever quoting is necessary.
+// A record is a slice of strings, where each string is a single field.
+// Fields are separated by w.comma and the record is terminated by "\n"
+// (or "\r\n" when w.use_crlf is set).
+//
+// Returns .No_Progress if w.comma is not a valid delimiter; otherwise returns
+// the first error reported by the underlying io.Writer, if any.
+//
+// If the underlying io.Writer requires flushing, make sure to call io.flush
+write :: proc(w: ^Writer, record: []string) -> io.Error {
+	// Bytes that need special treatment inside a quoted field.
+	SPECIAL_BYTES :: "\n\r\"";
+
+	// needs_quotes reports whether field has to be wrapped in double quotes.
+	needs_quotes :: proc(w: ^Writer, field: string) -> bool {
+		if field == "" {
+			// Empty fields are written bare.
+			return false;
+		}
+		if field == `\.` {
+			// Postgres treats a bare \. specially, so always quote it.
+			return true;
+		}
+
+		if w.comma < utf8.RUNE_SELF {
+			// ASCII delimiter: a plain byte scan is enough.
+			delim := byte(w.comma);
+			for i := 0; i < len(field); i += 1 {
+				c := field[i];
+				if c == '\n' || c == '\r' || c == '"' || c == delim {
+					return true;
+				}
+			}
+		} else {
+			if strings.contains_rune(field, w.comma) >= 0 {
+				return true;
+			}
+			if strings.contains_any(field, SPECIAL_BYTES) {
+				return true;
+			}
+		}
+
+		// A field that starts with whitespace must be quoted as well.
+		first, _ := utf8.decode_rune_in_string(field);
+		return strings.is_space(first);
+	}
+
+	// write_quoted emits field surrounded by double quotes, doubling embedded
+	// quotes and normalizing line breaks according to w.use_crlf.
+	write_quoted :: proc(w: ^Writer, field: string) -> io.Error {
+		if err := io.write_byte(w.w, '"'); err != nil {
+			return err;
+		}
+
+		rest := field;
+		for len(rest) > 0 {
+			// Copy everything up to the next special byte verbatim.
+			cut := strings.index_any(rest, SPECIAL_BYTES);
+			if cut < 0 {
+				cut = len(rest);
+			}
+			if _, err := io.write_string(w.w, rest[:cut]); err != nil {
+				return err;
+			}
+			rest = rest[cut:];
+			if len(rest) == 0 {
+				break;
+			}
+
+			special := rest[0];
+			rest = rest[1:];
+
+			err: io.Error;
+			switch special {
+			case '\r':
+				// In CRLF mode a lone '\r' is dropped.
+				if !w.use_crlf {
+					err = io.write_byte(w.w, '\r');
+				}
+			case '\n':
+				if w.use_crlf {
+					_, err = io.write_string(w.w, "\r\n");
+				} else {
+					err = io.write_byte(w.w, '\n');
+				}
+			case '"':
+				// A quote inside a quoted field is escaped by doubling it.
+				_, err = io.write_string(w.w, `""`);
+			}
+			if err != nil {
+				return err;
+			}
+		}
+
+		return io.write_byte(w.w, '"');
+	}
+
+	if !is_valid_delim(w.comma) {
+		return .No_Progress; // TODO(bill): Is this a good error?
+	}
+
+	for field, idx in record {
+		// Every field except the first is preceded by the delimiter.
+		if idx > 0 {
+			if _, err := io.write_rune(w.w, w.comma); err != nil {
+				return err;
+			}
+		}
+
+		if needs_quotes(w, field) {
+			if err := write_quoted(w, field); err != nil {
+				return err;
+			}
+		} else {
+			if _, err := io.write_string(w.w, field); err != nil {
+				return err;
+			}
+		}
+	}
+
+	// Terminate the record.
+	if !w.use_crlf {
+		return io.write_byte(w.w, '\n');
+	}
+	_, err := io.write_string(w.w, "\r\n");
+	return err;
+}
+
+// write_all writes every record in records, in order, using write, stopping at
+// and returning the first error. If all records are written successfully, the
+// result of writer_flush is returned.
+write_all :: proc(w: ^Writer, records: [][]string) -> io.Error {
+	for i := 0; i < len(records); i += 1 {
+		if err := write(w, records[i]); err != nil {
+			return err;
+		}
+	}
+	return writer_flush(w);
+}
+
+// writer_flush flushes the underlying io.Writer.
+// If the underlying io.Writer does not support flush, nil is returned.
+//
+// The Writer's own fields are not modified; the call is forwarded to io.flush
+// on w.w and its result is returned unchanged.
+writer_flush :: proc(w: ^Writer) -> io.Error {
+ return io.flush(auto_cast w.w);
+}