Add encoding/csv Writer

author: gingerBill <bill@gingerbill.org> 2021-01-09 00:21:47 +0000
committer: gingerBill <bill@gingerbill.org> 2021-01-09 00:21:47 +0000
commit: 37253f2621ace59c367ecf86f362b8854026181d (patch)
tree: 4a9f256ac96472698a48eddb14d71f3f52046fa5 /core/encoding/csv
parent: da380d6fc41bb36de1eb8d820e15715e986710ba (diff)
2 files changed, 165 insertions, 18 deletions
diff --git a/core/encoding/csv/reader.odin b/core/encoding/csv/reader.odin
index 9baaede24..4c28ea9f3 100644
--- a/core/encoding/csv/reader.odin
+++ b/core/encoding/csv/reader.odin
@@ -56,22 +56,22 @@ Reader :: struct {
 }
 
 
-Parser_Error_Kind :: enum {
+Reader_Error_Kind :: enum {
 	Bare_Quote,
 	Quote,
 	Field_Count,
 	Invalid_Delim,
 }
 
-parser_error_kind_string := [Parser_Error_Kind]string{
+reader_error_kind_string := [Reader_Error_Kind]string{
 	.Bare_Quote     = "bare \" in non-quoted field",
 	.Quote          = "extra or missing \" in quoted field",
 	.Field_Count    = "wrong field count",
 	.Invalid_Delim  = "invalid delimiter",
 };
 
-Parser_Error :: struct {
-	kind:          Parser_Error_Kind,
+Reader_Error :: struct {
+	kind:          Reader_Error_Kind,
 	start_line:    int,
 	line:          int,
 	column:        int,
@@ -79,7 +79,7 @@ Parser_Error :: struct {
 }
 
 Error :: union {
-	Parser_Error,
+	Reader_Error,
 	io.Error,
 }
 
@@ -182,6 +182,14 @@ read_all_from_string :: proc(input: string, records_allocator := context.allocat
 	return read_all(&r, records_allocator);
 }
 
+@private
+is_valid_delim :: proc(r: rune) -> bool {
+	switch r {
+	case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
+		return false;
+	}
+	return utf8.valid_rune(r);
+}
 
 @private
 _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.allocator) -> ([]string, Error) {
@@ -214,14 +222,6 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 		return line, err;
 	}
 
-	is_valid_delim :: proc(r: rune) -> bool {
-		switch r {
-		case 0, '"', '\r', '\n', utf8.RUNE_ERROR:
-			return false;
-		}
-		return utf8.valid_rune(r);
-	}
-
 	length_newline :: proc(b: []byte) -> int {
 		if len(b) > 0 && b[len(b)-1] == '\n' {
 			return 1;
@@ -237,7 +237,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 	if r.comma == r.comment ||
 	   !is_valid_delim(r.comma) ||
 	   (r.comment != 0 && !is_valid_delim(r.comment)) {
-		err := Parser_Error{
+		err := Reader_Error{
 			kind = .Invalid_Delim,
 			line = r.line_count,
 		};
@@ -287,7 +287,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 			if !r.lazy_quotes {
 				if j := bytes.index_byte(field, '"'); j >= 0 {
 					column := utf8.rune_count(full_line[:len(full_line) - len(line[j:])]);
-					err = Parser_Error{
+					err = Reader_Error{
 						kind = .Bare_Quote,
 						start_line = record_line,
 						line = r.line_count,
@@ -327,7 +327,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 						append(&r.record_buffer, '"');
 					case: // invalid non-escaped quote
 						column := utf8.rune_count(full_line[:len(full_line) - len(line) - quote_len]);
-						err = Parser_Error{
+						err = Reader_Error{
 							kind = .Quote,
 							start_line = record_line,
 							line = r.line_count,
@@ -350,7 +350,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 				case:
 					if !r.lazy_quotes && err_read == nil {
 						column := utf8.rune_count(full_line);
-						err = Parser_Error{
+						err = Reader_Error{
 							kind = .Quote,
 							start_line = record_line,
 							line = r.line_count,
@@ -390,7 +390,7 @@ _read_record :: proc(r: ^Reader, dst: ^[dynamic]string, allocator := context.all
 
 	if r.fields_per_record > 0 {
 		if len(dst) != r.fields_per_record && err == nil {
-			err = Parser_Error{
+			err = Reader_Error{
 				kind = .Field_Count,
 				start_line = record_line,
 				line = r.line_count,
diff --git a/core/encoding/csv/writer.odin b/core/encoding/csv/writer.odin
new file mode 100644
index 000000000..bcb6ecc0d
--- /dev/null
+++ b/core/encoding/csv/writer.odin
@@ -0,0 +1,147 @@
+package csv
+
+import "core:io"
+import "core:strings"
+import "core:unicode/utf8"
+
+// Writer is a data structure used for writing records using a CSV-encoding.
+Writer :: struct {
+	// Field delimiter (set to ',' with writer_init)
+	comma: rune,
+
+	// if set to true, \r\n will be used as the line terminator
+	use_crlf: bool,
+
+	w: io.Writer,
+}
+
+// writer_init initializes a Writer that writes to w
+writer_init :: proc(writer: ^Writer, w: io.Writer) {
+	writer.comma = ',';
+	writer.w = w;
+}
+
+// write writes a single CSV record to w, applying any necessary quoting.
+// A record is a slice of strings, where each string is a single field.
+//
+// If the underlying io.Writer requires flushing, make sure to call io.flush
+write :: proc(w: ^Writer, record: []string) -> io.Error {
+	CHAR_SET :: "\n\r\"";
+
+	field_needs_quoting :: proc(w: ^Writer, field: string) -> bool {
+		switch {
+		case field == "": // No need to quote empty strings
+			return false;
+		case field == `\.`: // Postgres is weird
+			return true;
+		case w.comma < utf8.RUNE_SELF: // ASCII optimization
+			for i in 0..<len(field) {
+				switch field[i] {
+				case '\n', '\r', '"', byte(w.comma):
+					return true;
+				}
+			}
+		case:
+			if strings.contains_rune(field, w.comma) >= 0 {
+				return true;
+			}
+			if strings.contains_any(field, CHAR_SET) {
+				return true;
+			}
+		}
+
+		// Leading spaces need quoting
+		r, _ := utf8.decode_rune_in_string(field);
+		return strings.is_space(r);
+	}
+
+	if !is_valid_delim(w.comma) {
+		return .No_Progress; // TODO(bill): Is this a good error?
+	}
+
+	for _, field_idx in record {
+		// NOTE(bill): declared like this so that the field can be modified later if necessary
+		field := record[field_idx];
+
+		if field_idx > 0 {
+			if _, err := io.write_rune(w.w, w.comma); err != nil {
+				return err;
+			}
+		}
+
+		if !field_needs_quoting(w, field) {
+			if _, err := io.write_string(w.w, field); err != nil {
+				return err;
+			}
+			continue;
+		}
+
+		if err := io.write_byte(w.w, '"'); err != nil {
+			return err;
+		}
+
+		for len(field) > 0 {
+			i := strings.index_any(field, CHAR_SET);
+			if i < 0 {
+				i = len(field);
+			}
+
+			if _, err := io.write_string(w.w, field[:i]); err != nil {
+				return err;
+			}
+			field = field[i:];
+
+			if len(field) > 0 {
+				switch field[0] {
+				case '\r':
+					if !w.use_crlf {
+						if err := io.write_byte(w.w, '\r'); err != nil {
+							return err;
+						}
+					}
+				case '\n':
+					if w.use_crlf {
+						if _, err := io.write_string(w.w, "\r\n"); err != nil {
+							return err;
+						}
+					} else {
+						if err := io.write_byte(w.w, '\n'); err != nil {
+							return err;
+						}
+					}
+				case '"':
+					if _, err := io.write_string(w.w, `""`); err != nil {
+						return err;
+					}
+				}
+				field = field[1:];
+			}
+		}
+		if err := io.write_byte(w.w, '"'); err != nil {
+			return err;
+		}
+	}
+
+	if w.use_crlf {
+		_, err := io.write_string(w.w, "\r\n");
+		return err;
+	}
+	return io.write_byte(w.w, '\n');
+}
+
+// write_all writes multiple CSV records to w using write, and then flushes (if necessary).
+write_all :: proc(w: ^Writer, records: [][]string) -> io.Error {
+	for record in records {
+		err := write(w, record);
+		if err != nil {
+			return err;
+		}
+	}
+	return writer_flush(w);
+}
+
+// writer_flush flushes the underlying io.Writer.
+// If the underlying io.Writer does not support flush, nil is returned.
+writer_flush :: proc(w: ^Writer) -> io.Error {
+	return io.flush(auto_cast w.w);
+}
author	gingerBill <bill@gingerbill.org>	2021-01-09 00:21:47 +0000
committer	gingerBill <bill@gingerbill.org>	2021-01-09 00:21:47 +0000
commit	37253f2621ace59c367ecf86f362b8854026181d (patch)
tree	4a9f256ac96472698a48eddb14d71f3f52046fa5 /core/encoding/csv
parent	da380d6fc41bb36de1eb8d820e15715e986710ba (diff)