package csv
import "git.sr.ht/~sbinet/go-arrow/csv"
Package csv reads CSV files and presents the extracted data as records. It also
writes data as records into CSV files.
Code:
Output:
Code:
Output:
Code:
Output:
Example ¶
{
// Build an in-memory CSV payload; the "##" line is a comment and
// ';' separates fields, matching the reader options used below.
f := bytes.NewBufferString(`## a simple set of data: int64;float64;string
0;0;str-0
1;1;str-1
2;2;str-2
3;3;str-3
4;4;str-4
5;5;str-5
6;6;str-6
7;7;str-7
8;8;str-8
9;9;str-9
`)
// The schema describes the three CSV columns, in order.
schema := arrow.NewSchema(
[]arrow.Field{
{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
{Name: "str", Type: arrow.BinaryTypes.String},
},
nil,
)
// Default chunking: the reader yields one record per CSV row.
// '#' marks comment lines; ';' is the field separator.
r := csv.NewReader(f, schema, csv.WithComment('#'), csv.WithComma(';'))
defer r.Release()
n := 0
for r.Next() {
// The record is only valid until the next call to Next.
rec := r.Record()
for i, col := range rec.Columns() {
fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
}
n++
}
// Output:
// rec[0]["i64"]: [0]
// rec[0]["f64"]: [0]
// rec[0]["str"]: ["str-0"]
// rec[1]["i64"]: [1]
// rec[1]["f64"]: [1]
// rec[1]["str"]: ["str-1"]
// rec[2]["i64"]: [2]
// rec[2]["f64"]: [2]
// rec[2]["str"]: ["str-2"]
// rec[3]["i64"]: [3]
// rec[3]["f64"]: [3]
// rec[3]["str"]: ["str-3"]
// rec[4]["i64"]: [4]
// rec[4]["f64"]: [4]
// rec[4]["str"]: ["str-4"]
// rec[5]["i64"]: [5]
// rec[5]["f64"]: [5]
// rec[5]["str"]: ["str-5"]
// rec[6]["i64"]: [6]
// rec[6]["f64"]: [6]
// rec[6]["str"]: ["str-6"]
// rec[7]["i64"]: [7]
// rec[7]["f64"]: [7]
// rec[7]["str"]: ["str-7"]
// rec[8]["i64"]: [8]
// rec[8]["f64"]: [8]
// rec[8]["str"]: ["str-8"]
// rec[9]["i64"]: [9]
// rec[9]["f64"]: [9]
// rec[9]["str"]: ["str-9"]
}
rec[0]["i64"]: [0]
rec[0]["f64"]: [0]
rec[0]["str"]: ["str-0"]
rec[1]["i64"]: [1]
rec[1]["f64"]: [1]
rec[1]["str"]: ["str-1"]
rec[2]["i64"]: [2]
rec[2]["f64"]: [2]
rec[2]["str"]: ["str-2"]
rec[3]["i64"]: [3]
rec[3]["f64"]: [3]
rec[3]["str"]: ["str-3"]
rec[4]["i64"]: [4]
rec[4]["f64"]: [4]
rec[4]["str"]: ["str-4"]
rec[5]["i64"]: [5]
rec[5]["f64"]: [5]
rec[5]["str"]: ["str-5"]
rec[6]["i64"]: [6]
rec[6]["f64"]: [6]
rec[6]["str"]: ["str-6"]
rec[7]["i64"]: [7]
rec[7]["f64"]: [7]
rec[7]["str"]: ["str-7"]
rec[8]["i64"]: [8]
rec[8]["f64"]: [8]
rec[8]["str"]: ["str-8"]
rec[9]["i64"]: [9]
rec[9]["f64"]: [9]
rec[9]["str"]: ["str-9"]
Example (WithChunk)¶
{
// Same CSV payload as the basic example: "##" comment line,
// ';'-separated fields.
f := bytes.NewBufferString(`## a simple set of data: int64;float64;string
0;0;str-0
1;1;str-1
2;2;str-2
3;3;str-3
4;4;str-4
5;5;str-5
6;6;str-6
7;7;str-7
8;8;str-8
9;9;str-9
`)
// The schema describes the three CSV columns, in order.
schema := arrow.NewSchema(
[]arrow.Field{
{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
{Name: "str", Type: arrow.BinaryTypes.String},
},
nil,
)
// WithChunk(3) batches rows: each record holds up to 3 rows, so
// 10 input rows produce 4 records (3+3+3+1, see Output below).
r := csv.NewReader(
f, schema,
csv.WithComment('#'), csv.WithComma(';'),
csv.WithChunk(3),
)
defer r.Release()
n := 0
for r.Next() {
// The record is only valid until the next call to Next.
rec := r.Record()
for i, col := range rec.Columns() {
fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
}
n++
}
// Output:
// rec[0]["i64"]: [0 1 2]
// rec[0]["f64"]: [0 1 2]
// rec[0]["str"]: ["str-0" "str-1" "str-2"]
// rec[1]["i64"]: [3 4 5]
// rec[1]["f64"]: [3 4 5]
// rec[1]["str"]: ["str-3" "str-4" "str-5"]
// rec[2]["i64"]: [6 7 8]
// rec[2]["f64"]: [6 7 8]
// rec[2]["str"]: ["str-6" "str-7" "str-8"]
// rec[3]["i64"]: [9]
// rec[3]["f64"]: [9]
// rec[3]["str"]: ["str-9"]
}
rec[0]["i64"]: [0 1 2]
rec[0]["f64"]: [0 1 2]
rec[0]["str"]: ["str-0" "str-1" "str-2"]
rec[1]["i64"]: [3 4 5]
rec[1]["f64"]: [3 4 5]
rec[1]["str"]: ["str-3" "str-4" "str-5"]
rec[2]["i64"]: [6 7 8]
rec[2]["f64"]: [6 7 8]
rec[2]["str"]: ["str-6" "str-7" "str-8"]
rec[3]["i64"]: [9]
rec[3]["f64"]: [9]
rec[3]["str"]: ["str-9"]
Example (Writer)¶
{
// Round-trip: build a record in memory, write it out as CSV into
// f, then read it back with a csv.Reader and print each record.
f := new(bytes.Buffer)
pool := memory.NewGoAllocator()
// The schema describes the three columns, in order.
schema := arrow.NewSchema(
[]arrow.Field{
{Name: "i64", Type: arrow.PrimitiveTypes.Int64},
{Name: "f64", Type: arrow.PrimitiveTypes.Float64},
{Name: "str", Type: arrow.BinaryTypes.String},
},
nil,
)
// Populate one 10-row record via the builder; the nil second
// argument means no values are marked NULL.
b := array.NewRecordBuilder(pool, schema)
defer b.Release()
b.Field(0).(*array.Int64Builder).AppendValues([]int64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, nil)
b.Field(1).(*array.Float64Builder).AppendValues([]float64{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, nil)
b.Field(2).(*array.StringBuilder).AppendValues([]string{"str-0", "str-1", "str-2", "str-3", "str-4", "str-5", "str-6", "str-7", "str-8", "str-9"}, nil)
rec := b.NewRecord()
defer rec.Release()
// Write the record into f using ';' as the field separator.
w := csv.NewWriter(f, schema, csv.WithComma(';'))
err := w.Write(rec)
if err != nil {
log.Fatal(err)
}
// Flush buffered data, then check for any deferred write error.
err = w.Flush()
if err != nil {
log.Fatal(err)
}
err = w.Error()
if err != nil {
log.Fatal(err)
}
// Read the CSV bytes back with the same schema and separator.
r := csv.NewReader(f, schema, csv.WithComment('#'), csv.WithComma(';'))
defer r.Release()
n := 0
for r.Next() {
// The record is only valid until the next call to Next.
rec := r.Record()
for i, col := range rec.Columns() {
fmt.Printf("rec[%d][%q]: %v\n", n, rec.ColumnName(i), col)
}
n++
}
// Output:
// rec[0]["i64"]: [0]
// rec[0]["f64"]: [0]
// rec[0]["str"]: ["str-0"]
// rec[1]["i64"]: [1]
// rec[1]["f64"]: [1]
// rec[1]["str"]: ["str-1"]
// rec[2]["i64"]: [2]
// rec[2]["f64"]: [2]
// rec[2]["str"]: ["str-2"]
// rec[3]["i64"]: [3]
// rec[3]["f64"]: [3]
// rec[3]["str"]: ["str-3"]
// rec[4]["i64"]: [4]
// rec[4]["f64"]: [4]
// rec[4]["str"]: ["str-4"]
// rec[5]["i64"]: [5]
// rec[5]["f64"]: [5]
// rec[5]["str"]: ["str-5"]
// rec[6]["i64"]: [6]
// rec[6]["f64"]: [6]
// rec[6]["str"]: ["str-6"]
// rec[7]["i64"]: [7]
// rec[7]["f64"]: [7]
// rec[7]["str"]: ["str-7"]
// rec[8]["i64"]: [8]
// rec[8]["f64"]: [8]
// rec[8]["str"]: ["str-8"]
// rec[9]["i64"]: [9]
// rec[9]["f64"]: [9]
// rec[9]["str"]: ["str-9"]
}
rec[0]["i64"]: [0]
rec[0]["f64"]: [0]
rec[0]["str"]: ["str-0"]
rec[1]["i64"]: [1]
rec[1]["f64"]: [1]
rec[1]["str"]: ["str-1"]
rec[2]["i64"]: [2]
rec[2]["f64"]: [2]
rec[2]["str"]: ["str-2"]
rec[3]["i64"]: [3]
rec[3]["f64"]: [3]
rec[3]["str"]: ["str-3"]
rec[4]["i64"]: [4]
rec[4]["f64"]: [4]
rec[4]["str"]: ["str-4"]
rec[5]["i64"]: [5]
rec[5]["f64"]: [5]
rec[5]["str"]: ["str-5"]
rec[6]["i64"]: [6]
rec[6]["f64"]: [6]
rec[6]["str"]: ["str-6"]
rec[7]["i64"]: [7]
rec[7]["f64"]: [7]
rec[7]["str"]: ["str-7"]
rec[8]["i64"]: [8]
rec[8]["f64"]: [8]
rec[8]["str"]: ["str-8"]
rec[9]["i64"]: [9]
rec[9]["f64"]: [9]
rec[9]["str"]: ["str-9"]
Index ¶
- Variables
- type Option
- func WithAllocator(mem memory.Allocator) Option
- func WithCRLF(useCRLF bool) Option
- func WithChunk(n int) Option
- func WithComma(c rune) Option
- func WithComment(c rune) Option
- func WithHeader(useHeader bool) Option
- func WithNullReader(stringsCanBeNull bool, nullValues ...string) Option
- func WithNullWriter(null string) Option
- type Reader
- func NewReader(r io.Reader, schema *arrow.Schema, opts ...Option) *Reader
- func (r *Reader) Err() error
- func (r *Reader) Next() bool
- func (r *Reader) Record() array.Record
- func (r *Reader) Release()
- func (r *Reader) Retain()
- func (r *Reader) Schema() *arrow.Schema
- type Writer
Examples ¶
Variables ¶
var DefaultNullValues = []string{"", "NULL", "null"}
DefaultNullValues is the set of values considered as NULL values by default when Reader is configured to handle NULL values.
Types ¶
type Option ¶
type Option func(config)
Option configures a CSV reader/writer.
func WithAllocator ¶
WithAllocator specifies the Arrow memory allocator used while building records.
func WithCRLF ¶
WithCRLF specifies the line terminator used while writing CSV files. If useCRLF is true, \r\n is used as the line terminator, otherwise \n is used. The default value is false.
func WithChunk ¶
WithChunk specifies the chunk size used while parsing CSV files.
If n is zero or 1, no chunking will take place and the reader will create one record per row. If n is greater than 1, chunks of n rows will be read. If n is negative, the reader will load the whole CSV file into memory and create one big record with all the rows.
func WithComma ¶
WithComma specifies the fields separation character used while parsing CSV files.
func WithComment ¶
WithComment specifies the comment character used while parsing CSV files.
func WithHeader ¶
WithHeader enables or disables CSV-header handling.
func WithNullReader ¶
WithNullReader sets options for a CSV Reader pertaining to NULL value handling. If stringsCanBeNull is true, then a string that matches one of the nullValues set will be interpreted as NULL. Numeric columns will be checked for nulls in all cases. If no nullValues arguments are passed in, the defaults set in NewReader() will be kept.
When no NULL values are given, the default set is taken from DefaultNullValues.
func WithNullWriter ¶
WithNullWriter sets the null string written for NULL values. The default is set in NewWriter().
type Reader ¶
type Reader struct {
// contains filtered or unexported fields
}
Reader wraps encoding/csv.Reader and creates array.Records from a schema.
func NewReader ¶
NewReader returns a reader that reads from the CSV file and creates array.Records from the given schema.
NewReader panics if the given schema contains fields that have types that are not primitive types.
func (*Reader) Err ¶
Err returns the last error encountered during the iteration over the underlying CSV file.
func (*Reader) Next ¶
Next returns whether a Record could be extracted from the underlying CSV file.
Next panics if the number of records extracted from a CSV row does not match the number of fields of the associated schema.
func (*Reader) Record ¶
Record returns the current record that has been extracted from the underlying CSV file. It is valid until the next call to Next.
func (*Reader) Release ¶
func (r *Reader) Release()
Release decreases the reference count by 1. When the reference count goes to zero, the memory is freed. Release may be called simultaneously from multiple goroutines.
func (*Reader) Retain ¶
func (r *Reader) Retain()
Retain increases the reference count by 1. Retain may be called simultaneously from multiple goroutines.
func (*Reader) Schema ¶
func (r *Reader) Schema() *arrow.Schema
type Writer ¶
type Writer struct {
// contains filtered or unexported fields
}
Writer wraps encoding/csv.Writer and writes array.Record based on a schema.
func NewWriter ¶
NewWriter returns a writer that writes array.Records to the CSV file with the given schema.
NewWriter panics if the given schema contains fields that have types that are not primitive types.
func (*Writer) Error ¶
Error reports any error that has occurred during a previous Write or Flush.
func (*Writer) Flush ¶
Flush writes any buffered data to the underlying csv Writer. If an error occurred during the Flush, it is returned.
func (*Writer) Schema ¶
func (w *Writer) Schema() *arrow.Schema
func (*Writer) Write ¶
Write writes a single Record as one row to the CSV file
Source Files ¶
- Version
- v0.3.0 (latest)
- Published
- Feb 17, 2025
- Platform
- linux/amd64
- Imports
- 11 packages
- Last checked
- 2 months ago –
Tools for package owners.