diff options
author | Hiroshi SHIBATA <[email protected]> | 2022-12-09 08:46:14 +0900 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2022-12-09 16:36:22 +0900 |
commit | 643918ecfe9c980f251247de6acd3be6280da24c (patch) | |
tree | a5b4011c13ee3af5b110e377a839e79045266dcd | |
parent | 260a00d80e4dcc930b040313a99da29e4b1e6678 (diff) |
Merge csv-3.2.6
Notes:
Merged: https://github.com/ruby/ruby/pull/6890
-rw-r--r-- | lib/csv.rb | 489 | ||||
-rw-r--r-- | lib/csv/fields_converter.rb | 5 | ||||
-rw-r--r-- | lib/csv/input_record_separator.rb | 15 | ||||
-rw-r--r-- | lib/csv/parser.rb | 293 | ||||
-rw-r--r-- | lib/csv/row.rb | 229 | ||||
-rw-r--r-- | lib/csv/table.rb | 626 | ||||
-rw-r--r-- | lib/csv/version.rb | 2 | ||||
-rw-r--r-- | lib/csv/writer.rb | 10 | ||||
-rw-r--r-- | test/csv/interface/test_read.rb | 18 | ||||
-rw-r--r-- | test/csv/interface/test_write.rb | 9 | ||||
-rw-r--r-- | test/csv/parse/test_convert.rb | 55 | ||||
-rw-r--r-- | test/csv/parse/test_general.rb | 88 | ||||
-rw-r--r-- | test/csv/parse/test_header.rb | 9 | ||||
-rw-r--r-- | test/csv/parse/test_inputs_scanner.rb | 63 | ||||
-rw-r--r-- | test/csv/parse/test_liberal_parsing.rb | 11 | ||||
-rw-r--r-- | test/csv/parse/test_read.rb | 27 | ||||
-rw-r--r-- | test/csv/test_data_converters.rb | 84 | ||||
-rw-r--r-- | test/csv/test_encodings.rb | 31 | ||||
-rw-r--r-- | test/csv/test_patterns.rb | 27 | ||||
-rw-r--r-- | test/csv/test_table.rb | 73 |
20 files changed, 1749 insertions, 415 deletions
diff --git a/lib/csv.rb b/lib/csv.rb index 06a490f34c..0307033941 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -48,7 +48,7 @@ # # === Interface # -# * CSV now uses Hash-style parameters to set options. +# * CSV now uses keyword parameters to set options. # * CSV no longer has generate_row() or parse_row(). # * The old CSV's Reader and Writer classes have been dropped. # * CSV::open() is now more like Ruby's open(). @@ -95,16 +95,24 @@ require "stringio" require_relative "csv/fields_converter" require_relative "csv/input_record_separator" -require_relative "csv/match_p" require_relative "csv/parser" require_relative "csv/row" require_relative "csv/table" require_relative "csv/writer" -using CSV::MatchP if CSV.const_defined?(:MatchP) - # == \CSV -# \CSV (comma-separated variables) data is a text representation of a table: +# +# === In a Hurry? +# +# If you are familiar with \CSV data and have a particular task in mind, +# you may want to go directly to the: +# - {Recipes for CSV}[doc/csv/recipes/recipes_rdoc.html]. +# +# Otherwise, read on here, about the API: classes, methods, and constants. +# +# === \CSV Data +# +# \CSV (comma-separated values) data is a text representation of a table: # - A _row_ _separator_ delimits table rows. # A common row separator is the newline character <tt>"\n"</tt>. # - A _column_ _separator_ delimits fields in a row. @@ -346,7 +354,9 @@ using CSV::MatchP if CSV.const_defined?(:MatchP) # - +row_sep+: Specifies the row separator; used to delimit rows. # - +col_sep+: Specifies the column separator; used to delimit fields. # - +quote_char+: Specifies the quote character; used to quote fields. -# - +field_size_limit+: Specifies the maximum field size allowed. +# - +field_size_limit+: Specifies the maximum field size + 1 allowed. +# Deprecated since 3.2.3. Use +max_field_size+ instead. +# - +max_field_size+: Specifies the maximum field size allowed. # - +converters+: Specifies the field converters to be used. 
# - +unconverted_fields+: Specifies whether unconverted fields are to be available. # - +headers+: Specifies whether data contains headers, @@ -703,7 +713,7 @@ using CSV::MatchP if CSV.const_defined?(:MatchP) # Header converters operate only on headers (and not on other rows). # # There are three ways to use header \converters; -# these examples use built-in header converter +:dowhcase+, +# these examples use built-in header converter +:downcase+, # which downcases each parsed header. # # - Option +header_converters+ with a singleton parsing method: @@ -853,8 +863,9 @@ class CSV # <b><tt>index</tt></b>:: The zero-based index of the field in its row. # <b><tt>line</tt></b>:: The line of the data source this row is from. # <b><tt>header</tt></b>:: The header for the column, when available. + # <b><tt>quoted?</tt></b>:: True or false, whether the original value is quoted or not. # - FieldInfo = Struct.new(:index, :line, :header) + FieldInfo = Struct.new(:index, :line, :header, :quoted?) # A Regexp used to find and convert some common Date formats. DateMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2},?\s+\d{2,4} | @@ -862,10 +873,9 @@ class CSV # A Regexp used to find and convert some common DateTime formats. DateTimeMatcher = / \A(?: (\w+,?\s+)?\w+\s+\d{1,2}\s+\d{1,2}:\d{1,2}:\d{1,2},?\s+\d{2,4} | - \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}:\d{2} | - # ISO-8601 + # ISO-8601 and RFC-3339 (space instead of T) recognized by DateTime.parse \d{4}-\d{2}-\d{2} - (?:T\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? + (?:[T\s]\d{2}:\d{2}(?::\d{2}(?:\.\d+)?(?:[+-]\d{2}(?::\d{2})|Z)?)?)? )\z /x # The encoding used by all converters. @@ -915,7 +925,8 @@ class CSV symbol: lambda { |h| h.encode(ConverterEncoding).downcase.gsub(/[^\s\w]+/, "").strip. gsub(/\s+/, "_").to_sym - } + }, + symbol_raw: lambda { |h| h.encode(ConverterEncoding).to_sym } } # Default values for method options. @@ -926,6 +937,7 @@ class CSV quote_char: '"', # For parsing. 
field_size_limit: nil, + max_field_size: nil, converters: nil, unconverted_fields: nil, headers: false, @@ -993,7 +1005,7 @@ class CSV def instance(data = $stdout, **options) # create a _signature_ for this method call, data object and options sig = [data.object_id] + - options.values_at(*DEFAULT_OPTIONS.keys.sort_by { |sym| sym.to_s }) + options.values_at(*DEFAULT_OPTIONS.keys) # fetch or create the instance for this signature @@instances ||= Hash.new @@ -1007,65 +1019,190 @@ class CSV end # :call-seq: - # filter(**options) {|row| ... } - # filter(in_string, **options) {|row| ... } - # filter(in_io, **options) {|row| ... } - # filter(in_string, out_string, **options) {|row| ... } - # filter(in_string, out_io, **options) {|row| ... } - # filter(in_io, out_string, **options) {|row| ... } - # filter(in_io, out_io, **options) {|row| ... } - # - # Reads \CSV input and writes \CSV output. - # - # For each input row: - # - Forms the data into: - # - A CSV::Row object, if headers are in use. - # - An \Array of Arrays, otherwise. - # - Calls the block with that object. - # - Appends the block's return value to the output. + # filter(in_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table + # filter(in_string_or_io, out_string_or_io, **options) {|row| ... } -> array_of_arrays or csv_table + # filter(**options) {|row| ... } -> array_of_arrays or csv_table # - # Arguments: - # * \CSV source: - # * Argument +in_string+, if given, should be a \String object; - # it will be put into a new StringIO object positioned at the beginning. - # * Argument +in_io+, if given, should be an IO object that is - # open for reading; on return, the IO object will be closed. - # * If neither +in_string+ nor +in_io+ is given, - # the input stream defaults to {ARGF}[https://ruby-doc.org/core/ARGF.html]. 
- # * \CSV output: - # * Argument +out_string+, if given, should be a \String object; - # it will be put into a new StringIO object positioned at the beginning. - # * Argument +out_io+, if given, should be an IO object that is - # ppen for writing; on return, the IO object will be closed. - # * If neither +out_string+ nor +out_io+ is given, - # the output stream defaults to <tt>$stdout</tt>. - # * Argument +options+ should be keyword arguments. - # - Each argument name that is prefixed with +in_+ or +input_+ - # is stripped of its prefix and is treated as an option - # for parsing the input. - # Option +input_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>. - # - Each argument name that is prefixed with +out_+ or +output_+ - # is stripped of its prefix and is treated as an option - # for generating the output. - # Option +output_row_sep+ defaults to <tt>$INPUT_RECORD_SEPARATOR</tt>. - # - Each argument not prefixed as above is treated as an option - # both for parsing the input and for generating the output. - # - See {Options for Parsing}[#class-CSV-label-Options+for+Parsing] - # and {Options for Generating}[#class-CSV-label-Options+for+Generating]. + # - Parses \CSV from a source (\String, \IO stream, or ARGF). + # - Calls the given block with each parsed row: + # - Without headers, each row is an \Array. + # - With headers, each row is a CSV::Row. + # - Generates \CSV to an output (\String, \IO stream, or STDOUT). + # - Returns the parsed source: + # - Without headers, an \Array of \Arrays. + # - With headers, a CSV::Table. # - # Example: - # in_string = "foo,0\nbar,1\nbaz,2\n" + # When +in_string_or_io+ is given, but not +out_string_or_io+, + # parses from the given +in_string_or_io+ + # and generates to STDOUT. + # + # \String input without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" + # CSV.filter(in_string) do |row| + # row[0].upcase! 
+ # row[1] = - row[1].to_i + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \String input with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # CSV.filter(in_string, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => #<CSV::Table mode:col_or_row row_count:4> + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \IO stream input without headers: + # + # File.write('t.csv', "foo,0\nbar,1\nbaz,2") + # File.open('t.csv') do |in_io| + # CSV.filter(in_io) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # \IO stream input with headers: + # + # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") + # File.open('t.csv') do |in_io| + # CSV.filter(in_io, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => #<CSV::Table mode:col_or_row row_count:4> + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # When both +in_string_or_io+ and +out_string_or_io+ are given, + # parses from +in_string_or_io+ and generates to +out_string_or_io+. + # + # \String output without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" # out_string = '' # CSV.filter(in_string, out_string) do |row| - # row[0] = row[0].upcase - # row[1] *= 4 - # end - # out_string # => "FOO,0000\nBAR,1111\nBAZ,2222\n" + # row[0].upcase! + # row[1] = - row[1].to_i + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # out_string # => "FOO,0\nBAR,-1\nBAZ,-2\n" + # + # \String output with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # out_string = '' + # CSV.filter(in_string, out_string, headers: true) do |row| + # row[0].upcase! 
+ # row[1] = - row[1].to_i + # end # => #<CSV::Table mode:col_or_row row_count:4> + # out_string # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" + # + # \IO stream output without headers: + # + # in_string = "foo,0\nbar,1\nbaz,2" + # File.open('t.csv', 'w') do |out_io| + # CSV.filter(in_string, out_io) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => [["FOO", 0], ["BAR", -1], ["BAZ", -2]] + # File.read('t.csv') # => "FOO,0\nBAR,-1\nBAZ,-2\n" + # + # \IO stream output with headers: + # + # in_string = "Name,Value\nfoo,0\nbar,1\nbaz,2" + # File.open('t.csv', 'w') do |out_io| + # CSV.filter(in_string, out_io, headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # end # => #<CSV::Table mode:col_or_row row_count:4> + # File.read('t.csv') # => "Name,Value\nFOO,0\nBAR,-1\nBAZ,-2\n" + # + # When neither +in_string_or_io+ nor +out_string_or_io+ given, + # parses from {ARGF}[rdoc-ref:ARGF] + # and generates to STDOUT. + # + # Without headers: + # + # # Put Ruby code into a file. + # ruby = <<-EOT + # require 'csv' + # CSV.filter do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # EOT + # File.write('t.rb', ruby) + # # Put some CSV into a file. + # File.write('t.csv', "foo,0\nbar,1\nbaz,2") + # # Run the Ruby code with CSV filename as argument. + # system(Gem.ruby, "t.rb", "t.csv") + # + # Output (to STDOUT): + # + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # With headers: + # + # # Put Ruby code into a file. + # ruby = <<-EOT + # require 'csv' + # CSV.filter(headers: true) do |row| + # row[0].upcase! + # row[1] = - row[1].to_i + # end + # EOT + # File.write('t.rb', ruby) + # # Put some CSV into a file. + # File.write('t.csv', "Name,Value\nfoo,0\nbar,1\nbaz,2") + # # Run the Ruby code with CSV filename as argument. + # system(Gem.ruby, "t.rb", "t.csv") + # + # Output (to STDOUT): + # + # Name,Value + # FOO,0 + # BAR,-1 + # BAZ,-2 + # + # Arguments: + # + # * Argument +in_string_or_io+ must be a \String or an \IO stream. 
+ # * Argument +out_string_or_io+ must be a \String or an \IO stream. + # * Arguments <tt>**options</tt> must be keyword options. + # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. def filter(input=nil, output=nil, **options) # parse options for input, output, or both in_options, out_options = Hash.new, {row_sep: InputRecordSeparator.value} options.each do |key, value| - case key.to_s + case key when /\Ain(?:put)?_(.+)\Z/ in_options[$1.to_sym] = value when /\Aout(?:put)?_(.+)\Z/ @@ -1107,111 +1244,90 @@ class CSV # # :call-seq: - # foreach(path, mode='r', **options) {|row| ... ) - # foreach(io, mode='r', **options {|row| ... ) - # foreach(path, mode='r', headers: ..., **options) {|row| ... ) - # foreach(io, mode='r', headers: ..., **options {|row| ... ) - # foreach(path, mode='r', **options) -> new_enumerator - # foreach(io, mode='r', **options -> new_enumerator + # foreach(path_or_io, mode='r', **options) {|row| ... ) + # foreach(path_or_io, mode='r', **options) -> new_enumerator # - # Calls the block with each row read from source +path+ or +io+. + # Calls the block with each row read from source +path_or_io+. # - # * Argument +path+, if given, must be the path to a file. - # :include: ../doc/csv/arguments/io.rdoc - # * Argument +mode+, if given, must be a \File mode - # See {Open Mode}[IO.html#method-c-new-label-Open+Mode]. - # * Arguments <tt>**options</tt> must be keyword options. - # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. - # * This method optionally accepts an additional <tt>:encoding</tt> option - # that you can use to specify the Encoding of the data read from +path+ or +io+. - # You must provide this unless your data is in the encoding - # given by <tt>Encoding::default_external</tt>. - # Parsing will use this to determine how to parse the data. - # You may provide a second Encoding to - # have the data transcoded as it is read. 
For example, - # encoding: 'UTF-32BE:UTF-8' - # would read +UTF-32BE+ data from the file - # but transcode it to +UTF-8+ before parsing. - # - # ====== Without Option +headers+ + # \Path input without headers: # - # Without option +headers+, returns each row as an \Array object. - # - # These examples assume prior execution of: # string = "foo,0\nbar,1\nbaz,2\n" - # path = 't.csv' - # File.write(path, string) + # in_path = 't.csv' + # File.write(in_path, string) + # CSV.foreach(in_path) {|row| p row } # - # Read rows from a file at +path+: - # CSV.foreach(path) {|row| p row } # Output: - # ["foo", "0"] - # ["bar", "1"] - # ["baz", "2"] # - # Read rows from an \IO object: - # File.open(path) do |file| - # CSV.foreach(file) {|row| p row } - # end - # - # Output: # ["foo", "0"] # ["bar", "1"] # ["baz", "2"] # - # Returns a new \Enumerator if no block given: - # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")> - # CSV.foreach(File.open(path)) # => #<Enumerator: CSV:foreach(#<File:t.csv>, "r")> + # \Path input with headers: + # + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # in_path = 't.csv' + # File.write(in_path, string) + # CSV.foreach(in_path, headers: true) {|row| p row } # - # Issues a warning if an encoding is unsupported: - # CSV.foreach(File.open(path), encoding: 'foo:bar') {|row| } # Output: - # warning: Unsupported encoding foo ignored - # warning: Unsupported encoding bar ignored # - # ====== With Option +headers+ + # <CSV::Row "Name":"foo" "Value":"0"> + # <CSV::Row "Name":"bar" "Value":"1"> + # <CSV::Row "Name":"baz" "Value":"2"> # - # With {option +headers+}[#class-CSV-label-Option+headers], - # returns each row as a CSV::Row object. 
+ # \IO stream input without headers: # - # These examples assume prior execution of: - # string = "Name,Count\nfoo,0\nbar,1\nbaz,2\n" + # string = "foo,0\nbar,1\nbaz,2\n" # path = 't.csv' # File.write(path, string) - # - # Read rows from a file at +path+: - # CSV.foreach(path, headers: true) {|row| p row } + # File.open('t.csv') do |in_io| + # CSV.foreach(in_io) {|row| p row } + # end # # Output: - # #<CSV::Row "Name":"foo" "Count":"0"> - # #<CSV::Row "Name":"bar" "Count":"1"> - # #<CSV::Row "Name":"baz" "Count":"2"> # - # Read rows from an \IO object: - # File.open(path) do |file| - # CSV.foreach(file, headers: true) {|row| p row } + # ["foo", "0"] + # ["bar", "1"] + # ["baz", "2"] + # + # \IO stream input with headers: + # + # string = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # File.open('t.csv') do |in_io| + # CSV.foreach(in_io, headers: true) {|row| p row } # end # # Output: - # #<CSV::Row "Name":"foo" "Count":"0"> - # #<CSV::Row "Name":"bar" "Count":"1"> - # #<CSV::Row "Name":"baz" "Count":"2"> - # - # --- # - # Raises an exception if +path+ is a \String, but not the path to a readable file: - # # Raises Errno::ENOENT (No such file or directory @ rb_sysopen - nosuch.csv): - # CSV.foreach('nosuch.csv') {|row| } + # <CSV::Row "Name":"foo" "Value":"0"> + # <CSV::Row "Name":"bar" "Value":"1"> + # <CSV::Row "Name":"baz" "Value":"2"> # - # Raises an exception if +io+ is an \IO object, but not open for reading: - # io = File.open(path, 'w') {|row| } - # # Raises TypeError (no implicit conversion of nil into String): - # CSV.foreach(io) {|row| } + # With no block given, returns an \Enumerator: # - # Raises an exception if +mode+ is invalid: - # # Raises ArgumentError (invalid access mode nosuch): - # CSV.foreach(path, 'nosuch') {|row| } + # string = "foo,0\nbar,1\nbaz,2\n" + # path = 't.csv' + # File.write(path, string) + # CSV.foreach(path) # => #<Enumerator: CSV:foreach("t.csv", "r")> # + # Arguments: + # * Argument 
+path_or_io+ must be a file path or an \IO stream. + # * Argument +mode+, if given, must be a \File mode + # See {Open Mode}[https://ruby-doc.org/core/IO.html#method-c-new-label-Open+Mode]. + # * Arguments <tt>**options</tt> must be keyword options. + # See {Options for Parsing}[#class-CSV-label-Options+for+Parsing]. + # * This method optionally accepts an additional <tt>:encoding</tt> option + # that you can use to specify the Encoding of the data read from +path+ or +io+. + # You must provide this unless your data is in the encoding + # given by <tt>Encoding::default_external</tt>. + # Parsing will use this to determine how to parse the data. + # You may provide a second Encoding to + # have the data transcoded as it is read. For example, + # encoding: 'UTF-32BE:UTF-8' + # would read +UTF-32BE+ data from the file + # but transcode it to +UTF-8+ before parsing. def foreach(path, mode="r", **options, &block) return to_enum(__method__, path, mode, **options) unless block_given? open(path, mode, **options) do |csv| @@ -1349,6 +1465,46 @@ class CSV (new(str, **options) << row).string end + # :call-seq: + # CSV.generate_lines(rows) + # CSV.generate_lines(rows, **options) + # + # Returns the \String created by generating \CSV from + # using the specified +options+. + # + # Argument +rows+ must be an \Array of row. Row is \Array of \String or \CSV::Row. + # + # Special options: + # * Option <tt>:row_sep</tt> defaults to <tt>"\n"</tt> on Ruby 3.0 or later + # and <tt>$INPUT_RECORD_SEPARATOR</tt> (<tt>$/</tt>) otherwise.: + # $INPUT_RECORD_SEPARATOR # => "\n" + # * This method accepts an additional option, <tt>:encoding</tt>, which sets the base + # Encoding for the output. This method will try to guess your Encoding from + # the first non-+nil+ field in +row+, if possible, but you may need to use + # this parameter as a backup plan. + # + # For other +options+, + # see {Options for Generating}[#class-CSV-label-Options+for+Generating]. 
+ # + # --- + # + # Returns the \String generated from an + # CSV.generate_lines([['foo', '0'], ['bar', '1'], ['baz', '2']]) # => "foo,0\nbar,1\nbaz,2\n" + # + # --- + # + # Raises an exception + # # Raises NoMethodError (undefined method `each' for :foo:Symbol) + # CSV.generate_lines(:foo) + # + def generate_lines(rows, **options) + self.generate(**options) do |csv| + rows.each do |row| + csv << row + end + end + end + # # :call-seq: # open(file_path, mode = "rb", **options ) -> new_csv @@ -1357,7 +1513,7 @@ class CSV # open(io, mode = "rb", **options ) { |csv| ... } -> object # # possible options elements: - # hash form: + # keyword form: # :invalid => nil # raise error on invalid byte sequence (default) # :invalid => :replace # replace invalid byte sequence # :undef => :replace # replace undefined conversion @@ -1424,10 +1580,14 @@ class CSV def open(filename, mode="r", **options) # wrap a File opened with the remaining +args+ with no newline # decorator - file_opts = {universal_newline: false}.merge(options) + file_opts = options.dup + unless file_opts.key?(:newline) + file_opts[:universal_newline] ||= false + end options.delete(:invalid) options.delete(:undef) options.delete(:replace) + options.delete_if {|k, _| /newline\z/.match?(k)} begin f = File.open(filename, mode, **file_opts) @@ -1746,6 +1906,7 @@ class CSV row_sep: :auto, quote_char: '"', field_size_limit: nil, + max_field_size: nil, converters: nil, unconverted_fields: nil, headers: false, @@ -1769,8 +1930,19 @@ class CSV raise ArgumentError.new("Cannot parse nil as CSV") if data.nil? 
if data.is_a?(String) + if encoding + if encoding.is_a?(String) + data_external_encoding, data_internal_encoding = encoding.split(":", 2) + if data_internal_encoding + data = data.encode(data_internal_encoding, data_external_encoding) + else + data = data.dup.force_encoding(data_external_encoding) + end + else + data = data.dup.force_encoding(encoding) + end + end @io = StringIO.new(data) - @io.set_encoding(encoding || data.encoding) else @io = data end @@ -1788,11 +1960,14 @@ class CSV @initial_header_converters = header_converters @initial_write_converters = write_converters + if max_field_size.nil? and field_size_limit + max_field_size = field_size_limit - 1 + end @parser_options = { column_separator: col_sep, row_separator: row_sep, quote_character: quote_char, - field_size_limit: field_size_limit, + max_field_size: max_field_size, unconverted_fields: unconverted_fields, headers: headers, return_headers: return_headers, @@ -1860,11 +2035,25 @@ class CSV # Returns the limit for field size; used for parsing; # see {Option +field_size_limit+}[#class-CSV-label-Option+field_size_limit]: # CSV.new('').field_size_limit # => nil + # + # Deprecated since 3.2.3. Use +max_field_size+ instead. def field_size_limit parser.field_size_limit end # :call-seq: + # csv.max_field_size -> integer or nil + # + # Returns the limit for field size; used for parsing; + # see {Option +max_field_size+}[#class-CSV-label-Option+max_field_size]: + # CSV.new('').max_field_size # => nil + # + # Since 3.2.3. 
+ def max_field_size + parser.max_field_size + end + + # :call-seq: # csv.skip_lines -> regexp or nil # # Returns the \Regexp used to identify comment lines; used for parsing; @@ -1994,7 +2183,7 @@ class CSV end # :call-seq: - # csv.encoding -> endcoding + # csv.encoding -> encoding # # Returns the encoding used for parsing and generating; # see {Character Encodings (M17n or Multilingualization)}[#class-CSV-label-Character+Encodings+-28M17n+or+Multilingualization-29]: @@ -2362,7 +2551,13 @@ class CSV # p row # end def each(&block) - parser_enumerator.each(&block) + return to_enum(__method__) unless block_given? + begin + while true + yield(parser_enumerator.next) + end + rescue StopIteration + end end # :call-seq: diff --git a/lib/csv/fields_converter.rb b/lib/csv/fields_converter.rb index b206118d99..d15977d379 100644 --- a/lib/csv/fields_converter.rb +++ b/lib/csv/fields_converter.rb @@ -44,7 +44,7 @@ class CSV @converters.empty? end - def convert(fields, headers, lineno) + def convert(fields, headers, lineno, quoted_fields) return fields unless need_convert? fields.collect.with_index do |field, index| @@ -63,7 +63,8 @@ class CSV else header = nil end - field = converter[field, FieldInfo.new(index, lineno, header)] + quoted = quoted_fields[index] + field = converter[field, FieldInfo.new(index, lineno, header, quoted)] end break unless field.is_a?(String) # short-circuit pipeline for speed end diff --git a/lib/csv/input_record_separator.rb b/lib/csv/input_record_separator.rb index bbf13479f7..7a99343c0c 100644 --- a/lib/csv/input_record_separator.rb +++ b/lib/csv/input_record_separator.rb @@ -4,20 +4,7 @@ require "stringio" class CSV module InputRecordSeparator class << self - is_input_record_separator_deprecated = false - verbose, $VERBOSE = $VERBOSE, true - stderr, $stderr = $stderr, StringIO.new - input_record_separator = $INPUT_RECORD_SEPARATOR - begin - $INPUT_RECORD_SEPARATOR = "\r\n" - is_input_record_separator_deprecated = (not $stderr.string.empty?) 
- ensure - $INPUT_RECORD_SEPARATOR = input_record_separator - $stderr = stderr - $VERBOSE = verbose - end - - if is_input_record_separator_deprecated + if RUBY_VERSION >= "3.0.0" def value "\n" end diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 7e943acf21..afb3131cd5 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -2,15 +2,10 @@ require "strscan" -require_relative "delete_suffix" require_relative "input_record_separator" -require_relative "match_p" require_relative "row" require_relative "table" -using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Parser @@ -27,6 +22,10 @@ class CSV class InvalidEncoding < StandardError end + # Raised when unexpected case is happen. + class UnexpectedError < StandardError + end + # # CSV::Scanner receives a CSV output, scans it and return the content. # It also controls the life cycle of the object with its methods +keep_start+, @@ -78,10 +77,10 @@ class CSV # +keep_end+, +keep_back+, +keep_drop+. # # CSV::InputsScanner.scan() tries to match with pattern at the current position. - # If there's a match, the scanner advances the “scan pointer” and returns the matched string. + # If there's a match, the scanner advances the "scan pointer" and returns the matched string. # Otherwise, the scanner returns nil. # - # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). + # CSV::InputsScanner.rest() returns the "rest" of the string (i.e. everything after the scan pointer). # If there is no more data (eos? = true), it returns "". # class InputsScanner @@ -96,11 +95,13 @@ class CSV end def each_line(row_separator) + return enum_for(__method__, row_separator) unless block_given? 
buffer = nil input = @scanner.rest position = @scanner.pos offset = 0 n_row_separator_chars = row_separator.size + # trace(__method__, :start, line, input) while true input.each_line(row_separator) do |line| @scanner.pos += line.bytesize @@ -140,25 +141,28 @@ class CSV end def scan(pattern) + # trace(__method__, pattern, :start) value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner return value if @last_scanner - if value - read_chunk if @scanner.eos? - return value - else - nil - end + read_chunk if value and @scanner.eos? + # trace(__method__, pattern, :done, value) + value end def scan_all(pattern) + # trace(__method__, pattern, :start) value = @scanner.scan(pattern) + # trace(__method__, pattern, :done, :last, value) if @last_scanner return value if @last_scanner return nil if value.nil? while @scanner.eos? and read_chunk and (sub_value = @scanner.scan(pattern)) + # trace(__method__, pattern, :sub, sub_value) value << sub_value end + # trace(__method__, pattern, :done, value) value end @@ -167,68 +171,126 @@ class CSV end def keep_start - @keeps.push([@scanner.pos, nil]) + # trace(__method__, :start) + adjust_last_keep + @keeps.push([@scanner, @scanner.pos, nil]) + # trace(__method__, :done) end def keep_end - start, buffer = @keeps.pop - keep = @scanner.string.byteslice(start, @scanner.pos - start) + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop + if scanner == @scanner + keep = @scanner.string.byteslice(start, @scanner.pos - start) + else + keep = @scanner.string.byteslice(0, @scanner.pos) + end if buffer buffer << keep keep = buffer end + # trace(__method__, :done, keep) keep end def keep_back - start, buffer = @keeps.pop + # trace(__method__, :start) + scanner, start, buffer = @keeps.pop if buffer + # trace(__method__, :rescan, start, buffer) string = @scanner.string - keep = string.byteslice(start, string.bytesize - start) + if scanner == @scanner + keep = string.byteslice(start, 
string.bytesize - start) + else + keep = string + end if keep and not keep.empty? @inputs.unshift(StringIO.new(keep)) @last_scanner = false end @scanner = StringScanner.new(buffer) else + if @scanner != scanner + message = "scanners are different but no buffer: " + message += "#{@scanner.inspect}(#{@scanner.object_id}): " + message += "#{scanner.inspect}(#{scanner.object_id})" + raise UnexpectedError, message + end + # trace(__method__, :repos, start, buffer) @scanner.pos = start end read_chunk if @scanner.eos? end def keep_drop - @keeps.pop + _, _, buffer = @keeps.pop + # trace(__method__, :done, :empty) unless buffer + return unless buffer + + last_keep = @keeps.last + # trace(__method__, :done, :no_last_keep) unless last_keep + return unless last_keep + + if last_keep[2] + last_keep[2] << buffer + else + last_keep[2] = buffer + end + # trace(__method__, :done) end def rest @scanner.rest end + def check(pattern) + @scanner.check(pattern) + end + private - def read_chunk - return false if @last_scanner + def trace(*args) + pp([*args, @scanner, @scanner&.string, @scanner&.pos, @keeps]) + end - unless @keeps.empty? - keep = @keeps.last - keep_start = keep[0] - string = @scanner.string - keep_data = string.byteslice(keep_start, @scanner.pos - keep_start) - if keep_data - keep_buffer = keep[1] - if keep_buffer - keep_buffer << keep_data - else - keep[1] = keep_data.dup - end + def adjust_last_keep + # trace(__method__, :start) + + keep = @keeps.last + # trace(__method__, :done, :empty) if keep.nil? + return if keep.nil? + + scanner, start, buffer = keep + string = @scanner.string + if @scanner != scanner + start = 0 + end + if start == 0 and @scanner.eos? 
+ keep_data = string + else + keep_data = string.byteslice(start, @scanner.pos - start) + end + if keep_data + if buffer + buffer << keep_data + else + keep[2] = keep_data.dup end - keep[0] = 0 end + # trace(__method__, :done) + end + + def read_chunk + return false if @last_scanner + + adjust_last_keep + input = @inputs.first case input when StringIO string = input.read raise InvalidEncoding unless string.valid_encoding? + # trace(__method__, :stringio, string) @scanner = StringScanner.new(string) @inputs.shift @last_scanner = @inputs.empty? @@ -237,6 +299,7 @@ class CSV chunk = input.gets(@row_separator, @chunk_size) if chunk raise InvalidEncoding unless chunk.valid_encoding? + # trace(__method__, :chunk, chunk) @scanner = StringScanner.new(chunk) if input.respond_to?(:eof?) and input.eof? @inputs.shift @@ -244,6 +307,7 @@ class CSV end true else + # trace(__method__, :no_chunk) @scanner = StringScanner.new("".encode(@encoding)) @inputs.shift @last_scanner = @inputs.empty? @@ -278,7 +342,11 @@ class CSV end def field_size_limit - @field_size_limit + @max_field_size&.succ + end + + def max_field_size + @max_field_size end def skip_lines @@ -346,6 +414,16 @@ class CSV end message = "Invalid byte sequence in #{@encoding}" raise MalformedCSVError.new(message, lineno) + rescue UnexpectedError => error + if @scanner + ignore_broken_line + lineno = @lineno + else + lineno = @lineno + 1 + end + message = "This should not be happen: #{error.message}: " + message += "Please report this to https://github.com/ruby/csv/issues" + raise MalformedCSVError.new(message, lineno) end end @@ -390,7 +468,7 @@ class CSV @backslash_quote = false end @unconverted_fields = @options[:unconverted_fields] - @field_size_limit = @options[:field_size_limit] + @max_field_size = @options[:max_field_size] @skip_blanks = @options[:skip_blanks] @fields_converter = @options[:fields_converter] @header_fields_converter = @options[:header_fields_converter] @@ -680,9 
+758,10 @@ class CSV case headers when Array @raw_headers = headers + quoted_fields = [false] * @raw_headers.size @use_headers = true when String - @raw_headers = parse_headers(headers) + @raw_headers, quoted_fields = parse_headers(headers) @use_headers = true when nil, false @raw_headers = nil @@ -692,21 +771,28 @@ class CSV @use_headers = true end if @raw_headers - @headers = adjust_headers(@raw_headers) + @headers = adjust_headers(@raw_headers, quoted_fields) else @headers = nil end end def parse_headers(row) - CSV.parse_line(row, - col_sep: @column_separator, - row_sep: @row_separator, - quote_char: @quote_character) + quoted_fields = [] + converter = lambda do |field, info| + quoted_fields << info.quoted? + field + end + headers = CSV.parse_line(row, + col_sep: @column_separator, + row_sep: @row_separator, + quote_char: @quote_character, + converters: [converter]) + [headers, quoted_fields] end - def adjust_headers(headers) - adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno) + def adjust_headers(headers, quoted_fields) + adjusted_headers = @header_fields_converter.convert(headers, nil, @lineno, quoted_fields) adjusted_headers.each {|h| h.freeze if h.is_a? String} adjusted_headers end @@ -729,28 +815,28 @@ class CSV sample[0, 128].index(@quote_character) end - SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") - if SCANNER_TEST - class UnoptimizedStringIO - def initialize(string) - @io = StringIO.new(string, "rb:#{string.encoding}") - end + class UnoptimizedStringIO # :nodoc: + def initialize(string) + @io = StringIO.new(string, "rb:#{string.encoding}") + end - def gets(*args) - @io.gets(*args) - end + def gets(*args) + @io.gets(*args) + end - def each_line(*args, &block) - @io.each_line(*args, &block) - end + def each_line(*args, &block) + @io.each_line(*args, &block) + end - def eof? - @io.eof? - end + def eof? + @io.eof? 
end + end - SCANNER_TEST_CHUNK_SIZE = - Integer((ENV["CSV_PARSER_SCANNER_TEST_CHUNK_SIZE"] || "1"), 10) + SCANNER_TEST = (ENV["CSV_PARSER_SCANNER_TEST"] == "yes") + if SCANNER_TEST + SCANNER_TEST_CHUNK_SIZE_NAME = "CSV_PARSER_SCANNER_TEST_CHUNK_SIZE" + SCANNER_TEST_CHUNK_SIZE_VALUE = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] def build_scanner inputs = @samples.collect do |sample| UnoptimizedStringIO.new(sample) @@ -760,10 +846,17 @@ class CSV else inputs << @input end + begin + chunk_size_value = ENV[SCANNER_TEST_CHUNK_SIZE_NAME] + rescue # Ractor::IsolationError + # Ractor on Ruby 3.0 can't read ENV value. + chunk_size_value = SCANNER_TEST_CHUNK_SIZE_VALUE + end + chunk_size = Integer((chunk_size_value || "1"), 10) InputsScanner.new(inputs, @encoding, @row_separator, - chunk_size: SCANNER_TEST_CHUNK_SIZE) + chunk_size: chunk_size) end else def build_scanner @@ -826,6 +919,14 @@ class CSV end end + def validate_field_size(field) + return unless @max_field_size + return if field.size <= @max_field_size + ignore_broken_line + message = "Field size exceeded: #{field.size} > #{@max_field_size}" + raise MalformedCSVError.new(message, @lineno) + end + def parse_no_quote(&block) @scanner.each_line(@row_separator) do |line| next if @skip_lines and skip_line?(line) @@ -835,9 +936,16 @@ class CSV if line.empty? 
next if @skip_blanks row = [] + quoted_fields = [] else line = strip_value(line) row = line.split(@split_column_separator, -1) + quoted_fields = [false] * row.size + if @max_field_size + row.each do |column| + validate_field_size(column) + end + end n_columns = row.size i = 0 while i < n_columns @@ -846,7 +954,7 @@ class CSV end end @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end end @@ -868,31 +976,37 @@ class CSV next end row = [] + quoted_fields = [] elsif line.include?(@cr) or line.include?(@lf) @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) else row = line.split(@split_column_separator, -1) + quoted_fields = [] n_columns = row.size i = 0 while i < n_columns column = row[i] if column.empty? + quoted_fields << false row[i] = nil else n_quotes = column.count(@quote_character) if n_quotes.zero? + quoted_fields << false # no quote elsif n_quotes == 2 and column.start_with?(@quote_character) and column.end_with?(@quote_character) + quoted_fields << true row[i] = column[1..-2] else @scanner.keep_back @need_robust_parsing = true return parse_quotable_robust(&block) end + validate_field_size(row[i]) end i += 1 end @@ -900,13 +1014,14 @@ class CSV @scanner.keep_drop @scanner.keep_start @last_line = original_line - emit_row(row, &block) + emit_row(row, quoted_fields, &block) end @scanner.keep_drop end def parse_quotable_robust(&block) row = [] + quoted_fields = [] skip_needless_lines start_row while true @@ -916,32 +1031,39 @@ class CSV value = parse_column_value if value @scanner.scan_all(@strip_value) if @strip_value - if @field_size_limit and value.size >= @field_size_limit - ignore_broken_line - raise MalformedCSVError.new("Field size exceeded", @lineno) - end + validate_field_size(value) end if parse_column_end row << value + quoted_fields << @quoted_column_value elsif parse_row_end if row.empty? and value.nil? 
- emit_row([], &block) unless @skip_blanks + emit_row([], [], &block) unless @skip_blanks else row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) row = [] + quoted_fields = [] end skip_needless_lines start_row elsif @scanner.eos? break if row.empty? and value.nil? row << value - emit_row(row, &block) + quoted_fields << @quoted_column_value + emit_row(row, quoted_fields, &block) break else if @quoted_column_value + if liberal_parsing? and (new_line = @scanner.check(@line_end)) + message = + "Illegal end-of-line sequence outside of a quoted field " + + "<#{new_line.inspect}>" + else + message = "Any value after quoted field isn't allowed" + end ignore_broken_line - message = "Any value after quoted field isn't allowed" raise MalformedCSVError.new(message, @lineno) elsif @unquoted_column_value and (new_line = @scanner.scan(@line_end)) @@ -1034,7 +1156,7 @@ class CSV if (n_quotes % 2).zero? quotes[0, (n_quotes - 2) / 2] else - value = quotes[0, (n_quotes - 1) / 2] + value = quotes[0, n_quotes / 2] while true quoted_value = @scanner.scan_all(@quoted_value) value << quoted_value if quoted_value @@ -1058,11 +1180,9 @@ class CSV n_quotes = quotes.size if n_quotes == 1 break - elsif (n_quotes % 2) == 1 - value << quotes[0, (n_quotes - 1) / 2] - break else value << quotes[0, n_quotes / 2] + break if (n_quotes % 2) == 1 end end value @@ -1098,18 +1218,15 @@ class CSV def strip_value(value) return value unless @strip - return nil if value.nil? + return value if value.nil? case @strip when String - size = value.size - while value.start_with?(@strip) - size -= 1 - value = value[1, size] + while value.delete_prefix!(@strip) + # do nothing end - while value.end_with?(@strip) - size -= 1 - value = value[0, size] + while value.delete_suffix!(@strip) + # do nothing end else value.strip! 
@@ -1132,22 +1249,22 @@ class CSV @scanner.keep_start end - def emit_row(row, &block) + def emit_row(row, quoted_fields, &block) @lineno += 1 raw_row = row if @use_headers if @headers.nil? - @headers = adjust_headers(row) + @headers = adjust_headers(row, quoted_fields) return unless @return_headers row = Row.new(@headers, row, true) else row = Row.new(@headers, - @fields_converter.convert(raw_row, @headers, @lineno)) + @fields_converter.convert(raw_row, @headers, @lineno, quoted_fields)) end else # convert fields, if needed... - row = @fields_converter.convert(raw_row, nil, @lineno) + row = @fields_converter.convert(raw_row, nil, @lineno, quoted_fields) end # inject unconverted fields and accessor, if requested... diff --git a/lib/csv/row.rb b/lib/csv/row.rb index 7f2e7e7807..86323f7d0a 100644 --- a/lib/csv/row.rb +++ b/lib/csv/row.rb @@ -3,30 +3,105 @@ require "forwardable" class CSV + # = \CSV::Row + # A \CSV::Row instance represents a \CSV table row. + # (see {class CSV}[../CSV.html]). # - # A CSV::Row is part Array and part Hash. It retains an order for the fields - # and allows duplicates just as an Array would, but also allows you to access - # fields by name just as you could if they were in a Hash. + # The instance may have: + # - Fields: each is an object, not necessarily a \String. + # - Headers: each serves a key, and also need not be a \String. # - # All rows returned by CSV will be constructed from this class, if header row - # processing is activated. + # === Instance Methods + # + # \CSV::Row has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? 
+ # * Array#length + # * Array#size + # + # == Creating a \CSV::Row Instance + # + # Commonly, a new \CSV::Row instance is created by parsing \CSV source + # that has headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.each {|row| p row } + # Output: + # #<CSV::Row "Name":"foo" "Value":"0"> + # #<CSV::Row "Name":"bar" "Value":"1"> + # #<CSV::Row "Name":"baz" "Value":"2"> + # + # You can also create a row directly. See ::new. + # + # == Headers + # + # Like a \CSV::Table, a \CSV::Row has headers. + # + # A \CSV::Row that was created by parsing \CSV source + # inherits its headers from the table: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # row = table.first + # row.headers # => ["Name", "Value"] + # + # You can also create a new row with headers; + # like the keys in a \Hash, the headers need not be Strings: + # row = CSV::Row.new([:name, :value], ['foo', 0]) + # row.headers # => [:name, :value] + # + # The new row retains its headers even if added to a table + # that has headers: + # table << row # => #<CSV::Table mode:col_or_row row_count:5> + # row.headers # => [:name, :value] + # row[:name] # => "foo" + # row['Name'] # => nil + # + # + # + # == Accessing Fields + # + # You may access a field in a \CSV::Row with either its \Integer index + # (\Array-style) or its header (\Hash-style). + # + # Fetch a field using method #[]: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row[1] # => 0 + # row['Value'] # => 0 + # + # Set a field using method #[]=: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # row[0] = 'bar' + # row['Value'] = 1 + # row # => #<CSV::Row "Name":"bar" "Value":1> # class Row - # - # Constructs a new CSV::Row from +headers+ and +fields+, which are expected - # to be Arrays. If one Array is shorter than the other, it will be padded - # with +nil+ objects. 
- # - # The optional +header_row+ parameter can be set to +true+ to indicate, via - # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header - # row. Otherwise, the row assumes to be a field row. - # - # A CSV::Row object supports the following Array methods through delegation: - # - # * empty?() - # * length() - # * size() - # + # :call-seq: + # CSV::Row.new(headers, fields, header_row = false) -> csv_row + # + # Returns the new \CSV::Row instance constructed from + # arguments +headers+ and +fields+; both should be Arrays; + # note that the fields need not be Strings: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # + # If the \Array lengths are different, the shorter is +nil+-filled: + # row = CSV::Row.new(['Name', 'Value', 'Date', 'Size'], ['foo', 0]) + # row # => #<CSV::Row "Name":"foo" "Value":0 "Date":nil "Size":nil> + # + # Each \CSV::Row object is either a <i>field row</i> or a <i>header row</i>; + # by default, a new row is a field row; for the row created above: + # row.field_row? # => true + # row.header_row? # => false + # + # If the optional argument +header_row+ is given as +true+, + # the created row is a header row: + # row = CSV::Row.new(['Name', 'Value'], ['foo', 0], header_row = true) + # row # => #<CSV::Row "Name":"foo" "Value":0> + # row.field_row? # => false + # row.header_row? # => true def initialize(headers, fields, header_row = false) @header_row = header_row headers.each { |h| h.freeze if h.is_a? String } @@ -48,6 +123,10 @@ class CSV extend Forwardable def_delegators :@row, :empty?, :length, :size + # :call-seq: + # row.initialize_copy(other_row) -> self + # + # Calls superclass method. 
def initialize_copy(other) super_return_value = super @row = @row.collect(&:dup) @@ -71,7 +150,7 @@ class CSV end # :call-seq: - # row.headers + # row.headers -> array_of_headers # # Returns the headers for this row: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" @@ -83,9 +162,9 @@ class CSV end # :call-seq: - # field(index) - # field(header) - # field(header, offset) + # field(index) -> value + # field(header) -> value + # field(header, offset) -> value # # Returns the field value for the given +index+ or +header+. # @@ -137,9 +216,9 @@ class CSV # # :call-seq: - # fetch(header) - # fetch(header, default) - # fetch(header) {|row| ... } + # fetch(header) -> value + # fetch(header, default) -> value + # fetch(header) {|row| ... } -> value # # Returns the field value as specified by +header+. # @@ -193,7 +272,7 @@ class CSV end # :call-seq: - # row.has_key?(header) + # row.has_key?(header) -> true or false # # Returns +true+ if there is a field with the given +header+, # +false+ otherwise. @@ -320,7 +399,7 @@ class CSV end # :call-seq: - # row.push(*values) ->self + # row.push(*values) -> self # # Appends each of the given +values+ to +self+ as a field; returns +self+: # source = "Name,Name,Name\nFoo,Bar,Baz\n" @@ -403,7 +482,7 @@ class CSV end # :call-seq: - # self.fields(*specifiers) + # self.fields(*specifiers) -> array_of_fields # # Returns field values per the given +specifiers+, which may be any mixture of: # - \Integer index. @@ -471,15 +550,26 @@ class CSV end alias_method :values_at, :fields - # # :call-seq: - # index( header ) - # index( header, offset ) + # index(header) -> index + # index(header, offset) -> index # - # This method will return the index of a field with the provided +header+. - # The +offset+ can be used to locate duplicate header names, as described in - # CSV::Row.field(). + # Returns the index for the given header, if it exists; + # otherwise returns +nil+. 
# + # With the single argument +header+, returns the index + # of the first-found field with the given +header+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.index('Name') # => 0 + # row.index('NAME') # => nil + # + # With arguments +header+ and +offset+, + # returns the index of the first-found field with given +header+, + # but ignoring the first +offset+ fields: + # row.index('Name', 1) # => 1 + # row.index('Name', 3) # => nil def index(header, minimum_index = 0) # find the pair index = headers[minimum_index..-1].index(header) @@ -487,24 +577,36 @@ class CSV index.nil? ? nil : index + minimum_index end + # :call-seq: + # row.field?(value) -> true or false # - # Returns +true+ if +data+ matches a field in this row, and +false+ - # otherwise. - # + # Returns +true+ if +value+ is a field in this row, +false+ otherwise: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.field?('Bar') # => true + # row.field?('BAR') # => false def field?(data) fields.include? data end include Enumerable + # :call-seq: + # row.each {|header, value| ... } -> self # - # Yields each pair of the row as header and field tuples (much like - # iterating over a Hash). This method returns the row for chaining. - # - # If no block is given, an Enumerator is returned. - # - # Support for Enumerable. + # Calls the block with each header-value pair; returns +self+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # row.each {|header, value| p [header, value] } + # Output: + # ["Name", "Foo"] + # ["Name", "Bar"] + # ["Name", "Baz"] # + # If no block is given, returns a new Enumerator: + # row.each # => #<Enumerator: #<CSV::Row "Name":"Foo" "Name":"Bar" "Name":"Baz">:each> def each(&block) return enum_for(__method__) { size } unless block_given? 
@@ -515,10 +617,19 @@ class CSV alias_method :each_pair, :each + # :call-seq: + # row == other -> true or false # - # Returns +true+ if this row contains the same headers and fields in the - # same order as +other+. - # + # Returns +true+ if +other+ is a /CSV::Row that has the same + # fields (headers and values) in the same order as +self+; + # otherwise returns +false+: + # source = "Name,Name,Name\nFoo,Bar,Baz\n" + # table = CSV.parse(source, headers: true) + # row = table[0] + # other_row = table[0] + # row == other_row # => true + # other_row = table[1] + # row == other_row # => false def ==(other) return @row == other.row if other.is_a? CSV::Row @row == other @@ -548,9 +659,31 @@ class CSV end alias_method :to_hash, :to_h + # :call-seq: + # row.deconstruct_keys(keys) -> hash + # + # Returns the new \Hash suitable for pattern matching containing only the + # keys specified as an argument. + def deconstruct_keys(keys) + if keys.nil? + to_h + else + keys.to_h { |key| [key, self[key]] } + end + end + alias_method :to_ary, :to_a # :call-seq: + # row.deconstruct -> array + # + # Returns the new \Array suitable for pattern matching containing the values + # of the row. + def deconstruct + fields + end + + # :call-seq: # row.to_csv -> csv_string # # Returns the row as a \CSV String. Headers are not included: diff --git a/lib/csv/table.rb b/lib/csv/table.rb index 0b62ae89ae..fb19f5453f 100644 --- a/lib/csv/table.rb +++ b/lib/csv/table.rb @@ -3,31 +3,199 @@ require "forwardable" class CSV + # = \CSV::Table + # A \CSV::Table instance represents \CSV data. + # (see {class CSV}[../CSV.html]). # - # A CSV::Table is a two-dimensional data structure for representing CSV - # documents. Tables allow you to work with the data by row or column, - # manipulate the data, and even convert the results back to CSV, if needed. + # The instance may have: + # - Rows: each is a Table::Row object. + # - Headers: names for the columns. 
# - # All tables returned by CSV will be constructed from this class, if header - # row processing is activated. + # === Instance Methods # + # \CSV::Table has three groups of instance methods: + # - Its own internally defined instance methods. + # - Methods included by module Enumerable. + # - Methods delegated to class Array.: + # * Array#empty? + # * Array#length + # * Array#size + # + # == Creating a \CSV::Table Instance + # + # Commonly, a new \CSV::Table instance is created by parsing \CSV source + # using headers: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.class # => CSV::Table + # + # You can also create an instance directly. See ::new. + # + # == Headers + # + # If a table has headers, the headers serve as labels for the columns of data. + # Each header serves as the label for its column. + # + # The headers for a \CSV::Table object are stored as an \Array of Strings. + # + # Commonly, headers are defined in the first row of \CSV source: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.headers # => ["Name", "Value"] + # + # If no headers are defined, the \Array is empty: + # table = CSV::Table.new([]) + # table.headers # => [] + # + # == Access Modes + # + # \CSV::Table provides three modes for accessing table data: + # - \Row mode. + # - Column mode. + # - Mixed mode (the default for a new table). + # + # The access mode for a\CSV::Table instance affects the behavior + # of some of its instance methods: + # - #[] + # - #[]= + # - #delete + # - #delete_if + # - #each + # - #values_at + # + # === \Row Mode + # + # Set a table to row mode with method #by_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # + # Specify a single row by an \Integer index: + # # Get a row. 
+ # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # # Set a row, then get it. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => #<CSV::Row "Name":"bam" "Value":3> + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">] + # # Set rows, then get them. + # table[1..2] = [ + # CSV::Row.new(['Name', 'Value'], ['bat', 4]), + # CSV::Row.new(['Name', 'Value'], ['bad', 5]), + # ] + # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]] + # + # === Column Mode + # + # Set a table to column mode with method #by_col!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # Specify a column by an \Integer index: + # # Get a column. + # table[0] + # # Set a column, then get it. + # table[0] = ['FOO', 'BAR', 'BAZ'] + # table[0] # => ["FOO", "BAR", "BAZ"] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["FOO", "BAR", "BAZ"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] + # + # === Mixed Mode + # + # In mixed mode, you can refer to either rows or columns: + # - An \Integer index refers to a row. + # - A \Range index refers to multiple rows. + # - A \String index refers to a column. + # + # Set a table to mixed mode with method #by_col_or_row!: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # + # Specify a single row by an \Integer index: + # # Get a row. + # table[1] # => #<CSV::Row "Name":"bar" "Value":"1"> + # # Set a row, then get it. 
+ # table[1] = CSV::Row.new(['Name', 'Value'], ['bam', 3]) + # table[1] # => #<CSV::Row "Name":"bam" "Value":3> + # + # Specify a sequence of rows by a \Range: + # # Get rows. + # table[1..2] # => [#<CSV::Row "Name":"bam" "Value":3>, #<CSV::Row "Name":"baz" "Value":"2">] + # # Set rows, then get them. + # table[1] = CSV::Row.new(['Name', 'Value'], ['bat', 4]) + # table[2] = CSV::Row.new(['Name', 'Value'], ['bad', 5]) + # table[1..2] # => [["Name", #<CSV::Row "Name":"bat" "Value":4>], ["Value", #<CSV::Row "Name":"bad" "Value":5>]] + # + # Specify a column by its \String header: + # # Get a column. + # table['Name'] # => ["foo", "bat", "bad"] + # # Set a column, then get it. + # table['Name'] = ['Foo', 'Bar', 'Baz'] + # table['Name'] # => ["Foo", "Bar", "Baz"] class Table + # :call-seq: + # CSV::Table.new(array_of_rows, headers = nil) -> csv_table + # + # Returns a new \CSV::Table object. + # + # - Argument +array_of_rows+ must be an \Array of CSV::Row objects. + # - Argument +headers+, if given, may be an \Array of Strings. + # + # --- + # + # Create an empty \CSV::Table object: + # table = CSV::Table.new([]) + # table # => #<CSV::Table mode:col_or_row row_count:1> + # + # Create a non-empty \CSV::Table object: + # rows = [ + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # CSV::Row.new([], []), + # ] + # table = CSV::Table.new(rows) + # table # => #<CSV::Table mode:col_or_row row_count:4> + # + # --- # - # Constructs a new CSV::Table from +array_of_rows+, which are expected - # to be CSV::Row objects. All rows are assumed to have the same headers. + # If argument +headers+ is an \Array of Strings, + # those Strings become the table's headers: + # table = CSV::Table.new([], headers: ['Name', 'Age']) + # table.headers # => ["Name", "Age"] # - # The optional +headers+ parameter can be set to Array of headers. - # If headers aren't set, headers are fetched from CSV::Row objects. - # Otherwise, headers() method will return headers being set in - # headers argument. 
+ # If argument +headers+ is not given and the table has rows, + # the headers are taken from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['foo', 'bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] # - # A CSV::Table object supports the following Array methods through - # delegation: + # If argument +headers+ is not given and the table is empty (has no rows), + # the headers are also empty: + # table = CSV::Table.new([]) + # table.headers # => [] # - # * empty?() - # * length() - # * size() + # --- # + # Raises an exception if argument +array_of_rows+ is not an \Array object: + # # Raises NoMethodError (undefined method `first' for :foo:Symbol): + # CSV::Table.new(:foo) + # + # Raises an exception if an element of +array_of_rows+ is not a \CSV::Table object: + # # Raises NoMethodError (undefined method `headers' for :foo:Symbol): + # CSV::Table.new([:foo]) def initialize(array_of_rows, headers: nil) @table = array_of_rows @headers = headers @@ -54,88 +222,141 @@ class CSV extend Forwardable def_delegators :@table, :empty?, :length, :size + # :call-seq: + # table.by_col -> table_dup # - # Returns a duplicate table object, in column mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_col + # dup_table.mode # => :col + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. 
+ # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col['Name'] # + # Also note that changes to the duplicate table will not affect the original. def by_col self.class.new(@table.dup).by_col! end + # :call-seq: + # table.by_col! -> self # - # Switches the mode of this table to column mode. All calls to indexing and - # iteration methods will work with columns until the mode is changed again. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to column mode + # (see {Column Mode}[#class-CSV::Table-label-Column+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_col! + # table.mode # => :col + # table1.equal?(table) # => true # Returned self def by_col! @mode = :col self end + # :call-seq: + # table.by_col_or_row -> table_dup # - # Returns a duplicate table object, in mixed mode. This is handy for - # chaining in a single call without changing the table mode, but be aware - # that this method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # dup_table = table.by_col_or_row + # dup_table.mode # => :col_or_row + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_col_or_row['Name'] # + # Also note that changes to the duplicate table will not affect the original. 
def by_col_or_row self.class.new(@table.dup).by_col_or_row! end + # :call-seq: + # table.by_col_or_row! -> self # - # Switches the mode of this table to mixed mode. All calls to indexing and - # iteration methods will use the default intelligent indexing system until - # the mode is changed again. In mixed mode an index is assumed to be a row - # reference while anything else is assumed to be column access by headers. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to mixed mode + # (see {Mixed Mode}[#class-CSV::Table-label-Mixed+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true).by_col! + # table.mode # => :col + # table1 = table.by_col_or_row! + # table.mode # => :col_or_row + # table1.equal?(table) # => true # Returned self def by_col_or_row! @mode = :col_or_row self end + # :call-seq: + # table.by_row -> table_dup # - # Returns a duplicate table object, in row mode. This is handy for chaining - # in a single call without changing the table mode, but be aware that this - # method can consume a fair amount of memory for bigger data sets. + # Returns a duplicate of +self+, in row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]): + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # dup_table = table.by_row + # dup_table.mode # => :row + # dup_table.equal?(table) # => false # It's a dup # - # This method returns the duplicate table for chaining. Don't chain - # destructive methods (like []=()) this way though, since you are working - # with a duplicate. + # This may be used to chain method calls without changing the mode + # (but also will affect performance and memory usage): + # dup_table.by_row[1] # + # Also note that changes to the duplicate table will not affect the original. def by_row self.class.new(@table.dup).by_row! end + # :call-seq: + # table.by_row! 
-> self # - # Switches the mode of this table to row mode. All calls to indexing and - # iteration methods will work with rows until the mode is changed again. - # - # This method returns the table and is safe to chain. - # + # Sets the mode for +self+ to row mode + # (see {Row Mode}[#class-CSV::Table-label-Row+Mode]); returns +self+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.mode # => :col_or_row + # table1 = table.by_row! + # table.mode # => :row + # table1.equal?(table) # => true # Returned self def by_row! @mode = :row self end + # :call-seq: + # table.headers -> array_of_headers # - # Returns the headers for the first row of this table (assumed to match all - # other rows). The headers Array passed to CSV::Table.new is returned for - # empty tables. + # Returns a new \Array containing the \String headers for the table. # + # If the table is not empty, returns the headers from the first row: + # rows = [ + # CSV::Row.new(['Foo', 'Bar'], []), + # CSV::Row.new(['FOO', 'BAR'], []), + # CSV::Row.new(['foo', 'bar'], []), + # ] + # table = CSV::Table.new(rows) + # table.headers # => ["Foo", "Bar"] + # table.delete(0) + # table.headers # => ["FOO", "BAR"] + # table.delete(0) + # table.headers # => ["foo", "bar"] + # + # If the table is empty, returns a copy of the headers in the table itself: + # table.delete(0) + # table.headers # => ["Foo", "Bar"] def headers if @table.empty? @headers.dup @@ -145,17 +366,21 @@ class CSV end # :call-seq: - # table[n] -> row - # table[range] -> array_of_rows - # table[header] -> array_of_fields + # table[n] -> row or column_data + # table[range] -> array_of_rows or array_of_column_data + # table[header] -> array_of_column_data # # Returns data from the table; does not modify the table. 
# # --- # - # The expression <tt>table[n]</tt>, where +n+ is a non-negative \Integer, - # returns the +n+th row of the table, if that row exists, - # and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: + # Fetch a \Row by Its \Integer Index:: + # - Form: <tt>table[n]</tt>, +n+ an integer. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: _nth_ row of the table, if that row exists; + # otherwise +nil+. + # + # Returns the _nth_ row of the table if that row exists: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => #<CSV::Table mode:row row_count:4> @@ -168,20 +393,45 @@ class CSV # # Returns +nil+ if +n+ is too large or too small: # table[4] # => nil - # table[-4] => nil + # table[-4] # => nil # # Raises an exception if the access mode is <tt>:row</tt> - # and +n+ is not an - # {Integer-convertible object}[rdoc-ref:implicit_conversion.rdoc@Integer-Convertible+Objects]. + # and +n+ is not an \Integer: # table.by_row! # => #<CSV::Table mode:row row_count:4> # # Raises TypeError (no implicit conversion of String into Integer): # table['Name'] # # --- # - # The expression <tt>table[range]</tt>, where +range+ is a Range object, - # returns rows from the table, beginning at row <tt>range.first</tt>, - # if those rows exist, and if the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>: + # Fetch a Column by Its \Integer Index:: + # - Form: <tt>table[n]</tt>, +n+ an \Integer. + # - Access mode: <tt>:col</tt>. + # - Return value: _nth_ column of the table, if that column exists; + # otherwise an \Array of +nil+ fields of length <tt>self.size</tt>. + # + # Returns the _nth_ column of the table if that column exists: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! 
# => #<CSV::Table mode:col row_count:4> + # table[1] # => ["0", "1", "2"] + # + # Counts backward from the last column if +n+ is negative: + # table[-2] # => ["foo", "bar", "baz"] + # + # Returns an \Array of +nil+ fields if +n+ is too large or too small: + # table[4] # => [nil, nil, nil] + # table[-4] # => [nil, nil, nil] + # + # --- + # + # Fetch Rows by \Range:: + # - Form: <tt>table[range]</tt>, +range+ a \Range object. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: rows from the table, beginning at row <tt>range.start</tt>, + # if those rows exists. + # + # Returns rows from the table, beginning at row <tt>range.first</tt>, + # if those rows exist: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_row! # => #<CSV::Table mode:row row_count:4> @@ -191,11 +441,11 @@ class CSV # rows = table[1..2] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # - # If there are too few rows, returns all from <tt>range.first</tt> to the end: + # If there are too few rows, returns all from <tt>range.start</tt> to the end: # rows = table[1..50] # => #<CSV::Row "Name":"bar" "Value":"1"> # rows # => [#<CSV::Row "Name":"bar" "Value":"1">, #<CSV::Row "Name":"baz" "Value":"2">] # - # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array: + # Special case: if <tt>range.start == table.size</tt>, returns an empty \Array: # table[table.size..50] # => [] # # If <tt>range.end</tt> is negative, calculates the ending index from the end: @@ -211,9 +461,41 @@ class CSV # # --- # - # The expression <tt>table[header]</tt>, where +header+ is a \String, - # returns column values (\Array of \Strings) if the column exists - # and if the access mode is <tt>:col</tt> or <tt>:col_or_row</tt>: + # Fetch Columns by \Range:: + # - Form: <tt>table[range]</tt>, +range+ a \Range object. + # - Access mode: <tt>:col</tt>. 
+ # - Return value: column data from the table, beginning at column <tt>range.start</tt>, + # if those columns exist. + # + # Returns column values from the table, if the column exists; + # the values are arranged by row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.by_col! + # table[0..1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # Special case: if <tt>range.start == headers.size</tt>, + # returns an \Array (size: <tt>table.size</tt>) of empty \Arrays: + # table[table.headers.size..50] # => [[], [], []] + # + # If <tt>range.end</tt> is negative, calculates the ending index from the end: + # table[0..-1] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If <tt>range.start</tt> is negative, calculates the starting index from the end: + # table[-2..2] # => [["foo", "0"], ["bar", "1"], ["baz", "2"]] + # + # If <tt>range.start</tt> is larger than <tt>table.size</tt>, + # returns an \Array of +nil+ values: + # table[4..4] # => [nil, nil, nil] + # + # --- + # + # Fetch a Column by Its \String Header:: + # - Form: <tt>table[header]</tt>, +header+ a \String header. + # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt> + # - Return value: column data from the table, if that +header+ exists. + # + # Returns column values from the table, if the column exists: # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # table = CSV.parse(source, headers: true) # table.by_col! # => #<CSV::Table mode:col row_count:4> @@ -238,22 +520,132 @@ class CSV end end + # :call-seq: + # table[n] = row -> row + # table[n] = field_or_array_of_fields -> field_or_array_of_fields + # table[header] = field_or_array_of_fields -> field_or_array_of_fields # - # In the default mixed mode, this method assigns rows for index access and - # columns for header access. You can force the index association by first - # calling by_col!() or by_row!(). + # Puts data onto the table. 
# - # Rows may be set to an Array of values (which will inherit the table's - # headers()) or a CSV::Row. + # --- + # + # Set a \Row by Its \Integer Index:: + # - Form: <tt>table[n] = row</tt>, +n+ an \Integer, + # +row+ a \CSV::Row instance or an \Array of fields. + # - Access mode: <tt>:row</tt> or <tt>:col_or_row</tt>. + # - Return value: +row+. + # + # If the row exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_row = CSV::Row.new(['Name', 'Value'], ['bat', 3]) + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # return_value = table[0] = new_row + # return_value.equal?(new_row) # => true # Returned the row + # table[0].to_h # => {"Name"=>"bat", "Value"=>3} + # + # With access mode <tt>:col_or_row</tt>: + # table.by_col_or_row! # => #<CSV::Table mode:col_or_row row_count:4> + # table[0] = CSV::Row.new(['Name', 'Value'], ['bam', 4]) + # table[0].to_h # => {"Name"=>"bam", "Value"=>4} + # + # With an \Array instead of a \CSV::Row, inherits headers from the table: + # array = ['bad', 5] + # return_value = table[0] = array + # return_value.equal?(array) # => true # Returned the array + # table[0].to_h # => {"Name"=>"bad", "Value"=>5} # - # Columns may be set to a single value, which is copied to each row of the - # column, or an Array of values. Arrays of values are assigned to rows top - # to bottom in row major order. Excess values are ignored and if the Array - # does not have a value for each row the extra rows will receive a +nil+. + # If the row does not exist, extends the table by adding rows: + # assigns rows with +nil+ as needed: + # table.size # => 3 + # table[5] = ['bag', 6] + # table.size # => 6 + # table[3] # => nil + # table[4]# => nil + # table[5].to_h # => {"Name"=>"bag", "Value"=>6} + # + # Note that the +nil+ rows are actually +nil+, not a row of +nil+ fields. # - # Assigning to an existing column or row clobbers the data. 
Assigning to - # new columns creates them at the right end of the table. + # --- # + # Set a Column by Its \Integer Index:: + # - Form: <tt>table[n] = array_of_fields</tt>, +n+ an \Integer, + # +array_of_fields+ an \Array of \String fields. + # - Access mode: <tt>:col</tt>. + # - Return value: +array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # return_value = table[1] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table[1] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # If there are too few values, fills with +nil+ values: + # table[1] = [0] + # table[1] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table[1] = [0, 1, 2, 3, 4] + # table[1] # => [0, 1, 2] + # + # If a single value is given, replaces all fields in the column with that value: + # table[1] = 'bat' + # table[1] # => ["bat", "bat", "bat"] + # + # --- + # + # Set a Column by Its \String Header:: + # - Form: <tt>table[header] = field_or_array_of_fields</tt>, + # +header+ a \String header, +field_or_array_of_fields+ a field value + # or an \Array of \String fields. + # - Access mode: <tt>:col</tt> or <tt>:col_or_row</tt>. + # - Return value: +field_or_array_of_fields+. + # + # If the column exists, it is replaced: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # new_col = [3, 4, 5] + # table.by_col! 
# => #<CSV::Table mode:col row_count:4> + # return_value = table['Value'] = new_col + # return_value.equal?(new_col) # => true # Returned the column + # table['Value'] # => [3, 4, 5] + # # The rows, as revised: + # table.by_row! # => #<CSV::Table mode:row row_count:4> + # table[0].to_h # => {"Name"=>"foo", "Value"=>3} + # table[1].to_h # => {"Name"=>"bar", "Value"=>4} + # table[2].to_h # => {"Name"=>"baz", "Value"=>5} + # table.by_col! # => #<CSV::Table mode:col row_count:4> + # + # If there are too few values, fills with +nil+ values: + # table['Value'] = [0] + # table['Value'] # => [0, nil, nil] + # + # If there are too many values, ignores the extra values: + # table['Value'] = [0, 1, 2, 3, 4] + # table['Value'] # => [0, 1, 2] + # + # If the column does not exist, extends the table by adding columns: + # table['Note'] = ['x', 'y', 'z'] + # table['Note'] # => ["x", "y", "z"] + # # The rows, as revised: + # table.by_row! + # table[0].to_h # => {"Name"=>"foo", "Value"=>0, "Note"=>"x"} + # table[1].to_h # => {"Name"=>"bar", "Value"=>1, "Note"=>"y"} + # table[2].to_h # => {"Name"=>"baz", "Value"=>2, "Note"=>"z"} + # table.by_col! + # + # If a single value is given, replaces all fields in the column with that value: + # table['Value'] = 'bat' + # table['Value'] # => ["bat", "bat", "bat"] def []=(index_or_header, value) if @mode == :row or # by index (@mode == :col_or_row and index_or_header.is_a? Integer) @@ -463,6 +855,9 @@ class CSV end end + # :call-seq: + # table.delete_if {|row_or_column| ... } -> self + # # Removes rows or columns for which the block returns a truthy value; # returns +self+. 
# @@ -495,9 +890,8 @@ class CSV if @mode == :row or @mode == :col_or_row # by index @table.delete_if(&block) else # by header - deleted = [] headers.each do |header| - deleted << delete(header) if yield([header, self[header]]) + delete(header) if yield([header, self[header]]) end end @@ -506,6 +900,9 @@ class CSV include Enumerable + # :call-seq: + # table.each {|row_or_column| ... ) -> self + # # Calls the block with each row or column; returns +self+. # # When the access mode is <tt>:row</tt> or <tt>:col_or_row</tt>, @@ -534,7 +931,9 @@ class CSV return enum_for(__method__) { @mode == :col ? headers.size : size } unless block_given? if @mode == :col - headers.each { |header| yield([header, self[header]]) } + headers.each.with_index do |header, i| + yield([header, @table.map {|row| row[header, i]}]) + end else @table.each(&block) end @@ -542,6 +941,9 @@ class CSV self # for chaining end + # :call-seq: + # table == other_table -> true or false + # # Returns +true+ if all each row of +self+ <tt>==</tt> # the corresponding row of +other_table+, otherwise, +false+. # @@ -565,10 +967,14 @@ class CSV @table == other end + # :call-seq: + # table.to_a -> array_of_arrays # - # Returns the table as an Array of Arrays. Headers will be the first row, - # then all of the field rows will follow. - # + # Returns the table as an \Array of \Arrays; + # the headers are in the first row: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_a # => [["Name", "Value"], ["foo", "0"], ["bar", "1"], ["baz", "2"]] def to_a array = [headers] @table.each do |row| @@ -578,16 +984,29 @@ class CSV array end + # :call-seq: + # table.to_csv(**options) -> csv_string # - # Returns the table as a complete CSV String. Headers will be listed first, - # then all of the field rows. + # Returns the table as \CSV string. + # See {Options for Generating}[../CSV.html#class-CSV-label-Options+for+Generating]. 
# - # This method assumes you want the Table.headers(), unless you explicitly - # pass <tt>:write_headers => false</tt>. + # Defaults option +write_headers+ to +true+: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.to_csv # => "Name,Value\nfoo,0\nbar,1\nbaz,2\n" # - def to_csv(write_headers: true, **options) + # Omits the headers if option +write_headers+ is given as +false+ + # (see {Option +write_headers+}[../CSV.html#class-CSV-label-Option+write_headers]): + # table.to_csv(write_headers: false) # => "foo,0\nbar,1\nbaz,2\n" + # + # Limit rows if option +limit+ is given like +2+: + # table.to_csv(limit: 2) # => "Name,Value\nfoo,0\nbar,1\n" + def to_csv(write_headers: true, limit: nil, **options) array = write_headers ? [headers.to_csv(**options)] : [] - @table.each do |row| + limit ||= @table.size + limit = @table.size + 1 + limit if limit < 0 + limit = 0 if limit < 0 + @table.first(limit).each do |row| array.push(row.fields.to_csv(**options)) unless row.header_row? end @@ -613,9 +1032,24 @@ class CSV end end - # Shows the mode and size of this table in a US-ASCII String. + # :call-seq: + # table.inspect => string + # + # Returns a <tt>US-ASCII</tt>-encoded \String showing table: + # - Class: <tt>CSV::Table</tt>. + # - Access mode: <tt>:row</tt>, <tt>:col</tt>, or <tt>:col_or_row</tt>. + # - Size: Row count, including the header row. + # + # Example: + # source = "Name,Value\nfoo,0\nbar,1\nbaz,2\n" + # table = CSV.parse(source, headers: true) + # table.inspect # => "#<CSV::Table mode:col_or_row row_count:4>\nName,Value\nfoo,0\nbar,1\nbaz,2\n" + # def inspect - "#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>".encode("US-ASCII") + inspected = +"#<#{self.class} mode:#{@mode} row_count:#{to_a.size}>" + summary = to_csv(limit: 5) + inspected << "\n" << summary if summary.encoding.ascii_compatible? 
+ inspected end end end diff --git a/lib/csv/version.rb b/lib/csv/version.rb index d1d0dc0e02..e05d63d801 100644 --- a/lib/csv/version.rb +++ b/lib/csv/version.rb @@ -2,5 +2,5 @@ class CSV # The version of the installed library. - VERSION = "3.2.2" + VERSION = "3.2.6" end diff --git a/lib/csv/writer.rb b/lib/csv/writer.rb index 4a9a35c5af..030a295bc9 100644 --- a/lib/csv/writer.rb +++ b/lib/csv/writer.rb @@ -1,11 +1,8 @@ # frozen_string_literal: true require_relative "input_record_separator" -require_relative "match_p" require_relative "row" -using CSV::MatchP if CSV.const_defined?(:MatchP) - class CSV # Note: Don't use this class directly. This is an internal class. class Writer @@ -42,7 +39,10 @@ class CSV @headers ||= row if @use_headers @lineno += 1 - row = @fields_converter.convert(row, nil, lineno) if @fields_converter + if @fields_converter + quoted_fields = [false] * row.size + row = @fields_converter.convert(row, nil, lineno, quoted_fields) + end i = -1 converted_row = row.collect do |field| @@ -97,7 +97,7 @@ class CSV return unless @headers converter = @options[:header_fields_converter] - @headers = converter.convert(@headers, nil, 0) + @headers = converter.convert(@headers, nil, 0, []) @headers.each do |header| header.freeze if header.is_a?(String) end diff --git a/test/csv/interface/test_read.rb b/test/csv/interface/test_read.rb index d73622d554..001177036a 100644 --- a/test/csv/interface/test_read.rb +++ b/test/csv/interface/test_read.rb @@ -26,7 +26,7 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach rows = [] - CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) @@ -37,7 +37,7 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach_in_ractor ractor = Ractor.new(@input.path) do |path| rows = [] - CSV.foreach(path, col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(path, col_sep: 
"\t", row_sep: "\r\n") do |row| rows << row end rows @@ -52,13 +52,13 @@ class TestCSVInterfaceRead < Test::Unit::TestCase def test_foreach_mode rows = [] - CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n").each do |row| + CSV.foreach(@input.path, "r", col_sep: "\t", row_sep: "\r\n") do |row| rows << row end assert_equal(@rows, rows) end - def test_foreach_enumurator + def test_foreach_enumerator rows = CSV.foreach(@input.path, col_sep: "\t", row_sep: "\r\n").to_a assert_equal(@rows, rows) end @@ -205,6 +205,16 @@ class TestCSVInterfaceRead < Test::Unit::TestCase end end + def test_open_with_newline + CSV.open(@input.path, col_sep: "\t", universal_newline: true) do |csv| + assert_equal(@rows, csv.to_a) + end + File.binwrite(@input.path, "1,2,3\r\n" "4,5\n") + CSV.open(@input.path, newline: :universal) do |csv| + assert_equal(@rows, csv.to_a) + end + end + def test_parse assert_equal(@rows, CSV.parse(@data, col_sep: "\t", row_sep: "\r\n")) diff --git a/test/csv/interface/test_write.rb b/test/csv/interface/test_write.rb index 02c2c5c5ce..0cd39a7663 100644 --- a/test/csv/interface/test_write.rb +++ b/test/csv/interface/test_write.rb @@ -85,6 +85,15 @@ testrow LINE end + def test_generate_lines + lines = CSV.generate_lines([["foo", "bar"], [1, 2], [3, 4]]) + assert_equal(<<-LINES, lines) +foo,bar +1,2 +3,4 + LINES + end + def test_headers_detection headers = ["a", "b", "c"] CSV.open(@output.path, "w", headers: true) do |csv| diff --git a/test/csv/parse/test_convert.rb b/test/csv/parse/test_convert.rb index 21d9f20b28..c9195c71d9 100644 --- a/test/csv/parse/test_convert.rb +++ b/test/csv/parse/test_convert.rb @@ -15,6 +15,22 @@ class TestCSVParseConvert < Test::Unit::TestCase @time = Time.utc(2018, 12, 30, 6, 41, 29) @windows_safe_time_data = @time.strftime("%a %b %d %H:%M:%S %Y") + + @preserving_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? 
+ begin + Integer(f, 10) + rescue + f + end + end + + @quoted_header_converter = lambda do |field, info| + f = field.encode(CSV::ConverterEncoding) + return f if info.quoted? + f.to_sym + end end def test_integer @@ -107,4 +123,43 @@ class TestCSVParseConvert < Test::Unit::TestCase assert_equal([nil, "empty", "a"], CSV.parse_line(',"",a', empty_value: "empty")) end + + def test_quoted_parse_line + row = CSV.parse_line('1,"2",3', converters: @preserving_converter) + assert_equal([1, "2", 3], row) + end + + def test_quoted_parse + expected = [["quoted", "unquoted"], ["109", 1], ["10A", 2]] + rows = CSV.parse(<<~CSV, converters: @preserving_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, rows) + end + + def test_quoted_alternating_quote + row = CSV.parse_line('"1",2,"3"', converters: @preserving_converter) + assert_equal(['1', 2, '3'], row) + end + + def test_quoted_parse_headers + expected = [["quoted", :unquoted], ["109", "1"], ["10A", "2"]] + table = CSV.parse(<<~CSV, headers: true, header_converters: @quoted_header_converter) + "quoted",unquoted + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end + + def test_quoted_parse_with_string_headers + expected = [["quoted", :unquoted], %w[109 1], %w[10A 2]] + table = CSV.parse(<<~CSV, headers: '"quoted",unquoted', header_converters: @quoted_header_converter) + "109",1 + "10A",2 + CSV + assert_equal(expected, table.to_a) + end end diff --git a/test/csv/parse/test_general.rb b/test/csv/parse/test_general.rb index c740462c01..902be2ce4a 100644 --- a/test/csv/parse/test_general.rb +++ b/test/csv/parse/test_general.rb @@ -199,6 +199,32 @@ line,5,jkl field_size_limit: 2048 ) end + def test_field_size_limit_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + field_size_limit: column.size + 1)) + end + + def test_field_size_limit_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + field_size_limit: column.size) + end + 
+ def test_field_size_limit_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size) + end + + def test_field_size_limit_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + field_size_limit: column.size, + quote_char: nil) + end + def test_field_size_limit_in_extended_column_not_exceeding data = <<~DATA "a","b" @@ -221,6 +247,59 @@ line,5,jkl assert_parse_errors_out(data, field_size_limit: 5) end + def test_max_field_size_controls_lookahead + assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"', + max_field_size: 2048 ) + end + + def test_max_field_size_max_allowed + column = "abcde" + assert_equal([[column]], + CSV.parse("\"#{column}\"", + max_field_size: column.size)) + end + + def test_max_field_size_quote_simple + column = "abcde" + assert_parse_errors_out("\"#{column}\"", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_implicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1) + end + + def test_max_field_size_no_quote_explicitly + column = "abcde" + assert_parse_errors_out("#{column}", + max_field_size: column.size - 1, + quote_char: nil) + end + + def test_max_field_size_in_extended_column_not_exceeding + data = <<~DATA + "a","b" + " + 2 + ","" + DATA + assert_nothing_raised(CSV::MalformedCSVError) do + CSV.parse(data, max_field_size: 3) + end + end + + def test_max_field_size_in_extended_column_exceeding + data = <<~DATA + "a","b" + " + 2345 + ","" + DATA + assert_parse_errors_out(data, max_field_size: 4) + end + def test_row_sep_auto_cr assert_equal([["a"]], CSV.parse("a\r")) end @@ -246,14 +325,7 @@ line,5,jkl private def assert_parse_errors_out(data, **options) assert_raise(CSV::MalformedCSVError) do - timeout = 0.2 - if defined?(RubyVM::YJIT.enabled?) and RubyVM::YJIT.enabled? - timeout = 1 # for --yjit-call-threshold=1 - end - if defined?(RubyVM::MJIT.enabled?) and RubyVM::MJIT.enabled? 
- timeout = 5 # for --jit-wait - end - Timeout.timeout(timeout) do + Timeout.timeout(0.2) do CSV.parse(data, **options) fail("Parse didn't error out") end diff --git a/test/csv/parse/test_header.rb b/test/csv/parse/test_header.rb index 481c5107c6..e8c3786d68 100644 --- a/test/csv/parse/test_header.rb +++ b/test/csv/parse/test_header.rb @@ -218,6 +218,13 @@ A,B,C assert_equal([:one, :two_three], csv.headers) end + def test_builtin_symbol_raw_converter + csv = CSV.parse( "a b,c d", headers: true, + return_headers: true, + header_converters: :symbol_raw ) + assert_equal([:"a b", :"c d"], csv.headers) + end + def test_builtin_symbol_converter_with_punctuation csv = CSV.parse( "One, Two & Three ($)", headers: true, return_headers: true, @@ -228,7 +235,7 @@ A,B,C def test_builtin_converters_with_blank_header csv = CSV.parse( "one,,three", headers: true, return_headers: true, - header_converters: [:downcase, :symbol] ) + header_converters: [:downcase, :symbol, :symbol_raw] ) assert_equal([:one, nil, :three], csv.headers) end diff --git a/test/csv/parse/test_inputs_scanner.rb b/test/csv/parse/test_inputs_scanner.rb new file mode 100644 index 0000000000..06e1c845d5 --- /dev/null +++ b/test/csv/parse/test_inputs_scanner.rb @@ -0,0 +1,63 @@ +require_relative "../helper" + +class TestCSVParseInputsScanner < Test::Unit::TestCase + include Helper + + def test_scan_keep_over_chunks_nested_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 2) + scanner.keep_start + assert_equal("abc", scanner.scan_all(/[a-c]+/)) + scanner.keep_start + assert_equal("def", scanner.scan_all(/[d-f]+/)) + scanner.keep_back + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_scan_keep_over_chunks_nested_drop_back + input = CSV::Parser::UnoptimizedStringIO.new("abcdefghijklmnl") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + 
nil, + chunk_size: 3) + scanner.keep_start + assert_equal("ab", scanner.scan(/../)) + scanner.keep_start + assert_equal("c", scanner.scan(/./)) + assert_equal("d", scanner.scan(/./)) + scanner.keep_drop + scanner.keep_back + assert_equal("abcdefg", scanner.scan_all(/[a-g]+/)) + end + + def test_each_line_keep_over_chunks_multibyte + input = CSV::Parser::UnoptimizedStringIO.new("ab\n\u{3000}a\n") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("ab\n", each_line.next) + scanner.keep_start + assert_equal("\u{3000}a\n", each_line.next) + scanner.keep_back + assert_equal("\u{3000}a\n", scanner.scan_all(/[^,]+/)) + end + + def test_each_line_keep_over_chunks_fit_chunk_size + input = CSV::Parser::UnoptimizedStringIO.new("\na") + scanner = CSV::Parser::InputsScanner.new([input], + Encoding::UTF_8, + nil, + chunk_size: 1) + each_line = scanner.each_line("\n") + assert_equal("\n", each_line.next) + scanner.keep_start + assert_equal("a", each_line.next) + scanner.keep_back + end +end diff --git a/test/csv/parse/test_liberal_parsing.rb b/test/csv/parse/test_liberal_parsing.rb index 2f7b34689f..5796d10828 100644 --- a/test/csv/parse/test_liberal_parsing.rb +++ b/test/csv/parse/test_liberal_parsing.rb @@ -28,6 +28,17 @@ class TestCSVParseLiberalParsing < Test::Unit::TestCase CSV.parse_line(input, liberal_parsing: true)) end + def test_endline_after_quoted_field_end + csv = CSV.new("A\r\n\"B\"\nC\r\n", liberal_parsing: true) + assert_equal(["A"], csv.gets) + error = assert_raise(CSV::MalformedCSVError) do + csv.gets + end + assert_equal('Illegal end-of-line sequence outside of a quoted field <"\n"> in line 2.', + error.message) + assert_equal(["C"], csv.gets) + end + def test_quote_after_column_separator error = assert_raise(CSV::MalformedCSVError) do CSV.parse_line('is,this "three," or four,fields', liberal_parsing: true) diff --git a/test/csv/parse/test_read.rb 
b/test/csv/parse/test_read.rb new file mode 100644 index 0000000000..ba6fe985a9 --- /dev/null +++ b/test/csv/parse/test_read.rb @@ -0,0 +1,27 @@ +# -*- coding: utf-8 -*- +# frozen_string_literal: false + +require_relative "../helper" + +class TestCSVParseRead < Test::Unit::TestCase + extend DifferentOFS + + def test_shift + data = <<-CSV +1 +2 +3 + CSV + csv = CSV.new(data) + assert_equal([ + ["1"], + [["2"], ["3"]], + nil, + ], + [ + csv.shift, + csv.read, + csv.shift, + ]) + end +end diff --git a/test/csv/test_data_converters.rb b/test/csv/test_data_converters.rb index 1620e077be..c20a5d1f4b 100644 --- a/test/csv/test_data_converters.rb +++ b/test/csv/test_data_converters.rb @@ -103,4 +103,88 @@ class TestCSVDataConverters < Test::Unit::TestCase assert_equal(datetime, CSV::Converters[:date_time][iso8601_string]) end + + def test_builtin_date_time_converter_rfc3339_minute + rfc3339_string = "2018-01-14 22:25" + datetime = DateTime.new(2018, 1, 14, 22, 25) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_second + rfc3339_string = "2018-01-14 22:25:19" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_under_second + rfc3339_string = "2018-01-14 22:25:19.1" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_under_second_offset + rfc3339_string = "2018-01-14 22:25:19.1+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_offset + rfc3339_string = "2018-01-14 22:25:19+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + 
end + + def test_builtin_date_time_converter_rfc3339_utc + rfc3339_string = "2018-01-14 22:25:19Z" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_minute + rfc3339_string = "2018-01-14\t22:25" + datetime = DateTime.new(2018, 1, 14, 22, 25) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_second + rfc3339_string = "2018-01-14\t22:25:19" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_under_second + rfc3339_string = "2018-01-14\t22:25:19.1" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_under_second_offset + rfc3339_string = "2018-01-14\t22:25:19.1+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19.1, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_offset + rfc3339_string = "2018-01-14\t22:25:19+09:00" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19, "+9") + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end + + def test_builtin_date_time_converter_rfc3339_tab_utc + rfc3339_string = "2018-01-14\t22:25:19Z" + datetime = DateTime.new(2018, 1, 14, 22, 25, 19) + assert_equal(datetime, + CSV::Converters[:date_time][rfc3339_string]) + end end diff --git a/test/csv/test_encodings.rb b/test/csv/test_encodings.rb index 8d228c05f3..f08d551f69 100644 --- a/test/csv/test_encodings.rb +++ b/test/csv/test_encodings.rb @@ -288,6 +288,37 @@ class TestCSVEncodings < Test::Unit::TestCase error.message) end + def test_string_input_transcode + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA 
LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046" + csv = CSV.new(value, encoding: "UTF-8:EUC-JP") + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + + def test_string_input_set_encoding_string + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046".encode("EUC-JP") + csv = CSV.new(value.dup.force_encoding("UTF-8"), encoding: "EUC-JP") + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + + def test_string_input_set_encoding_encoding + # U+3042 HIRAGANA LETTER A + # U+3044 HIRAGANA LETTER I + # U+3046 HIRAGANA LETTER U + value = "\u3042\u3044\u3046".encode("EUC-JP") + csv = CSV.new(value.dup.force_encoding("UTF-8"), + encoding: Encoding.find("EUC-JP")) + assert_equal([[value.encode("EUC-JP")]], + csv.read) + end + private def assert_parses(fields, encoding, **options) diff --git a/test/csv/test_patterns.rb b/test/csv/test_patterns.rb new file mode 100644 index 0000000000..881f03a3a4 --- /dev/null +++ b/test/csv/test_patterns.rb @@ -0,0 +1,27 @@ +# frozen_string_literal: true + +require_relative "helper" + +class TestCSVPatternMatching < Test::Unit::TestCase + + def test_hash + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in B: b, C: c + assert_equal([2, 3], [b, c]) + end + end + + def test_hash_rest + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in B: b, **rest + assert_equal([2, { A: 1, C: 3 }], [b, rest]) + end + end + + def test_array + case CSV::Row.new(%i{A B C}, [1, 2, 3]) + in *, matched + assert_equal(3, matched) + end + end +end diff --git a/test/csv/test_table.rb b/test/csv/test_table.rb index 968e64eae7..e8ab74044e 100644 --- a/test/csv/test_table.rb +++ b/test/csv/test_table.rb @@ -274,6 +274,22 @@ A,B,C,Type,Index @table.each { |row| assert_instance_of(CSV::Row, row) } end + def test_each_by_col_duplicated_headers + table = CSV.parse(<<-CSV, headers: true) +a,a,,,b +1,2,3,4,5 +11,12,13,14,15 + CSV + assert_equal([ + ["a", ["1", "11"]], + ["a", ["2", "12"]], 
+ [nil, ["3", "13"]], + [nil, ["4", "14"]], + ["b", ["5", "15"]], + ], + table.by_col.each.to_a) + end + def test_each_split yielded_values = [] @table.each do |column1, column2, column3| @@ -320,6 +336,43 @@ A,B,C assert_equal(csv, @header_table.to_csv) end + def test_to_csv_limit_positive + assert_equal(<<-CSV, @table.to_csv(limit: 2)) +A,B,C +1,2,3 +4,5,6 + CSV + end + + def test_to_csv_limit_positive_over + assert_equal(<<-CSV, @table.to_csv(limit: 5)) +A,B,C +1,2,3 +4,5,6 +7,8,9 + CSV + end + + def test_to_csv_limit_zero + assert_equal(<<-CSV, @table.to_csv(limit: 0)) +A,B,C + CSV + end + + def test_to_csv_limit_negative + assert_equal(<<-CSV, @table.to_csv(limit: -2)) +A,B,C +1,2,3 +4,5,6 + CSV + end + + def test_to_csv_limit_negative_over + assert_equal(<<-CSV, @table.to_csv(limit: -5)) +A,B,C + CSV + end + def test_append # verify that we can chain the call assert_equal(@table, @table << [10, 11, 12]) @@ -549,7 +602,25 @@ A assert_send([Encoding, :compatible?, Encoding.find("US-ASCII"), @table.inspect.encoding], - "inspect() was not ASCII compatible." ) + "inspect() was not ASCII compatible." ) + end + + def test_inspect_with_rows + additional_rows = [ CSV::Row.new(%w{A B C}, [101, 102, 103]), + CSV::Row.new(%w{A B C}, [104, 105, 106]), + CSV::Row.new(%w{A B C}, [107, 108, 109]) ] + table = CSV::Table.new(@rows + additional_rows) + str_table = table.inspect + + assert_equal(<<-CSV, str_table) +#<CSV::Table mode:col_or_row row_count:7> +A,B,C +1,2,3 +4,5,6 +7,8,9 +101,102,103 +104,105,106 + CSV end def test_dig_mixed |