diff options
author | jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-18 15:30:24 +0000 |
---|---|---|
committer | jeg2 <jeg2@b2dd03c8-39d4-4d8f-98ff-823fe69b080e> | 2008-10-18 15:30:24 +0000 |
commit | b7be78d774daf5b9a9fdd63e6d20e30309b9efa0 (patch) | |
tree | f7272b9cfeace2b5ee9a3a283c0085ad15b398a0 /test/csv/test_csv_parsing.rb | |
parent | 54e2cb81799f0ff7e443758b50e906ea5b1833d3 (diff) |
* test/csv/*: Renamed tc_* files to test_* to fit in within Ruby's
testing process.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@19843 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
Diffstat (limited to 'test/csv/test_csv_parsing.rb')
-rw-r--r-- | test/csv/test_csv_parsing.rb | 192 |
1 files changed, 192 insertions, 0 deletions
#!/usr/bin/env ruby -w
# encoding: UTF-8

# test_csv_parsing.rb
#
#  Created by James Edward Gray II on 2005-10-31.
#  Copyright 2005 James Edward Gray II. You can redistribute or modify this code
#  under the terms of Ruby's license.

require "test/unit"
require "timeout"

require "csv"

#
# Following tests are my interpretation of the
# {CSV RFC}[http://www.ietf.org/rfc/rfc4180.txt].  I only deviate from that
# document in one place (intentionally) and that is to make the default row
# separator <tt>$/</tt>.
#
class TestCSVParsing < Test::Unit::TestCase
  # A large run of short lines appended to malformed input by the "fails fast"
  # tests below, so a parser that keeps scanning ahead has a lot of data to
  # chew through before it can report the error.
  BIG_DATA = "123456789\n" * 1024

  # Parses a single line mixing unquoted fields, padded fields, an empty
  # field, and quoted fields containing commas and escaped quotes.
  def test_mastering_regex_example
    ex = %Q{Ten Thousand,10000, 2710 ,,"10,000","It's ""10 Grand"", baby",10K}
    assert_equal( [ "Ten Thousand", "10000", " 2710 ", nil, "10,000",
                    "It's \"10 Grand\", baby", "10K" ],
                  CSV.parse_line(ex) )
  end

  # Old Ruby 1.8 CSV library tests.  Each entry pairs an input line with the
  # fields CSV.parse_line is expected to return for it.
  def test_std_lib_csv
    [ ["\t", ["\t"]],
      ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["\"\"\"\n\",\"\"\"\n\"", ["\"\n", "\"\n"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["\",\"", [","]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["\"\"\"\r\",\"\"\"\r\"", ["\"\r", "\"\r"]],
      ["\",\",\",\"", [",", ","]],
      ["foo,bar,", ["foo", "bar", nil]],
      [",foo,bar", [nil, "foo", "bar"]],
      ["foo,bar", ["foo", "bar"]],
      [";", [";"]],
      ["\t,\t", ["\t", "\t"]],
      ["foo,\"\r\n\r\",baz", ["foo", "\r\n\r", "baz"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]],
      [";,;", [";", ";"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end

    # NOTE(review): every entry in this second table also appears in the first
    # one and goes through the same CSV.parse_line call; presumably a
    # carry-over from the old suite -- confirm before pruning.
    [ ["foo,\"\"\"\"\"\",baz", ["foo", "\"\"", "baz"]],
      ["foo,\"\"\"bar\"\"\",baz", ["foo", "\"bar\"", "baz"]],
      ["foo,\"\r\n\",baz", ["foo", "\r\n", "baz"]],
      ["\"\"", [""]],
      ["foo,\"\"\"\",baz", ["foo", "\"", "baz"]],
      ["foo,\"\r.\n\",baz", ["foo", "\r.\n", "baz"]],
      ["foo,\"\r\",baz", ["foo", "\r", "baz"]],
      ["foo,\"\",baz", ["foo", "", "baz"]],
      ["foo", ["foo"]],
      [",,", [nil, nil, nil]],
      [",", [nil, nil]],
      ["foo,\"\n\",baz", ["foo", "\n", "baz"]],
      ["foo,,baz", ["foo", nil, "baz"]],
      ["foo,bar", ["foo", "bar"]],
      ["foo,\"\r\n\n\",baz", ["foo", "\r\n\n", "baz"]],
      ["foo,\"foo,bar\",baz", ["foo", "foo,bar", "baz"]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  # From:  http://ruby-talk.org/cgi-bin/scat.rb/ruby/ruby-core/6496
  #
  # Quoting edge cases: escaped quotes at field boundaries, newlines inside
  # quoted fields, and empty versus missing (+nil+) fields.
  def test_aras_edge_cases
    [ [%Q{a,b},               ["a", "b"]],
      [%Q{a,"""b"""},         ["a", "\"b\""]],
      [%Q{a,"""b"},           ["a", "\"b"]],
      [%Q{a,"b"""},           ["a", "b\""]],
      [%Q{a,"\nb"""},         ["a", "\nb\""]],
      [%Q{a,"""\nb"},         ["a", "\"\nb"]],
      [%Q{a,"""\nb\n"""},     ["a", "\"\nb\n\""]],
      [%Q{a,"""\nb\n""",\nc}, ["a", "\"\nb\n\"", nil]],
      [%Q{a,,,},              ["a", nil, nil, nil]],
      [%Q{,},                 [nil, nil]],
      [%Q{"",""},             ["", ""]],
      [%Q{""""},              ["\""]],
      [%Q{"""",""},           ["\"",""]],
      [%Q{,""},               [nil,""]],
      [%Q{,"\r"},             [nil,"\r"]],
      [%Q{"\r\n,"},           ["\r\n,"]],
      [%Q{"\r\n,",},          ["\r\n,", nil]] ].each do |input, expected|
      assert_equal(expected, CSV.parse_line(input))
    end
  end

  def test_james_edge_cases
    # A read at eof? should return nil.
    assert_nil(CSV.parse_line(""))
    #
    # With Ruby 1.8 CSV it's impossible to tell an empty line from a line
    # containing a single +nil+ field.  The old CSV library returns
    # <tt>[nil]</tt> in these cases, but <tt>Array.new</tt> makes more sense to
    # me.
    #
    assert_equal(Array.new, CSV.parse_line("\n1,2,3\n"))
  end

  # Malformed input must raise CSV::MalformedCSVError, and when reading row by
  # row the error message must name the line on which the problem occurred.
  def test_malformed_csv
    # A bare "\r" in an unquoted field is illegal when the row separator is "\n".
    assert_raise(CSV::MalformedCSVError) do
      CSV.parse_line("1,2\r,3", row_sep: "\n")
    end

    # The "\n" and "\r" below are escapes interpreted by the heredoc, so the
    # quoted field on "line 2" really spans two physical lines.
    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,some\rjunk
    line,5,jkl
    END_DATA
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /some\rjunk/ })

    csv = CSV.new(bad_data)
    begin
      loop do
        # Rows before the bad one must parse; reaching EOF (nil) without an
        # error is itself a failure.
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError
      assert_equal( "Unquoted fields do not allow \\r or \\n (line 4).",
                    $!.message )
    end

    # An unterminated quoted field.
    assert_raise(CSV::MalformedCSVError) { CSV.parse_line('1,2,"3...') }

    bad_data = <<-END_DATA.gsub(/^ +/, "")
    line,1,abc
    line,2,"def\nghi"

    line,4,8'10"
    line,5,jkl
    END_DATA
    lines = bad_data.lines.to_a
    assert_equal(6, lines.size)
    assert_match(/\Aline,4/, lines.find { |l| l =~ /8'10"/ })

    csv = CSV.new(bad_data)
    begin
      loop do
        assert_not_nil(csv.shift)
        assert_operator(csv.lineno, :<, 4)
      end
    rescue CSV::MalformedCSVError
      assert_equal("Illegal quoting on line 4.", $!.message)
    end
  end

  # A stray quote at the end of an unquoted field must be rejected within the
  # time limit even though BIG_DATA follows it.
  def test_the_parse_fails_fast_when_it_can_for_unquoted_fields
    assert_parse_errors_out('valid,fields,bad start"' + BIG_DATA)
  end

  # Text directly after a closing quote must be rejected within the time limit
  # even though BIG_DATA follows it.
  def test_the_parse_fails_fast_when_it_can_for_unescaped_quotes
    assert_parse_errors_out('valid,fields,"bad start"unescaped' + BIG_DATA)
  end

  # A quoted field holding all of BIG_DATA (10240 bytes) is larger than the
  # 2048 limit passed here, and the parse is expected to error out.
  def test_field_size_limit_controls_lookahead
    assert_parse_errors_out( 'valid,fields,"' + BIG_DATA + '"',
                             field_size_limit: 2048 )
  end

  private

  #
  # Asserts that <tt>CSV.parse(*args)</tt> raises CSV::MalformedCSVError within
  # 0.2 seconds.  A parse that returns normally hits the +fail+ call, and one
  # that runs too long is interrupted by Timeout; either way the exception is
  # not a CSV::MalformedCSVError, so +assert_raise+ reports a failure.
  #
  def assert_parse_errors_out(*args)
    assert_raise(CSV::MalformedCSVError) do
      Timeout.timeout(0.2) do
        CSV.parse(*args)
        fail("Parse didn't error out")
      end
    end
  end
end