diff options
author | Sutou Kouhei <[email protected]> | 2019-10-12 14:03:21 +0900 |
---|---|---|
committer | GitHub <[email protected]> | 2019-10-12 14:03:21 +0900 |
commit | 92df7d98b62f48cf21cdec522f2e7b34380fd718 (patch) | |
tree | a0d169e177ebd5607caefa26cef90cc70df48232 /lib/csv/parser.rb | |
parent | d6e68bb263e79cb802fa683d9c4139ddca2fd4f5 (diff) |
Import CSV 3.1.2 (#2547)
Notes
Notes:
Merged-By: kou <[email protected]>
Diffstat (limited to 'lib/csv/parser.rb')
-rw-r--r-- | lib/csv/parser.rb | 60 |
1 files changed, 52 insertions, 8 deletions
diff --git a/lib/csv/parser.rb b/lib/csv/parser.rb index 2ef2a28ff3..42145f8923 100644 --- a/lib/csv/parser.rb +++ b/lib/csv/parser.rb @@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix) using CSV::MatchP if CSV.const_defined?(:MatchP) class CSV + # Note: Don't use this class directly. This is an internal class. class Parser + # + # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO + # or String object being read from or written to. Your data is never transcoded + # (unless you ask Ruby to transcode it for you) and will literally be parsed in + # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the + # Encoding of your data. This is accomplished by transcoding the parser itself + # into your Encoding. + # + + # Raised when encoding is invalid. class InvalidEncoding < StandardError end + # + # CSV::Scanner receives a CSV output, scans it and return the content. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # Uses StringScanner (the official strscan gem). Strscan provides lexical + # scanning operations on a String. We inherit its object and take advantage + # on the methods. For more information, please visit: + # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html + # class Scanner < StringScanner alias_method :scan_all, :scan @@ -38,7 +59,7 @@ class CSV def keep_end start = @keeps.pop - string[start, pos - start] + string.byteslice(start, pos - start) end def keep_back @@ -50,6 +71,18 @@ class CSV end end + # + # CSV::InputsScanner receives IO inputs, encoding and the chunk_size. + # It also controls the life cycle of the object with its methods +keep_start+, + # +keep_end+, +keep_back+, +keep_drop+. + # + # CSV::InputsScanner.scan() tries to match with pattern at the current position. + # If there's a match, the scanner advances the “scan pointer” and returns the matched string. + # Otherwise, the scanner returns nil. + # + # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer). + # If there is no more data (eos? = true), it returns "". + # class InputsScanner def initialize(inputs, encoding, chunk_size: 8192) @inputs = inputs.dup @@ -137,7 +170,7 @@ class CSV def keep_end start, buffer = @keeps.pop - keep = @scanner.string[start, @scanner.pos - start] + keep = @scanner.string.byteslice(start, @scanner.pos - start) if buffer buffer << keep keep = buffer @@ -192,7 +225,7 @@ class CSV input = @inputs.first case input when StringIO - string = input.string + string = input.read raise InvalidEncoding unless string.valid_encoding? @scanner = StringScanner.new(string) @inputs.shift @@ -319,6 +352,7 @@ class CSV end private + # A set of tasks to prepare the file in order to parse it def prepare prepare_variable prepare_quote_character @@ -447,7 +481,13 @@ class CSV end def prepare_separators - @column_separator = @options[:column_separator].to_s.encode(@encoding) + column_separator = @options[:column_separator] + @column_separator = column_separator.to_s.encode(@encoding) + if @column_separator.size < 1 + message = ":col_sep must be 1 or more characters: " + message += column_separator.inspect + raise ArgumentError, message + end @row_separator = resolve_row_separator(@options[:row_separator]).encode(@encoding) @@ -534,7 +574,9 @@ class CSV cr = "\r".encode(@encoding) lf = "\n".encode(@encoding) if @input.is_a?(StringIO) - separator = detect_row_separator(@input.string, cr, lf) + pos = @input.pos + separator = detect_row_separator(@input.read, cr, lf) + @input.seek(pos) elsif @input.respond_to?(:gets) if @input.is_a?(File) chunk_size = 32 * 1024 @@ -651,7 +693,9 @@ class CSV return false if @quote_character.nil? if @input.is_a?(StringIO) - sample = @input.string + pos = @input.pos + sample = @input.read + @input.seek(pos) else return false if @samples.empty? sample = @samples.first @@ -684,7 +728,7 @@ class CSV UnoptimizedStringIO.new(sample) end if @input.is_a?(StringIO) - inputs << UnoptimizedStringIO.new(@input.string) + inputs << UnoptimizedStringIO.new(@input.read) else inputs << @input end @@ -697,7 +741,7 @@ class CSV def build_scanner string = nil if @samples.empty? and @input.is_a?(StringIO) - string = @input.string + string = @input.read elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof? string = @samples[0] end |