diff options
author | Burdette Lamar <[email protected]> | 2022-02-25 13:12:59 -0600 |
---|---|---|
committer | GitHub <[email protected]> | 2022-02-25 13:12:59 -0600 |
commit | 26ffda2fd217651e73eb71e6da8f89eb17866f9d (patch) | |
tree | 6f41f5b0ce7c206fb9a249d8f739df48330053d3 /transcode.c | |
parent | 189ac52bba8b1355186431acfa335d40991a7406 (diff) |
[DOC] Enhanced RDoc for some encoding methods (#5598)
In String, treats:
#b
#scrub
#scrub!
#unicode_normalize
#unicode_normalize!
#encode
#encode!
Also adds a note to IO.new (suggested by @jeremyevans).
Notes
Notes:
Merged-By: BurdetteLamar <[email protected]>
Diffstat (limited to 'transcode.c')
-rw-r--r-- | transcode.c | 101 |
1 files changed, 44 insertions, 57 deletions
diff --git a/transcode.c b/transcode.c index 9cc4d00f28..400ad13775 100644 --- a/transcode.c +++ b/transcode.c @@ -2801,16 +2801,11 @@ str_encode_associate(VALUE str, int encidx) /* * call-seq: - * str.encode!(encoding, **options) -> str - * str.encode!(dst_encoding, src_encoding, **options) -> str + * encode!(dst_encoding = Encoding.default_internal, **enc_opts) -> self + * encode!(dst_encoding, src_encoding, **enc_opts) -> self + * + * Like #encode, but applies encoding changes to +self+; returns +self+. * - * The first form transcodes the contents of <i>str</i> from - * str.encoding to +encoding+. - * The second form transcodes the contents of <i>str</i> from - * src_encoding to dst_encoding. - * The +options+ keyword arguments give details for conversion. See String#encode - * for details. - * Returns the string even if no changes were made. */ static VALUE @@ -2837,58 +2832,50 @@ static VALUE encoded_dup(VALUE newstr, VALUE str, int encidx); /* * call-seq: - * str.encode(encoding, **options) -> str - * str.encode(dst_encoding, src_encoding, **options) -> str - * str.encode(**options) -> str + * encode(dst_encoding = Encoding.default_internal, **enc_opts) -> string + * encode(dst_encoding, src_encoding, **enc_opts) -> string + * + * Returns a copy of +self+ transcoded as determined by +dst_encoding+. + * By default, raises an exception if +self+ + * contains an invalid byte or a character not defined in +dst_encoding+; + * that behavior may be modified by encoding options; see below. 
+ * + * With no arguments: + * + * - Uses the same encoding if <tt>Encoding.default_internal</tt> is +nil+ + * (the default): + * + * Encoding.default_internal # => nil + * s = "Ruby\x99".force_encoding('Windows-1252') + * s.encoding # => #<Encoding:Windows-1252> + * s.bytes # => [82, 117, 98, 121, 153] + * t = s.encode # => "Ruby\x99" + * t.encoding # => #<Encoding:Windows-1252> + * t.bytes # => [82, 117, 98, 121, 153] + * + * - Otherwise, uses the encoding <tt>Encoding.default_internal</tt>: + * + * Encoding.default_internal = 'UTF-8' + * t = s.encode # => "Ruby™" + * t.encoding # => #<Encoding:UTF-8> + * + * With only argument +dst_encoding+ given, uses that encoding: + * + * s = "Ruby\x99".force_encoding('Windows-1252') + * s.encoding # => #<Encoding:Windows-1252> + * t = s.encode('UTF-8') # => "Ruby™" + * t.encoding # => #<Encoding:UTF-8> * - * The first form returns a copy of +str+ transcoded - * to encoding +encoding+. - * The second form returns a copy of +str+ transcoded - * from src_encoding to dst_encoding. - * The last form returns a copy of +str+ transcoded to - * <tt>Encoding.default_internal</tt>. + * With arguments +dst_encoding+ and +src_encoding+ given, + * interprets +self+ using +src_encoding+, encodes the new string using +dst_encoding+: * - * By default, the first and second form raise - * Encoding::UndefinedConversionError for characters that are - * undefined in the destination encoding, and - * Encoding::InvalidByteSequenceError for invalid byte sequences - * in the source encoding. The last form by default does not raise - * exceptions but uses replacement strings. + * s = "Ruby\x99" + * t = s.encode('UTF-8', 'Windows-1252') # => "Ruby™" + * t.encoding # => #<Encoding:UTF-8> * - * The +options+ keyword arguments give details for conversion. - * The arguments are: + * Optional keyword arguments +enc_opts+ specify encoding options; + * see {Encoding Options}[rdoc-ref:encoding.rdoc@Encoding+Options]. 
* - * :invalid :: - * If the value is +:replace+, #encode replaces invalid byte sequences in - * +str+ with the replacement character. The default is to raise the - * Encoding::InvalidByteSequenceError exception - * :undef :: - * If the value is +:replace+, #encode replaces characters which are - * undefined in the destination encoding with the replacement character. - * The default is to raise the Encoding::UndefinedConversionError. - * :replace :: - * Sets the replacement string to the given value. The default replacement - * string is "\uFFFD" for Unicode encoding forms, and "?" otherwise. - * :fallback :: - * Sets the replacement string by the given object for undefined - * character. The object should be a Hash, a Proc, a Method, or an - * object which has [] method. - * Its key is an undefined character encoded in the source encoding - * of current transcoder. Its value can be any encoding until it - * can be converted into the destination encoding of the transcoder. - * :xml :: - * The value must be +:text+ or +:attr+. - * If the value is +:text+ #encode replaces undefined characters with their - * (upper-case hexadecimal) numeric character references. '&', '<', and '>' - * are converted to "&amp;", "&lt;", and "&gt;", respectively. - * If the value is +:attr+, #encode also quotes the replacement result - * (using '"'), and replaces '"' with "&quot;". - * :cr_newline :: - * Replaces LF ("\n") with CR ("\r") if value is true. - * :crlf_newline :: - * Replaces LF ("\n") with CRLF ("\r\n") if value is true. - * :universal_newline :: - * Replaces CRLF ("\r\n") and CR ("\r") with LF ("\n") if value is true. */ static VALUE |