diff options
author | Yusuke Endoh <[email protected]> | 2020-10-21 13:29:19 +0900 |
---|---|---|
committer | aycabta <[email protected]> | 2020-12-05 02:58:58 +0900 |
commit | 76cac4c05a7be61a94a709b8b850118ad0bfa684 (patch) | |
tree | 3131f03a4c77ab2bccc1a66d4e626a823d096379 /lib/reline/unicode.rb | |
parent | b3e0db80606614f11412604f1657a135002326e9 (diff) |
[ruby/reline] Improve the performance of `get_mbchar_width`
It is about three times faster to use one big regexp instead of
sequential matching.
https://github.com/ruby/reline/commit/e36f6c0707
Diffstat (limited to 'lib/reline/unicode.rb')
-rw-r--r-- | lib/reline/unicode.rb | 40 |
1 files changed, 25 insertions, 15 deletions
diff --git a/lib/reline/unicode.rb b/lib/reline/unicode.rb
index cd8c27e85b..df2f6719a4 100644
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@@ -72,20 +72,32 @@ class Reline::Unicode
     }.join
   end
 
+  require 'reline/unicode/east_asian_width'
+
+  MBCharWidthRE = /
+    (?<width_2_1>
+      [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
+    )
+  | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
+  | (?<width_0>^\p{M})
+  | (?<width_2_2>
+      #{ EastAsianWidth::TYPE_F }
+    | #{ EastAsianWidth::TYPE_W }
+    )
+  | (?<width_1>
+      #{ EastAsianWidth::TYPE_H }
+    | #{ EastAsianWidth::TYPE_NA }
+    | #{ EastAsianWidth::TYPE_N }
+    )
+  /x
+
   def self.get_mbchar_width(mbchar)
-    case mbchar.encode(Encoding::UTF_8)
-    when *EscapedChars # ^ + char, such as ^M, ^H, ^[, ...
-      2
-    when /^\u{2E3B}/ # THREE-EM DASH
-      3
-    when /^\p{M}/
-      0
-    when EastAsianWidth::TYPE_A
-      Reline.ambiguous_width
-    when EastAsianWidth::TYPE_F, EastAsianWidth::TYPE_W
-      2
-    when EastAsianWidth::TYPE_H, EastAsianWidth::TYPE_NA, EastAsianWidth::TYPE_N
-      1
+    m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
+    case
+    when m[:width_2_1], m[:width_2_2] then 2
+    when m[:width_3] then 3
+    when m[:width_0] then 0
+    when m[:width_1] then 1
     else
       nil
     end
@@ -591,5 +603,3 @@ class Reline::Unicode
     [byte_size, width]
   end
 end
-
-require 'reline/unicode/east_asian_width'