diff options
author | schneems <[email protected]> | 2022-07-26 15:21:09 -0500 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2022-08-19 10:02:24 +0900 |
commit | 490af8dbdb66263f29d0b4e43752fbb298b94862 (patch) | |
tree | 5f161e99d27a1417f446e8b1516263fd76d6f0bc /lib/syntax_suggest/code_frontier.rb | |
parent | a50df1ab0eb312e5cdcf010d2c1b362ec41f3c59 (diff) |
Sync SyntaxSuggest
```
$ tool/sync_default_gems.rb syntax_suggest
```
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/5859
Diffstat (limited to 'lib/syntax_suggest/code_frontier.rb')
-rw-r--r-- | lib/syntax_suggest/code_frontier.rb | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/lib/syntax_suggest/code_frontier.rb b/lib/syntax_suggest/code_frontier.rb new file mode 100644 index 0000000000..8e93b32514 --- /dev/null +++ b/lib/syntax_suggest/code_frontier.rb @@ -0,0 +1,178 @@ +# frozen_string_literal: true + +module SyntaxSuggest + # The main function of the frontier is to hold the edges of our search and to + # evaluate when we can stop searching. + + # There are three main phases in the algorithm: + # + # 1. Sanitize/format input source + # 2. Search for invalid blocks + # 3. Format invalid blocks into something meaninful + # + # The Code frontier is a critical part of the second step + # + # ## Knowing where we've been + # + # Once a code block is generated it is added onto the frontier. Then it will be + # sorted by indentation and frontier can be filtered. Large blocks that fully enclose a + # smaller block will cause the smaller block to be evicted. + # + # CodeFrontier#<<(block) # Adds block to frontier + # CodeFrontier#pop # Removes block from frontier + # + # ## Knowing where we can go + # + # Internally the frontier keeps track of "unvisited" lines which are exposed via `next_indent_line` + # when called, this method returns, a line of code with the highest indentation. + # + # The returned line of code can be used to build a CodeBlock and then that code block + # is added back to the frontier. Then, the lines are removed from the + # "unvisited" so we don't double-create the same block. + # + # CodeFrontier#next_indent_line # Shows next line + # CodeFrontier#register_indent_block(block) # Removes lines from unvisited + # + # ## Knowing when to stop + # + # The frontier knows how to check the entire document for a syntax error. When blocks + # are added onto the frontier, they're removed from the document. When all code containing + # syntax errors has been added to the frontier, the document will be parsable without a + # syntax error and the search can stop. + # + # CodeFrontier#holds_all_syntax_errors? # Returns true when frontier holds all syntax errors + # + # ## Filtering false positives + # + # Once the search is completed, the frontier may have multiple blocks that do not contain + # the syntax error. To limit the result to the smallest subset of "invalid blocks" call: + # + # CodeFrontier#detect_invalid_blocks + # + class CodeFrontier + def initialize(code_lines:, unvisited: UnvisitedLines.new(code_lines: code_lines)) + @code_lines = code_lines + @unvisited = unvisited + @queue = PriorityEngulfQueue.new + + @check_next = true + end + + def count + @queue.length + end + + # Performance optimization + # + # Parsing with ripper is expensive + # If we know we don't have any blocks with invalid + # syntax, then we know we cannot have found + # the incorrect syntax yet. + # + # When an invalid block is added onto the frontier + # check document state + private def can_skip_check? + check_next = @check_next + @check_next = false + + if check_next + false + else + true + end + end + + # Returns true if the document is valid with all lines + # removed. By default it checks all blocks in present in + # the frontier array, but can be used for arbitrary arrays + # of codeblocks as well + def holds_all_syntax_errors?(block_array = @queue, can_cache: true) + return false if can_cache && can_skip_check? + + without_lines = block_array.to_a.flat_map do |block| + block.lines + end + + SyntaxSuggest.valid_without?( + without_lines: without_lines, + code_lines: @code_lines + ) + end + + # Returns a code block with the largest indentation possible + def pop + @queue.pop + end + + def next_indent_line + @unvisited.peek + end + + def expand? + return false if @queue.empty? + return true if @unvisited.empty? + + frontier_indent = @queue.peek.current_indent + unvisited_indent = next_indent_line.indent + + if ENV["SYNTAX_SUGGEST_DEBUG"] + puts "```" + puts @queue.peek.to_s + puts "```" + puts " @frontier indent: #{frontier_indent}" + puts " @unvisited indent: #{unvisited_indent}" + end + + # Expand all blocks before moving to unvisited lines + frontier_indent >= unvisited_indent + end + + # Keeps track of what lines have been added to blocks and which are not yet + # visited. + def register_indent_block(block) + @unvisited.visit_block(block) + self + end + + # When one element fully encapsulates another we remove the smaller + # block from the frontier. This prevents double expansions and all-around + # weird behavior. However this guarantee is quite expensive to maintain + def register_engulf_block(block) + end + + # Add a block to the frontier + # + # This method ensures the frontier always remains sorted (in indentation order) + # and that each code block's lines are removed from the indentation hash so we + # don't re-evaluate the same line multiple times. + def <<(block) + @unvisited.visit_block(block) + + @queue.push(block) + + @check_next = true if block.invalid? + + self + end + + # Example: + # + # combination([:a, :b, :c, :d]) + # # => [[:a], [:b], [:c], [:d], [:a, :b], [:a, :c], [:a, :d], [:b, :c], [:b, :d], [:c, :d], [:a, :b, :c], [:a, :b, :d], [:a, :c, :d], [:b, :c, :d], [:a, :b, :c, :d]] + def self.combination(array) + guesses = [] + 1.upto(array.length).each do |size| + guesses.concat(array.combination(size).to_a) + end + guesses + end + + # Given that we know our syntax error exists somewhere in our frontier, we want to find + # the smallest possible set of blocks that contain all the syntax errors + def detect_invalid_blocks + self.class.combination(@queue.to_a.select(&:invalid?)).detect do |block_array| + holds_all_syntax_errors?(block_array, can_cache: false) + end || [] + end + end +end |